agentv 1.2.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -141,30 +141,14 @@ var require_dist = __commonJS({
141
141
  });
142
142
 
143
143
  // src/index.ts
144
- import { readFileSync as readFileSync2 } from "node:fs";
144
+ import { readFileSync as readFileSync3 } from "node:fs";
145
145
  import { binary, run, subcommands as subcommands2 } from "cmd-ts";
146
146
 
147
- // src/commands/eval/index.ts
148
- import { stat as stat4 } from "node:fs/promises";
149
- import path20 from "node:path";
150
- import {
151
- command,
152
- flag,
153
- number as number4,
154
- option,
155
- optional as optional2,
156
- restPositionals,
157
- string as string4
158
- } from "cmd-ts";
159
- import fg from "fast-glob";
160
-
161
- // src/commands/eval/run-eval.ts
162
- import { constants as constants6 } from "node:fs";
163
- import { access as access6, mkdir as mkdir7 } from "node:fs/promises";
164
- import path19 from "node:path";
165
- import { pathToFileURL } from "node:url";
147
+ // src/commands/convert/index.ts
148
+ import { readFileSync, writeFileSync } from "node:fs";
149
+ import path14 from "node:path";
166
150
 
167
- // ../../packages/core/dist/chunk-V3JCB3HI.js
151
+ // ../../packages/core/dist/chunk-KPHTMTZ3.js
168
152
  import { constants } from "node:fs";
169
153
  import { access, readFile } from "node:fs/promises";
170
154
  import path from "node:path";
@@ -648,8 +632,8 @@ function getErrorMap() {
648
632
 
649
633
  // ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/helpers/parseUtil.js
650
634
  var makeIssue = (params) => {
651
- const { data, path: path27, errorMaps, issueData } = params;
652
- const fullPath = [...path27, ...issueData.path || []];
635
+ const { data, path: path28, errorMaps, issueData } = params;
636
+ const fullPath = [...path28, ...issueData.path || []];
653
637
  const fullIssue = {
654
638
  ...issueData,
655
639
  path: fullPath
@@ -765,11 +749,11 @@ var errorUtil;
765
749
 
766
750
  // ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/types.js
767
751
  var ParseInputLazyPath = class {
768
- constructor(parent, value, path27, key2) {
752
+ constructor(parent, value, path28, key2) {
769
753
  this._cachedPath = [];
770
754
  this.parent = parent;
771
755
  this.data = value;
772
- this._path = path27;
756
+ this._path = path28;
773
757
  this._key = key2;
774
758
  }
775
759
  get path() {
@@ -1049,8 +1033,8 @@ var ZodType = class {
1049
1033
  promise() {
1050
1034
  return ZodPromise.create(this, this._def);
1051
1035
  }
1052
- or(option4) {
1053
- return ZodUnion.create([this, option4], this._def);
1036
+ or(option5) {
1037
+ return ZodUnion.create([this, option5], this._def);
1054
1038
  }
1055
1039
  and(incoming) {
1056
1040
  return ZodIntersection.create(this, incoming, this._def);
@@ -2900,7 +2884,7 @@ var ZodUnion = class extends ZodType {
2900
2884
  return INVALID;
2901
2885
  }
2902
2886
  if (ctx.common.async) {
2903
- return Promise.all(options.map(async (option4) => {
2887
+ return Promise.all(options.map(async (option5) => {
2904
2888
  const childCtx = {
2905
2889
  ...ctx,
2906
2890
  common: {
@@ -2910,7 +2894,7 @@ var ZodUnion = class extends ZodType {
2910
2894
  parent: null
2911
2895
  };
2912
2896
  return {
2913
- result: await option4._parseAsync({
2897
+ result: await option5._parseAsync({
2914
2898
  data: ctx.data,
2915
2899
  path: ctx.path,
2916
2900
  parent: childCtx
@@ -2921,7 +2905,7 @@ var ZodUnion = class extends ZodType {
2921
2905
  } else {
2922
2906
  let dirty = void 0;
2923
2907
  const issues = [];
2924
- for (const option4 of options) {
2908
+ for (const option5 of options) {
2925
2909
  const childCtx = {
2926
2910
  ...ctx,
2927
2911
  common: {
@@ -2930,7 +2914,7 @@ var ZodUnion = class extends ZodType {
2930
2914
  },
2931
2915
  parent: null
2932
2916
  };
2933
- const result = option4._parseSync({
2917
+ const result = option5._parseSync({
2934
2918
  data: ctx.data,
2935
2919
  path: ctx.path,
2936
2920
  parent: childCtx
@@ -3011,8 +2995,8 @@ var ZodDiscriminatedUnion = class _ZodDiscriminatedUnion extends ZodType {
3011
2995
  }
3012
2996
  const discriminator = this.discriminator;
3013
2997
  const discriminatorValue = ctx.data[discriminator];
3014
- const option4 = this.optionsMap.get(discriminatorValue);
3015
- if (!option4) {
2998
+ const option5 = this.optionsMap.get(discriminatorValue);
2999
+ if (!option5) {
3016
3000
  addIssueToContext(ctx, {
3017
3001
  code: ZodIssueCode.invalid_union_discriminator,
3018
3002
  options: Array.from(this.optionsMap.keys()),
@@ -3021,13 +3005,13 @@ var ZodDiscriminatedUnion = class _ZodDiscriminatedUnion extends ZodType {
3021
3005
  return INVALID;
3022
3006
  }
3023
3007
  if (ctx.common.async) {
3024
- return option4._parseAsync({
3008
+ return option5._parseAsync({
3025
3009
  data: ctx.data,
3026
3010
  path: ctx.path,
3027
3011
  parent: ctx
3028
3012
  });
3029
3013
  } else {
3030
- return option4._parseSync({
3014
+ return option5._parseSync({
3031
3015
  data: ctx.data,
3032
3016
  path: ctx.path,
3033
3017
  parent: ctx
@@ -4211,7 +4195,7 @@ var coerce = {
4211
4195
  };
4212
4196
  var NEVER = INVALID;
4213
4197
 
4214
- // ../../packages/core/dist/chunk-V3JCB3HI.js
4198
+ // ../../packages/core/dist/chunk-KPHTMTZ3.js
4215
4199
  async function fileExists(filePath) {
4216
4200
  try {
4217
4201
  await access(filePath, constants.F_OK);
@@ -4227,10 +4211,6 @@ async function readTextFile(filePath) {
4227
4211
  const content = await readFile(filePath, "utf8");
4228
4212
  return normalizeLineEndings(content);
4229
4213
  }
4230
- async function readJsonFile(filePath) {
4231
- const content = await readFile(filePath, "utf8");
4232
- return JSON.parse(content);
4233
- }
4234
4214
  async function findGitRoot(startPath) {
4235
4215
  let currentDir = path.dirname(path.resolve(startPath));
4236
4216
  const root2 = path.parse(currentDir).root;
@@ -4574,8 +4554,7 @@ function normalizeCodexLogFormat(value) {
4574
4554
  }
4575
4555
  function resolveMockConfig(target) {
4576
4556
  const response = typeof target.response === "string" ? target.response : void 0;
4577
- const trace2 = Array.isArray(target.trace) ? target.trace : void 0;
4578
- return { response, trace: trace2 };
4557
+ return { response };
4579
4558
  }
4580
4559
  function resolveVSCodeConfig(target, env, insiders) {
4581
4560
  const workspaceTemplateEnvVar = resolveOptionalLiteralString(
@@ -4595,9 +4574,9 @@ function resolveVSCodeConfig(target, env, insiders) {
4595
4574
  const dryRunSource = target.dry_run ?? target.dryRun;
4596
4575
  const subagentRootSource = target.subagent_root ?? target.subagentRoot;
4597
4576
  const defaultCommand = insiders ? "code-insiders" : "code";
4598
- const command5 = resolveOptionalLiteralString(commandSource) ?? defaultCommand;
4577
+ const command6 = resolveOptionalLiteralString(commandSource) ?? defaultCommand;
4599
4578
  return {
4600
- command: command5,
4579
+ command: command6,
4601
4580
  waitForResponse: resolveOptionalBoolean(waitSource) ?? true,
4602
4581
  dryRun: resolveOptionalBoolean(dryRunSource) ?? false,
4603
4582
  subagentRoot: resolveOptionalString(subagentRootSource, env, `${target.name} subagent root`, {
@@ -4612,10 +4591,17 @@ function resolveCliConfig(target, env, evalFilePath) {
4612
4591
  const filesFormat = resolveOptionalLiteralString(
4613
4592
  target.files_format ?? target.filesFormat ?? target.attachments_format ?? target.attachmentsFormat
4614
4593
  );
4594
+ const verbose = resolveOptionalBoolean(target.verbose ?? target.cli_verbose ?? target.cliVerbose);
4595
+ const keepTempFiles = resolveOptionalBoolean(
4596
+ target.keep_temp_files ?? target.keepTempFiles ?? target.keep_output_files ?? target.keepOutputFiles
4597
+ );
4615
4598
  let cwd = resolveOptionalString(target.cwd, env, `${target.name} working directory`, {
4616
4599
  allowLiteral: true,
4617
4600
  optionalEnv: true
4618
4601
  });
4602
+ if (cwd && evalFilePath && !path2.isAbsolute(cwd)) {
4603
+ cwd = path2.resolve(path2.dirname(path2.resolve(evalFilePath)), cwd);
4604
+ }
4619
4605
  if (!cwd && evalFilePath) {
4620
4606
  cwd = path2.dirname(path2.resolve(evalFilePath));
4621
4607
  }
@@ -4623,7 +4609,7 @@ function resolveCliConfig(target, env, evalFilePath) {
4623
4609
  target.timeout_seconds ?? target.timeoutSeconds,
4624
4610
  `${target.name} timeout`
4625
4611
  );
4626
- const healthcheck = resolveCliHealthcheck(target.healthcheck, env, target.name);
4612
+ const healthcheck = resolveCliHealthcheck(target.healthcheck, env, target.name, evalFilePath);
4627
4613
  const commandTemplate = resolveString(
4628
4614
  commandTemplateSource,
4629
4615
  env,
@@ -4636,7 +4622,9 @@ function resolveCliConfig(target, env, evalFilePath) {
4636
4622
  filesFormat,
4637
4623
  cwd,
4638
4624
  timeoutMs,
4639
- healthcheck
4625
+ healthcheck,
4626
+ verbose,
4627
+ keepTempFiles
4640
4628
  };
4641
4629
  }
4642
4630
  function resolveTimeoutMs(source2, description) {
@@ -4649,7 +4637,7 @@ function resolveTimeoutMs(source2, description) {
4649
4637
  }
4650
4638
  return Math.floor(seconds * 1e3);
4651
4639
  }
4652
- function resolveCliHealthcheck(source2, env, targetName) {
4640
+ function resolveCliHealthcheck(source2, env, targetName, evalFilePath) {
4653
4641
  if (source2 === void 0 || source2 === null) {
4654
4642
  return void 0;
4655
4643
  }
@@ -4682,11 +4670,12 @@ function resolveCliHealthcheck(source2, env, targetName) {
4682
4670
  allowLiteral: true,
4683
4671
  optionalEnv: true
4684
4672
  });
4673
+ const resolvedCwd = cwd && evalFilePath && !path2.isAbsolute(cwd) ? path2.resolve(path2.dirname(path2.resolve(evalFilePath)), cwd) : cwd;
4685
4674
  return {
4686
4675
  type: "command",
4687
4676
  commandTemplate,
4688
4677
  timeoutMs,
4689
- cwd
4678
+ cwd: resolvedCwd
4690
4679
  };
4691
4680
  }
4692
4681
  throw new Error(`${targetName} healthcheck type must be 'http' or 'command'`);
@@ -4885,6 +4874,21 @@ var PROVIDER_ALIASES = [
4885
4874
  "vertex"
4886
4875
  // legacy/future support
4887
4876
  ];
4877
+ function extractLastAssistantContent(messages) {
4878
+ if (!messages || messages.length === 0) {
4879
+ return "";
4880
+ }
4881
+ for (let i = messages.length - 1; i >= 0; i--) {
4882
+ const msg = messages[i];
4883
+ if (msg.role === "assistant" && msg.content !== void 0) {
4884
+ if (typeof msg.content === "string") {
4885
+ return msg.content;
4886
+ }
4887
+ return JSON.stringify(msg.content);
4888
+ }
4889
+ }
4890
+ return "";
4891
+ }
4888
4892
  function isAgentProvider(provider) {
4889
4893
  return provider ? AGENT_PROVIDER_KINDS.includes(provider.kind) : false;
4890
4894
  }
@@ -5995,10 +5999,10 @@ function assignProp(target, prop, value) {
5995
5999
  configurable: true
5996
6000
  });
5997
6001
  }
5998
- function getElementAtPath(obj, path27) {
5999
- if (!path27)
6002
+ function getElementAtPath(obj, path28) {
6003
+ if (!path28)
6000
6004
  return obj;
6001
- return path27.reduce((acc, key2) => acc?.[key2], obj);
6005
+ return path28.reduce((acc, key2) => acc?.[key2], obj);
6002
6006
  }
6003
6007
  function promiseAllObject(promisesObj) {
6004
6008
  const keys = Object.keys(promisesObj);
@@ -6318,11 +6322,11 @@ function aborted(x, startIndex = 0) {
6318
6322
  }
6319
6323
  return false;
6320
6324
  }
6321
- function prefixIssues(path27, issues) {
6325
+ function prefixIssues(path28, issues) {
6322
6326
  return issues.map((iss) => {
6323
6327
  var _a17;
6324
6328
  (_a17 = iss).path ?? (_a17.path = []);
6325
- iss.path.unshift(path27);
6329
+ iss.path.unshift(path28);
6326
6330
  return iss;
6327
6331
  });
6328
6332
  }
@@ -6459,7 +6463,7 @@ function treeifyError(error40, _mapper) {
6459
6463
  return issue2.message;
6460
6464
  };
6461
6465
  const result = { errors: [] };
6462
- const processError = (error41, path27 = []) => {
6466
+ const processError = (error41, path28 = []) => {
6463
6467
  var _a17, _b8;
6464
6468
  for (const issue2 of error41.issues) {
6465
6469
  if (issue2.code === "invalid_union" && issue2.errors.length) {
@@ -6469,7 +6473,7 @@ function treeifyError(error40, _mapper) {
6469
6473
  } else if (issue2.code === "invalid_element") {
6470
6474
  processError({ issues: issue2.issues }, issue2.path);
6471
6475
  } else {
6472
- const fullpath = [...path27, ...issue2.path];
6476
+ const fullpath = [...path28, ...issue2.path];
6473
6477
  if (fullpath.length === 0) {
6474
6478
  result.errors.push(mapper(issue2));
6475
6479
  continue;
@@ -6499,9 +6503,9 @@ function treeifyError(error40, _mapper) {
6499
6503
  processError(error40);
6500
6504
  return result;
6501
6505
  }
6502
- function toDotPath(path27) {
6506
+ function toDotPath(path28) {
6503
6507
  const segs = [];
6504
- for (const seg of path27) {
6508
+ for (const seg of path28) {
6505
6509
  if (typeof seg === "number")
6506
6510
  segs.push(`[${seg}]`);
6507
6511
  else if (typeof seg === "symbol")
@@ -8100,7 +8104,7 @@ var $ZodUnion = /* @__PURE__ */ $constructor("$ZodUnion", (inst, def) => {
8100
8104
  defineLazy(inst._zod, "optout", () => def.options.some((o) => o._zod.optout === "optional") ? "optional" : void 0);
8101
8105
  defineLazy(inst._zod, "values", () => {
8102
8106
  if (def.options.every((o) => o._zod.values)) {
8103
- return new Set(def.options.flatMap((option4) => Array.from(option4._zod.values)));
8107
+ return new Set(def.options.flatMap((option5) => Array.from(option5._zod.values)));
8104
8108
  }
8105
8109
  return void 0;
8106
8110
  });
@@ -8114,8 +8118,8 @@ var $ZodUnion = /* @__PURE__ */ $constructor("$ZodUnion", (inst, def) => {
8114
8118
  inst._zod.parse = (payload, ctx) => {
8115
8119
  let async = false;
8116
8120
  const results = [];
8117
- for (const option4 of def.options) {
8118
- const result = option4._zod.run({
8121
+ for (const option5 of def.options) {
8122
+ const result = option5._zod.run({
8119
8123
  value: payload.value,
8120
8124
  issues: []
8121
8125
  }, ctx);
@@ -8140,10 +8144,10 @@ var $ZodDiscriminatedUnion = /* @__PURE__ */ $constructor("$ZodDiscriminatedUnio
8140
8144
  const _super = inst._zod.parse;
8141
8145
  defineLazy(inst._zod, "propValues", () => {
8142
8146
  const propValues = {};
8143
- for (const option4 of def.options) {
8144
- const pv = option4._zod.propValues;
8147
+ for (const option5 of def.options) {
8148
+ const pv = option5._zod.propValues;
8145
8149
  if (!pv || Object.keys(pv).length === 0)
8146
- throw new Error(`Invalid discriminated union option at index "${def.options.indexOf(option4)}"`);
8150
+ throw new Error(`Invalid discriminated union option at index "${def.options.indexOf(option5)}"`);
8147
8151
  for (const [k, v] of Object.entries(pv)) {
8148
8152
  if (!propValues[k])
8149
8153
  propValues[k] = /* @__PURE__ */ new Set();
@@ -15347,8 +15351,8 @@ function isTransforming(_schema, _ctx) {
15347
15351
  return false;
15348
15352
  }
15349
15353
  case "union": {
15350
- for (const option4 of def.options) {
15351
- if (isTransforming(option4, ctx))
15354
+ for (const option5 of def.options) {
15355
+ if (isTransforming(option5, ctx))
15352
15356
  return true;
15353
15357
  }
15354
15358
  return false;
@@ -26054,14 +26058,14 @@ function createAzure(options = {}) {
26054
26058
  description: "Azure OpenAI resource name"
26055
26059
  });
26056
26060
  const apiVersion = (_a17 = options.apiVersion) != null ? _a17 : "v1";
26057
- const url2 = ({ path: path27, modelId }) => {
26061
+ const url2 = ({ path: path28, modelId }) => {
26058
26062
  var _a24;
26059
26063
  const baseUrlPrefix = (_a24 = options.baseURL) != null ? _a24 : `https://${getResourceName()}.openai.azure.com/openai`;
26060
26064
  let fullUrl;
26061
26065
  if (options.useDeploymentBasedUrls) {
26062
- fullUrl = new URL(`${baseUrlPrefix}/deployments/${modelId}${path27}`);
26066
+ fullUrl = new URL(`${baseUrlPrefix}/deployments/${modelId}${path28}`);
26063
26067
  } else {
26064
- fullUrl = new URL(`${baseUrlPrefix}/v1${path27}`);
26068
+ fullUrl = new URL(`${baseUrlPrefix}/v1${path28}`);
26065
26069
  }
26066
26070
  fullUrl.searchParams.set("api-version", apiVersion);
26067
26071
  return fullUrl.toString();
@@ -34589,33 +34593,22 @@ var EVALUATOR_KIND_SET = new Set(EVALUATOR_KIND_VALUES);
34589
34593
  function isEvaluatorKind(value) {
34590
34594
  return typeof value === "string" && EVALUATOR_KIND_SET.has(value);
34591
34595
  }
34592
- function isTraceEventType(value) {
34593
- return typeof value === "string" && ["model_step", "tool_call", "tool_result", "message", "error"].includes(value);
34594
- }
34595
- function isTraceEvent(value) {
34596
- if (typeof value !== "object" || value === null) {
34597
- return false;
34598
- }
34599
- const candidate = value;
34600
- return isTraceEventType(candidate.type) && typeof candidate.timestamp === "string";
34601
- }
34602
- function computeTraceSummary(trace2) {
34596
+ function computeTraceSummary(messages) {
34603
34597
  const toolCallCounts = {};
34604
- let errorCount = 0;
34605
- for (const event of trace2) {
34606
- if (event.type === "tool_call" && event.name) {
34607
- toolCallCounts[event.name] = (toolCallCounts[event.name] ?? 0) + 1;
34608
- }
34609
- if (event.type === "error") {
34610
- errorCount++;
34598
+ let totalToolCalls = 0;
34599
+ for (const message of messages) {
34600
+ if (!message.toolCalls) continue;
34601
+ for (const toolCall of message.toolCalls) {
34602
+ toolCallCounts[toolCall.tool] = (toolCallCounts[toolCall.tool] ?? 0) + 1;
34603
+ totalToolCalls++;
34611
34604
  }
34612
34605
  }
34613
34606
  const toolNames = Object.keys(toolCallCounts).sort();
34614
34607
  return {
34615
- eventCount: trace2.length,
34608
+ eventCount: totalToolCalls,
34616
34609
  toolNames,
34617
34610
  toolCallsByName: toolCallCounts,
34618
- errorCount
34611
+ errorCount: 0
34619
34612
  };
34620
34613
  }
34621
34614
  function extractCodeBlocks(segments) {
@@ -34863,7 +34856,8 @@ var TEMPLATE_VARIABLES = {
34863
34856
  QUESTION: "question",
34864
34857
  EXPECTED_OUTCOME: "expected_outcome",
34865
34858
  REFERENCE_ANSWER: "reference_answer",
34866
- INPUT_MESSAGES: "input_messages"
34859
+ INPUT_MESSAGES: "input_messages",
34860
+ OUTPUT_MESSAGES: "output_messages"
34867
34861
  };
34868
34862
  var VALID_TEMPLATE_VARIABLES = new Set(Object.values(TEMPLATE_VARIABLES));
34869
34863
  var REQUIRED_TEMPLATE_VARIABLES = /* @__PURE__ */ new Set([
@@ -35253,6 +35247,17 @@ async function processMessages(options) {
35253
35247
  }
35254
35248
  continue;
35255
35249
  }
35250
+ if (isJsonObject(content)) {
35251
+ const rendered = JSON.stringify(content, null, 2);
35252
+ segments.push({ type: "text", value: rendered });
35253
+ if (textParts) {
35254
+ textParts.push(rendered);
35255
+ }
35256
+ continue;
35257
+ }
35258
+ if (!Array.isArray(content)) {
35259
+ continue;
35260
+ }
35256
35261
  for (const rawSegment of content) {
35257
35262
  if (!isJsonObject(rawSegment)) {
35258
35263
  continue;
@@ -35475,6 +35480,11 @@ async function buildPromptInputs(testCase, mode = "lm") {
35475
35480
  }
35476
35481
  }
35477
35482
  }
35483
+ } else if (isJsonObject(message.content)) {
35484
+ const rendered = JSON.stringify(message.content, null, 2);
35485
+ if (rendered.trim().length > 0) {
35486
+ messageSegments.push({ type: "text", value: rendered });
35487
+ }
35478
35488
  }
35479
35489
  segmentsByMessage.push(messageSegments);
35480
35490
  }
@@ -35716,16 +35726,16 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
35716
35726
  }) : [];
35717
35727
  const codeSnippets = extractCodeBlocks(inputSegments);
35718
35728
  let referenceAnswer = "";
35719
- if (outputSegments.length > 1) {
35720
- referenceAnswer = JSON.stringify(outputSegments, null, 2);
35721
- } else if (outputSegments.length === 1) {
35722
- const singleMessage = outputSegments[0];
35723
- if (typeof singleMessage.content === "string") {
35724
- referenceAnswer = singleMessage.content;
35725
- } else if (singleMessage.content) {
35726
- referenceAnswer = JSON.stringify(singleMessage, null, 2);
35727
- } else if (singleMessage.tool_calls) {
35728
- referenceAnswer = JSON.stringify(singleMessage, null, 2);
35729
+ if (outputSegments.length > 0) {
35730
+ const lastMessage = outputSegments[outputSegments.length - 1];
35731
+ const content = lastMessage.content;
35732
+ const toolCalls = lastMessage.tool_calls;
35733
+ if (typeof content === "string") {
35734
+ referenceAnswer = content;
35735
+ } else if (content !== void 0 && content !== null) {
35736
+ referenceAnswer = JSON.stringify(content, null, 2);
35737
+ } else if (toolCalls !== void 0 && toolCalls !== null) {
35738
+ referenceAnswer = JSON.stringify(toolCalls, null, 2);
35729
35739
  }
35730
35740
  }
35731
35741
  const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
@@ -36047,11 +36057,11 @@ async function invokeModel(options) {
36047
36057
  return mapResponse(result);
36048
36058
  }
36049
36059
  function mapResponse(result) {
36060
+ const content = result.text ?? "";
36050
36061
  return {
36051
- text: result.text ?? "",
36052
- reasoning: result.reasoningText ?? void 0,
36053
36062
  raw: result,
36054
- usage: toJsonObject(result.totalUsage ?? result.usage)
36063
+ usage: toJsonObject(result.totalUsage ?? result.usage),
36064
+ outputMessages: [{ role: "assistant", content }]
36055
36065
  };
36056
36066
  }
36057
36067
  function toJsonObject(value) {
@@ -36158,7 +36168,7 @@ async function withRetry(fn, retryConfig, signal) {
36158
36168
  }
36159
36169
  var execAsync2 = promisify2(execWithCallback);
36160
36170
  var DEFAULT_MAX_BUFFER = 10 * 1024 * 1024;
36161
- async function defaultCommandRunner(command5, options) {
36171
+ async function defaultCommandRunner(command6, options) {
36162
36172
  const execOptions = {
36163
36173
  cwd: options.cwd,
36164
36174
  env: options.env,
@@ -36168,7 +36178,7 @@ async function defaultCommandRunner(command5, options) {
36168
36178
  shell: process.platform === "win32" ? "powershell.exe" : void 0
36169
36179
  };
36170
36180
  try {
36171
- const { stdout, stderr } = await execAsync2(command5, execOptions);
36181
+ const { stdout, stderr } = await execAsync2(command6, execOptions);
36172
36182
  return {
36173
36183
  stdout,
36174
36184
  stderr,
@@ -36193,10 +36203,11 @@ var CliProvider = class {
36193
36203
  id;
36194
36204
  kind = "cli";
36195
36205
  targetName;
36196
- supportsBatch = false;
36206
+ supportsBatch = true;
36197
36207
  config;
36198
36208
  runCommand;
36199
36209
  verbose;
36210
+ keepTempFiles;
36200
36211
  healthcheckPromise;
36201
36212
  constructor(targetName, config2, runner = defaultCommandRunner) {
36202
36213
  this.targetName = targetName;
@@ -36204,6 +36215,7 @@ var CliProvider = class {
36204
36215
  this.config = config2;
36205
36216
  this.runCommand = runner;
36206
36217
  this.verbose = config2.verbose ?? false;
36218
+ this.keepTempFiles = config2.keepTempFiles ?? false;
36207
36219
  }
36208
36220
  async invoke(request) {
36209
36221
  if (request.signal?.aborted) {
@@ -36213,6 +36225,11 @@ var CliProvider = class {
36213
36225
  const outputFilePath = generateOutputFilePath(request.evalCaseId);
36214
36226
  const templateValues = buildTemplateValues(request, this.config, outputFilePath);
36215
36227
  const renderedCommand = renderTemplate2(this.config.commandTemplate, templateValues);
36228
+ if (this.verbose) {
36229
+ console.log(
36230
+ `[cli-provider:${this.targetName}] cwd=${this.config.cwd ?? ""} command=${renderedCommand}`
36231
+ );
36232
+ }
36216
36233
  const result = await this.runCommand(renderedCommand, {
36217
36234
  cwd: this.config.cwd,
36218
36235
  env: process.env,
@@ -36236,8 +36253,7 @@ var CliProvider = class {
36236
36253
  const responseContent = await this.readAndCleanupOutputFile(outputFilePath);
36237
36254
  const parsed = this.parseOutputContent(responseContent);
36238
36255
  return {
36239
- text: parsed.text,
36240
- trace: parsed.trace,
36256
+ outputMessages: parsed.outputMessages,
36241
36257
  raw: {
36242
36258
  command: renderedCommand,
36243
36259
  stderr: result.stderr,
@@ -36247,30 +36263,225 @@ var CliProvider = class {
36247
36263
  }
36248
36264
  };
36249
36265
  }
36266
+ async invokeBatch(requests) {
36267
+ if (requests.length === 0) {
36268
+ return [];
36269
+ }
36270
+ for (const request of requests) {
36271
+ if (request.signal?.aborted) {
36272
+ throw new Error("CLI provider batch request was aborted before execution");
36273
+ }
36274
+ }
36275
+ const controller = new AbortController();
36276
+ for (const request of requests) {
36277
+ request.signal?.addEventListener("abort", () => controller.abort(), { once: true });
36278
+ }
36279
+ await this.ensureHealthy(controller.signal);
36280
+ const outputFilePath = generateOutputFilePath("batch", ".jsonl");
36281
+ const batchInputFiles = [];
36282
+ for (const request of requests) {
36283
+ if (request.inputFiles && request.inputFiles.length > 0) {
36284
+ batchInputFiles.push(...request.inputFiles);
36285
+ }
36286
+ }
36287
+ const templateValues = buildTemplateValues(
36288
+ {
36289
+ question: "",
36290
+ guidelines: "",
36291
+ inputFiles: batchInputFiles,
36292
+ evalCaseId: "batch",
36293
+ attempt: 0
36294
+ },
36295
+ this.config,
36296
+ outputFilePath
36297
+ );
36298
+ const renderedCommand = renderTemplate2(this.config.commandTemplate, templateValues);
36299
+ if (this.verbose) {
36300
+ console.log(
36301
+ `[cli-provider:${this.targetName}] (batch size=${requests.length}) cwd=${this.config.cwd ?? ""} command=${renderedCommand}`
36302
+ );
36303
+ }
36304
+ const result = await this.runCommand(renderedCommand, {
36305
+ cwd: this.config.cwd,
36306
+ env: process.env,
36307
+ timeoutMs: this.config.timeoutMs,
36308
+ signal: controller.signal
36309
+ });
36310
+ if (result.failed || (result.exitCode ?? 0) !== 0) {
36311
+ if (controller.signal.aborted) {
36312
+ throw new Error("CLI provider request was aborted");
36313
+ }
36314
+ if (result.timedOut) {
36315
+ throw new Error(
36316
+ `CLI provider timed out${formatTimeoutSuffix(this.config.timeoutMs ?? void 0)}`
36317
+ );
36318
+ }
36319
+ const codeText = result.exitCode !== null ? result.exitCode : "unknown";
36320
+ const detail = result.stderr.trim() || result.stdout.trim();
36321
+ const message = detail ? `${detail} (exit code ${codeText})` : `CLI exited with code ${codeText}`;
36322
+ throw new Error(message);
36323
+ }
36324
+ const responseContent = await this.readAndCleanupOutputFile(outputFilePath);
36325
+ const recordsById = this.parseJsonlBatchOutput(responseContent);
36326
+ const requestedIds = requests.map((request) => request.evalCaseId).filter((id) => typeof id === "string" && id.trim().length > 0);
36327
+ const missingIds = requestedIds.filter((id) => !recordsById.has(id));
36328
+ if (missingIds.length > 0) {
36329
+ throw new Error(`CLI batch output missing ids: ${missingIds.join(", ")}`);
36330
+ }
36331
+ const responses = requests.map((request) => {
36332
+ const evalCaseId = request.evalCaseId;
36333
+ if (!evalCaseId) {
36334
+ return {
36335
+ outputMessages: [],
36336
+ raw: {
36337
+ command: renderedCommand,
36338
+ stderr: result.stderr,
36339
+ exitCode: result.exitCode ?? 0,
36340
+ cwd: this.config.cwd,
36341
+ outputFile: outputFilePath
36342
+ }
36343
+ };
36344
+ }
36345
+ const parsed = recordsById.get(evalCaseId);
36346
+ if (!parsed) {
36347
+ return {
36348
+ outputMessages: [],
36349
+ raw: {
36350
+ command: renderedCommand,
36351
+ stderr: result.stderr,
36352
+ exitCode: result.exitCode ?? 0,
36353
+ cwd: this.config.cwd,
36354
+ outputFile: outputFilePath
36355
+ }
36356
+ };
36357
+ }
36358
+ return {
36359
+ outputMessages: parsed.outputMessages,
36360
+ raw: {
36361
+ command: renderedCommand,
36362
+ stderr: result.stderr,
36363
+ exitCode: result.exitCode ?? 0,
36364
+ cwd: this.config.cwd,
36365
+ outputFile: outputFilePath,
36366
+ recordId: evalCaseId
36367
+ }
36368
+ };
36369
+ });
36370
+ return responses;
36371
+ }
36250
36372
  /**
36251
36373
  * Parse output content from CLI.
36252
- * If the content is valid JSON with a 'text' field, extract text and optional trace.
36253
- * Otherwise, treat the entire content as plain text.
36374
+ * If the content is valid JSON with 'output_messages' or 'text' field, extract them.
36375
+ * If only 'text' is provided, wrap it in outputMessages.
36376
+ * Otherwise, treat the entire content as plain text wrapped in outputMessages.
36254
36377
  */
36255
36378
  parseOutputContent(content) {
36256
36379
  try {
36257
36380
  const parsed = JSON.parse(content);
36258
- if (typeof parsed === "object" && parsed !== null && "text" in parsed) {
36381
+ if (typeof parsed === "object" && parsed !== null) {
36259
36382
  const obj = parsed;
36260
- const text2 = typeof obj.text === "string" ? obj.text : String(obj.text);
36261
- const trace2 = this.parseTrace(obj.trace);
36262
- return { text: text2, trace: trace2 };
36383
+ const outputMessages = this.parseOutputMessages(obj.output_messages);
36384
+ if (outputMessages && outputMessages.length > 0) {
36385
+ return { outputMessages };
36386
+ }
36387
+ if ("text" in obj) {
36388
+ const text2 = typeof obj.text === "string" ? obj.text : String(obj.text);
36389
+ return { outputMessages: [{ role: "assistant", content: text2 }] };
36390
+ }
36263
36391
  }
36264
36392
  } catch {
36265
36393
  }
36266
- return { text: content };
36394
+ return { outputMessages: [{ role: "assistant", content }] };
36267
36395
  }
36268
- parseTrace(trace2) {
36269
- if (!Array.isArray(trace2)) {
36396
+ /**
36397
+ * Parse output_messages from JSONL (snake_case) and convert to OutputMessage[] (camelCase).
36398
+ */
36399
+ parseOutputMessages(outputMessages) {
36400
+ if (!Array.isArray(outputMessages)) {
36270
36401
  return void 0;
36271
36402
  }
36272
- const validEvents = trace2.filter(isTraceEvent);
36273
- return validEvents.length > 0 ? validEvents : void 0;
36403
+ const messages = [];
36404
+ for (const msg of outputMessages) {
36405
+ if (typeof msg !== "object" || msg === null) {
36406
+ continue;
36407
+ }
36408
+ const rawMsg = msg;
36409
+ if (typeof rawMsg.role !== "string") {
36410
+ continue;
36411
+ }
36412
+ const message = {
36413
+ role: rawMsg.role,
36414
+ name: typeof rawMsg.name === "string" ? rawMsg.name : void 0,
36415
+ content: rawMsg.content,
36416
+ toolCalls: this.parseToolCalls(rawMsg.tool_calls),
36417
+ timestamp: typeof rawMsg.timestamp === "string" ? rawMsg.timestamp : void 0,
36418
+ metadata: typeof rawMsg.metadata === "object" && rawMsg.metadata !== null ? rawMsg.metadata : void 0
36419
+ };
36420
+ messages.push(message);
36421
+ }
36422
+ return messages.length > 0 ? messages : void 0;
36423
+ }
36424
+ /**
36425
+ * Parse tool_calls from JSONL (snake_case) and convert to ToolCall[] format.
36426
+ */
36427
+ parseToolCalls(toolCalls) {
36428
+ if (!Array.isArray(toolCalls)) {
36429
+ return void 0;
36430
+ }
36431
+ const calls = [];
36432
+ for (const call of toolCalls) {
36433
+ if (typeof call !== "object" || call === null) {
36434
+ continue;
36435
+ }
36436
+ const rawCall = call;
36437
+ if (typeof rawCall.tool !== "string") {
36438
+ continue;
36439
+ }
36440
+ calls.push({
36441
+ tool: rawCall.tool,
36442
+ input: rawCall.input,
36443
+ output: rawCall.output,
36444
+ id: typeof rawCall.id === "string" ? rawCall.id : void 0,
36445
+ timestamp: typeof rawCall.timestamp === "string" ? rawCall.timestamp : void 0
36446
+ });
36447
+ }
36448
+ return calls.length > 0 ? calls : void 0;
36449
+ }
36450
+ parseJsonlBatchOutput(content) {
36451
+ const records = /* @__PURE__ */ new Map();
36452
+ const lines = content.split(/\r?\n/).map((line2) => line2.trim()).filter((line2) => line2.length > 0);
36453
+ for (const line2 of lines) {
36454
+ let parsed;
36455
+ try {
36456
+ parsed = JSON.parse(line2);
36457
+ } catch (error40) {
36458
+ const reason = error40 instanceof Error ? error40.message : String(error40);
36459
+ throw new Error(`CLI batch output contains invalid JSONL line: ${reason}`);
36460
+ }
36461
+ if (typeof parsed !== "object" || parsed === null) {
36462
+ throw new Error("CLI batch output JSONL line must be an object");
36463
+ }
36464
+ const obj = parsed;
36465
+ const id = typeof obj.id === "string" ? obj.id : void 0;
36466
+ if (!id || id.trim().length === 0) {
36467
+ throw new Error("CLI batch output JSONL line missing required string field: id");
36468
+ }
36469
+ if (records.has(id)) {
36470
+ throw new Error(`CLI batch output contains duplicate id: ${id}`);
36471
+ }
36472
+ const parsedOutputMessages = this.parseOutputMessages(obj.output_messages);
36473
+ let outputMessages;
36474
+ if (parsedOutputMessages && parsedOutputMessages.length > 0) {
36475
+ outputMessages = parsedOutputMessages;
36476
+ } else {
36477
+ const text2 = typeof obj.text === "string" ? obj.text : obj.text === void 0 ? "" : JSON.stringify(obj.text);
36478
+ outputMessages = text2 ? [{ role: "assistant", content: text2 }] : [];
36479
+ }
36480
+ records.set(id, {
36481
+ outputMessages
36482
+ });
36483
+ }
36484
+ return records;
36274
36485
  }
36275
36486
  async readAndCleanupOutputFile(filePath) {
36276
36487
  try {
@@ -36280,8 +36491,10 @@ var CliProvider = class {
36280
36491
  const errorMsg = error40 instanceof Error ? error40.message : String(error40);
36281
36492
  throw new Error(`Failed to read output file '${filePath}': ${errorMsg}`);
36282
36493
  } finally {
36283
- await fs.unlink(filePath).catch(() => {
36284
- });
36494
+ if (!this.keepTempFiles) {
36495
+ await fs.unlink(filePath).catch(() => {
36496
+ });
36497
+ }
36285
36498
  }
36286
36499
  }
36287
36500
  async ensureHealthy(signal) {
@@ -36333,7 +36546,7 @@ var CliProvider = class {
36333
36546
  );
36334
36547
  if (this.verbose) {
36335
36548
  console.log(
36336
- `[cli-provider:${this.targetName}] (healthcheck) CLI_EVALS_DIR=${process.env.CLI_EVALS_DIR ?? ""} cwd=${healthcheck.cwd ?? this.config.cwd ?? ""} command=${renderedCommand}`
36549
+ `[cli-provider:${this.targetName}] (healthcheck) cwd=${healthcheck.cwd ?? this.config.cwd ?? ""} command=${renderedCommand}`
36337
36550
  );
36338
36551
  }
36339
36552
  const result = await this.runCommand(renderedCommand, {
@@ -36401,11 +36614,11 @@ function shellEscape(value) {
36401
36614
  }
36402
36615
  return `'${value.replace(/'/g, `'"'"'`)}'`;
36403
36616
  }
36404
/**
 * Build a temp-file path for a CLI provider's output file.
 *
 * The file name has the form `agentv-<id>-<timestamp>-<random><extension>` and
 * is placed directly inside the OS temp directory.
 *
 * @param {string} [evalCaseId] - Eval case identifier; a falsy value becomes "unknown".
 * @param {string} [extension=".json"] - Suffix appended verbatim (include the dot).
 * @returns {string} Path of the form `<tmpdir>/agentv-<id>-<timestamp>-<random><extension>`.
 */
function generateOutputFilePath(evalCaseId, extension = ".json") {
  // The id ends up inside a file name. Path separators and other characters
  // that are illegal in file names would otherwise point the path at a
  // non-existent subdirectory (or outside the temp dir), so collapse each run
  // of them into a single underscore.
  const safeEvalId = String(evalCaseId || "unknown").replace(/[\\\/:*?"<>|\x00-\x1f]+/g, "_");
  const timestamp = Date.now();
  const random = Math.random().toString(36).substring(2, 9);
  return path72.join(os2.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}${extension}`);
}
36410
36623
  function formatTimeoutSuffix(timeoutMs) {
36411
36624
  if (!timeoutMs || timeoutMs <= 0) {
@@ -36601,7 +36814,6 @@ var CodexProvider = class {
36601
36814
  const parsed = parseCodexJson(result.stdout);
36602
36815
  const assistantText = extractAssistantText(parsed);
36603
36816
  return {
36604
- text: assistantText,
36605
36817
  raw: {
36606
36818
  response: parsed,
36607
36819
  stdout: result.stdout,
@@ -36613,7 +36825,8 @@ var CodexProvider = class {
36613
36825
  workspace: workspaceRoot,
36614
36826
  inputFiles,
36615
36827
  logFile: logger?.filePath
36616
- }
36828
+ },
36829
+ outputMessages: [{ role: "assistant", content: assistantText }]
36617
36830
  };
36618
36831
  } finally {
36619
36832
  await logger?.close();
@@ -37233,7 +37446,6 @@ var MockProvider = class {
37233
37446
  delayMs;
37234
37447
  delayMinMs;
37235
37448
  delayMaxMs;
37236
- trace;
37237
37449
  constructor(targetName, config2) {
37238
37450
  this.id = `mock:${targetName}`;
37239
37451
  this.targetName = targetName;
@@ -37241,7 +37453,6 @@ var MockProvider = class {
37241
37453
  this.delayMs = config2.delayMs ?? 0;
37242
37454
  this.delayMinMs = config2.delayMinMs ?? 0;
37243
37455
  this.delayMaxMs = config2.delayMaxMs ?? 0;
37244
- this.trace = config2.trace;
37245
37456
  }
37246
37457
  async invoke(request) {
37247
37458
  const delay2 = this.calculateDelay();
@@ -37249,12 +37460,11 @@ var MockProvider = class {
37249
37460
  await new Promise((resolve2) => setTimeout(resolve2, delay2));
37250
37461
  }
37251
37462
  return {
37252
- text: this.cannedResponse,
37463
+ outputMessages: [{ role: "assistant", content: this.cannedResponse }],
37253
37464
  raw: {
37254
37465
  question: request.question,
37255
37466
  guidelines: request.guidelines
37256
- },
37257
- trace: this.trace
37467
+ }
37258
37468
  };
37259
37469
  }
37260
37470
  calculateDelay() {
@@ -37334,7 +37544,7 @@ var VSCodeProvider = class {
37334
37544
  }
37335
37545
  if (this.config.dryRun) {
37336
37546
  return {
37337
- text: "",
37547
+ outputMessages: [],
37338
37548
  raw: {
37339
37549
  session,
37340
37550
  inputFiles
@@ -37343,7 +37553,7 @@ var VSCodeProvider = class {
37343
37553
  }
37344
37554
  const responseText = await readTextFile(session.responseFile);
37345
37555
  return {
37346
- text: responseText,
37556
+ outputMessages: [{ role: "assistant", content: responseText }],
37347
37557
  raw: {
37348
37558
  session,
37349
37559
  inputFiles
@@ -37381,7 +37591,7 @@ var VSCodeProvider = class {
37381
37591
  }
37382
37592
  if (this.config.dryRun) {
37383
37593
  return normalizedRequests.map(({ inputFiles }) => ({
37384
- text: "",
37594
+ outputMessages: [],
37385
37595
  raw: {
37386
37596
  session,
37387
37597
  inputFiles,
@@ -37398,7 +37608,7 @@ var VSCodeProvider = class {
37398
37608
  for (const [index, responseFile] of session.responseFiles.entries()) {
37399
37609
  const responseText = await readTextFile(responseFile);
37400
37610
  responses.push({
37401
- text: responseText,
37611
+ outputMessages: [{ role: "assistant", content: responseText }],
37402
37612
  raw: {
37403
37613
  session,
37404
37614
  inputFiles: normalizedRequests[index]?.inputFiles,
@@ -37686,6 +37896,7 @@ var LlmJudgeEvaluator = class {
37686
37896
  null,
37687
37897
  2
37688
37898
  ),
37899
+ [TEMPLATE_VARIABLES.OUTPUT_MESSAGES]: JSON.stringify(context.outputMessages ?? [], null, 2),
37689
37900
  [TEMPLATE_VARIABLES.CANDIDATE_ANSWER]: context.candidate.trim(),
37690
37901
  [TEMPLATE_VARIABLES.REFERENCE_ANSWER]: (context.evalCase.reference_answer ?? "").trim(),
37691
37902
  [TEMPLATE_VARIABLES.EXPECTED_OUTCOME]: context.evalCase.expected_outcome.trim(),
@@ -37710,7 +37921,7 @@ var LlmJudgeEvaluator = class {
37710
37921
  const score = clampScore(data.score);
37711
37922
  const hits = Array.isArray(data.hits) ? data.hits.filter(isNonEmptyString).slice(0, 4) : [];
37712
37923
  const misses = Array.isArray(data.misses) ? data.misses.filter(isNonEmptyString).slice(0, 4) : [];
37713
- const reasoning = data.reasoning ?? providerResponse?.reasoning;
37924
+ const reasoning = data.reasoning;
37714
37925
  const expectedAspectCount = Math.max(hits.length + misses.length, 1);
37715
37926
  return {
37716
37927
  score,
@@ -37812,7 +38023,9 @@ var LlmJudgeEvaluator = class {
37812
38023
  maxOutputTokens: this.maxOutputTokens,
37813
38024
  temperature: this.temperature
37814
38025
  });
37815
- const data = schema.parse(parseJsonFromText(response.text ?? ""));
38026
+ const data = schema.parse(
38027
+ parseJsonFromText(extractLastAssistantContent(response.outputMessages))
38028
+ );
37816
38029
  return { data, providerResponse: response };
37817
38030
  } catch (e) {
37818
38031
  lastError = e instanceof Error ? e : new Error(String(e));
@@ -37895,15 +38108,16 @@ var CodeEvaluator = class {
37895
38108
  {
37896
38109
  question: context.evalCase.question,
37897
38110
  expected_outcome: context.evalCase.expected_outcome,
38111
+ expected_messages: context.evalCase.expected_messages,
37898
38112
  reference_answer: context.evalCase.reference_answer,
37899
38113
  candidate_answer: context.candidate,
38114
+ output_messages: context.outputMessages ?? null,
37900
38115
  guideline_files: context.evalCase.guideline_paths,
37901
38116
  input_files: context.evalCase.file_paths.filter(
37902
38117
  (path132) => !context.evalCase.guideline_paths.includes(path132)
37903
38118
  ),
37904
38119
  input_messages: context.evalCase.input_messages,
37905
- candidate_trace_file: context.candidateTraceRef ?? null,
37906
- candidate_trace_summary: context.candidateTraceSummary ?? null
38120
+ candidate_trace_summary: context.traceSummary ?? null
37907
38121
  },
37908
38122
  null,
37909
38123
  2
@@ -38030,8 +38244,19 @@ var ToolTrajectoryEvaluator = class {
38030
38244
  this.config = options.config;
38031
38245
  }
38032
38246
  evaluate(context) {
38033
- const { candidateTrace, candidateTraceSummary } = context;
38034
- if (!candidateTrace || !candidateTraceSummary) {
38247
+ const { outputMessages, traceSummary } = context;
38248
+ const toolCalls = this.extractToolCallsFromMessages(outputMessages);
38249
+ if (toolCalls.length === 0 && !traceSummary) {
38250
+ return {
38251
+ score: 0,
38252
+ verdict: "fail",
38253
+ hits: [],
38254
+ misses: ["No trace available for evaluation"],
38255
+ expectedAspectCount: 1
38256
+ };
38257
+ }
38258
+ const summary = toolCalls.length > 0 ? this.buildSummary(toolCalls) : traceSummary;
38259
+ if (!summary) {
38035
38260
  return {
38036
38261
  score: 0,
38037
38262
  verdict: "fail",
@@ -38042,11 +38267,11 @@ var ToolTrajectoryEvaluator = class {
38042
38267
  }
38043
38268
  switch (this.config.mode) {
38044
38269
  case "any_order":
38045
- return this.evaluateAnyOrder(candidateTraceSummary);
38270
+ return this.evaluateAnyOrder(summary);
38046
38271
  case "in_order":
38047
- return this.evaluateInOrder(candidateTrace);
38272
+ return this.evaluateInOrder(toolCalls);
38048
38273
  case "exact":
38049
- return this.evaluateExact(candidateTrace);
38274
+ return this.evaluateExact(toolCalls);
38050
38275
  default:
38051
38276
  return {
38052
38277
  score: 0,
@@ -38057,6 +38282,39 @@ var ToolTrajectoryEvaluator = class {
38057
38282
  };
38058
38283
  }
38059
38284
  }
38285
+ /**
38286
+ * Extract tool calls from output messages.
38287
+ */
38288
+ extractToolCallsFromMessages(messages) {
38289
+ if (!messages) {
38290
+ return [];
38291
+ }
38292
+ const toolCalls = [];
38293
+ for (const message of messages) {
38294
+ if (message.toolCalls) {
38295
+ for (const call of message.toolCalls) {
38296
+ toolCalls.push({ name: call.tool });
38297
+ }
38298
+ }
38299
+ }
38300
+ return toolCalls;
38301
+ }
38302
+ /**
38303
+ * Build a summary from extracted tool calls.
38304
+ */
38305
+ buildSummary(toolCalls) {
38306
+ const toolCallsByName = {};
38307
+ for (const call of toolCalls) {
38308
+ toolCallsByName[call.name] = (toolCallsByName[call.name] ?? 0) + 1;
38309
+ }
38310
+ const toolNames = Object.keys(toolCallsByName).sort();
38311
+ return {
38312
+ eventCount: toolCalls.length,
38313
+ toolNames,
38314
+ toolCallsByName,
38315
+ errorCount: 0
38316
+ };
38317
+ }
38060
38318
  evaluateAnyOrder(summary) {
38061
38319
  const minimums = this.config.minimums ?? {};
38062
38320
  const toolNames = Object.keys(minimums);
@@ -38089,7 +38347,7 @@ var ToolTrajectoryEvaluator = class {
38089
38347
  expectedAspectCount: toolNames.length
38090
38348
  };
38091
38349
  }
38092
- evaluateInOrder(trace2) {
38350
+ evaluateInOrder(toolCalls) {
38093
38351
  const expected = this.config.expected ?? [];
38094
38352
  if (expected.length === 0) {
38095
38353
  return {
@@ -38100,15 +38358,14 @@ var ToolTrajectoryEvaluator = class {
38100
38358
  expectedAspectCount: 0
38101
38359
  };
38102
38360
  }
38103
- const actualToolCalls = trace2.filter((e) => e.type === "tool_call" && e.name);
38104
38361
  const hits = [];
38105
38362
  const misses = [];
38106
38363
  let actualIndex = 0;
38107
38364
  for (let i = 0; i < expected.length; i++) {
38108
38365
  const expectedTool = expected[i].tool;
38109
38366
  let found = false;
38110
- while (actualIndex < actualToolCalls.length) {
38111
- if (actualToolCalls[actualIndex].name === expectedTool) {
38367
+ while (actualIndex < toolCalls.length) {
38368
+ if (toolCalls[actualIndex].name === expectedTool) {
38112
38369
  hits.push(`Found ${expectedTool} at position ${actualIndex}`);
38113
38370
  actualIndex++;
38114
38371
  found = true;
@@ -38129,7 +38386,7 @@ var ToolTrajectoryEvaluator = class {
38129
38386
  expectedAspectCount: expected.length
38130
38387
  };
38131
38388
  }
38132
- evaluateExact(trace2) {
38389
+ evaluateExact(toolCalls) {
38133
38390
  const expected = this.config.expected ?? [];
38134
38391
  if (expected.length === 0) {
38135
38392
  return {
@@ -38140,16 +38397,15 @@ var ToolTrajectoryEvaluator = class {
38140
38397
  expectedAspectCount: 0
38141
38398
  };
38142
38399
  }
38143
- const actualToolCalls = trace2.filter((e) => e.type === "tool_call" && e.name);
38144
38400
  const hits = [];
38145
38401
  const misses = [];
38146
- if (actualToolCalls.length !== expected.length) {
38147
- misses.push(`Expected ${expected.length} tool calls, got ${actualToolCalls.length}`);
38402
+ if (toolCalls.length !== expected.length) {
38403
+ misses.push(`Expected ${expected.length} tool calls, got ${toolCalls.length}`);
38148
38404
  }
38149
- const checkLength = Math.min(expected.length, actualToolCalls.length);
38405
+ const checkLength = Math.min(expected.length, toolCalls.length);
38150
38406
  for (let i = 0; i < checkLength; i++) {
38151
38407
  const expectedTool = expected[i].tool;
38152
- const actualTool = actualToolCalls[i].name;
38408
+ const actualTool = toolCalls[i].name;
38153
38409
  if (actualTool === expectedTool) {
38154
38410
  hits.push(`Position ${i}: ${expectedTool} \u2713`);
38155
38411
  } else {
@@ -38363,11 +38619,13 @@ var CompositeEvaluator = class {
38363
38619
  evalCaseId: context.evalCase.id,
38364
38620
  attempt: context.attempt
38365
38621
  });
38366
- const data = freeformEvaluationSchema.parse(parseJsonFromText(response.text ?? ""));
38622
+ const data = freeformEvaluationSchema.parse(
38623
+ parseJsonFromText(extractLastAssistantContent(response.outputMessages))
38624
+ );
38367
38625
  const score = clampScore(data.score);
38368
38626
  const hits = Array.isArray(data.hits) ? data.hits.filter(isNonEmptyString).slice(0, 4) : [];
38369
38627
  const misses = Array.isArray(data.misses) ? data.misses.filter(isNonEmptyString).slice(0, 4) : [];
38370
- const reasoning = data.reasoning ?? response.reasoning;
38628
+ const reasoning = data.reasoning;
38371
38629
  return {
38372
38630
  score,
38373
38631
  verdict: scoreToVerdict(score),
@@ -38779,11 +39037,14 @@ async function runBatchEvaluation(options) {
38779
39037
  const evalCase = evalCases[i];
38780
39038
  const promptInputs = promptInputsList[i];
38781
39039
  const providerResponse = batchResponse[i];
39040
+ const outputMessages = providerResponse.outputMessages;
39041
+ const traceSummary = outputMessages ? computeTraceSummary(outputMessages) : void 0;
39042
+ const candidate = extractLastAssistantContent(outputMessages);
38782
39043
  let result;
38783
39044
  try {
38784
39045
  result = await evaluateCandidate({
38785
39046
  evalCase,
38786
- candidate: providerResponse.text ?? "",
39047
+ candidate,
38787
39048
  target,
38788
39049
  provider,
38789
39050
  evaluators: evaluatorRegistry,
@@ -38791,7 +39052,9 @@ async function runBatchEvaluation(options) {
38791
39052
  nowFn,
38792
39053
  attempt: 0,
38793
39054
  judgeProvider: await resolveJudgeProvider(target),
38794
- agentTimeoutMs
39055
+ agentTimeoutMs,
39056
+ outputMessages,
39057
+ traceSummary
38795
39058
  });
38796
39059
  } catch (error40) {
38797
39060
  const errorResult = buildErrorResult(
@@ -38895,21 +39158,13 @@ async function runEvalCase(options) {
38895
39158
  if (cacheKey && cache && !cachedResponse) {
38896
39159
  await cache.set(cacheKey, providerResponse);
38897
39160
  }
38898
- let candidateTrace = providerResponse.trace;
38899
- if (!candidateTrace && providerResponse.traceRef) {
38900
- try {
38901
- const rawTrace = await readJsonFile(providerResponse.traceRef);
38902
- if (Array.isArray(rawTrace) && rawTrace.every(isTraceEvent)) {
38903
- candidateTrace = rawTrace;
38904
- }
38905
- } catch {
38906
- }
38907
- }
38908
- const candidateTraceSummary = candidateTrace ? computeTraceSummary(candidateTrace) : void 0;
39161
+ const outputMessages = providerResponse.outputMessages;
39162
+ const traceSummary = outputMessages ? computeTraceSummary(outputMessages) : void 0;
39163
+ const candidate = extractLastAssistantContent(outputMessages);
38909
39164
  try {
38910
39165
  return await evaluateCandidate({
38911
39166
  evalCase,
38912
- candidate: providerResponse.text ?? "",
39167
+ candidate,
38913
39168
  target,
38914
39169
  provider,
38915
39170
  evaluators,
@@ -38918,9 +39173,8 @@ async function runEvalCase(options) {
38918
39173
  attempt,
38919
39174
  judgeProvider,
38920
39175
  agentTimeoutMs,
38921
- candidateTrace,
38922
- candidateTraceRef: providerResponse.traceRef,
38923
- candidateTraceSummary
39176
+ outputMessages,
39177
+ traceSummary
38924
39178
  });
38925
39179
  } catch (error40) {
38926
39180
  return buildErrorResult(evalCase, target.name, nowFn(), error40, promptInputs, provider);
@@ -38938,9 +39192,8 @@ async function evaluateCandidate(options) {
38938
39192
  attempt,
38939
39193
  judgeProvider,
38940
39194
  agentTimeoutMs,
38941
- candidateTrace,
38942
- candidateTraceRef,
38943
- candidateTraceSummary
39195
+ outputMessages,
39196
+ traceSummary
38944
39197
  } = options;
38945
39198
  const gradeTimestamp = nowFn();
38946
39199
  const { score, evaluatorResults } = await runEvaluatorsForCase({
@@ -38954,9 +39207,8 @@ async function evaluateCandidate(options) {
38954
39207
  now: gradeTimestamp,
38955
39208
  judgeProvider,
38956
39209
  agentTimeoutMs,
38957
- candidateTrace,
38958
- candidateTraceRef,
38959
- candidateTraceSummary
39210
+ outputMessages,
39211
+ traceSummary
38960
39212
  });
38961
39213
  const completedAt = nowFn();
38962
39214
  let agentProviderRequest;
@@ -38994,7 +39246,7 @@ async function evaluateCandidate(options) {
38994
39246
  lm_provider_request: lmProviderRequest,
38995
39247
  evaluator_provider_request: evaluatorResults ? void 0 : score.evaluatorRawRequest,
38996
39248
  evaluator_results: evaluatorResults,
38997
- trace_summary: candidateTraceSummary
39249
+ trace_summary: traceSummary
38998
39250
  };
38999
39251
  }
39000
39252
  async function runEvaluatorsForCase(options) {
@@ -39009,9 +39261,8 @@ async function runEvaluatorsForCase(options) {
39009
39261
  now,
39010
39262
  judgeProvider,
39011
39263
  agentTimeoutMs,
39012
- candidateTrace,
39013
- candidateTraceRef,
39014
- candidateTraceSummary
39264
+ outputMessages,
39265
+ traceSummary
39015
39266
  } = options;
39016
39267
  if (evalCase.evaluators && evalCase.evaluators.length > 0) {
39017
39268
  return runEvaluatorList({
@@ -39026,9 +39277,8 @@ async function runEvaluatorsForCase(options) {
39026
39277
  now,
39027
39278
  judgeProvider,
39028
39279
  agentTimeoutMs,
39029
- candidateTrace,
39030
- candidateTraceRef,
39031
- candidateTraceSummary
39280
+ outputMessages,
39281
+ traceSummary
39032
39282
  });
39033
39283
  }
39034
39284
  const evaluatorKind = evalCase.evaluator ?? "llm_judge";
@@ -39045,9 +39295,8 @@ async function runEvaluatorsForCase(options) {
39045
39295
  promptInputs,
39046
39296
  now,
39047
39297
  judgeProvider,
39048
- candidateTrace,
39049
- candidateTraceRef,
39050
- candidateTraceSummary
39298
+ outputMessages,
39299
+ traceSummary
39051
39300
  });
39052
39301
  return { score };
39053
39302
  }
@@ -39064,9 +39313,8 @@ async function runEvaluatorList(options) {
39064
39313
  now,
39065
39314
  judgeProvider,
39066
39315
  agentTimeoutMs,
39067
- candidateTrace,
39068
- candidateTraceRef,
39069
- candidateTraceSummary
39316
+ outputMessages,
39317
+ traceSummary
39070
39318
  } = options;
39071
39319
  const scored = [];
39072
39320
  const evaluatorResults = [];
@@ -39113,8 +39361,8 @@ async function runEvaluatorList(options) {
39113
39361
  attempt,
39114
39362
  promptInputs,
39115
39363
  now,
39116
- candidateTraceRef,
39117
- candidateTraceSummary
39364
+ outputMessages,
39365
+ traceSummary
39118
39366
  });
39119
39367
  const weight = evaluator.weight ?? 1;
39120
39368
  scored.push({ score: score2, name: evaluator.name, type: "code_judge", weight });
@@ -39200,9 +39448,8 @@ async function runEvaluatorList(options) {
39200
39448
  attempt,
39201
39449
  promptInputs,
39202
39450
  now,
39203
- candidateTrace,
39204
- candidateTraceRef,
39205
- candidateTraceSummary
39451
+ outputMessages,
39452
+ traceSummary
39206
39453
  });
39207
39454
  const weight = evaluator.weight ?? 1;
39208
39455
  scored.push({ score: score2, name: evaluator.name, type: evaluator.type, weight });
@@ -39562,16 +39809,90 @@ function buildPrompt(expectedOutcome, question, referenceAnswer) {
39562
39809
  return parts.join("\n");
39563
39810
  }
39564
39811
 
39812
+ // src/commands/convert/index.ts
39813
+ import { command, option, optional as optional2, positional, string as string4 } from "cmd-ts";
39814
+ import { stringify as stringifyYaml } from "yaml";
39815
/**
 * Convert a JSONL file into a multi-document YAML file.
 *
 * Each non-blank input line is parsed as JSON and written as one YAML
 * document. Documents are introduced by `---`, and an input with no records
 * produces an empty output file.
 *
 * @param {string} inputPath - Path of the JSONL file to read.
 * @param {string} outputPath - Path of the YAML file to write.
 * @returns {number} Number of records converted.
 * @throws {Error} When a line is not valid JSON (the message names the file
 *   and 1-based line number), or when reading or writing fails.
 */
function convertJsonlToYaml(inputPath, outputPath) {
  const content = readFileSync(inputPath, "utf8");
  const documents = [];
  content.split(/\r?\n/).forEach((rawLine, index) => {
    const line2 = rawLine.trim();
    if (!line2) {
      return;
    }
    let record2;
    try {
      record2 = JSON.parse(line2);
    } catch (error40) {
      // A bare JSON.parse message gives no hint which record is broken.
      const reason = error40 instanceof Error ? error40.message : String(error40);
      throw new Error(`Invalid JSON on line ${index + 1} of ${inputPath}: ${reason}`, {
        cause: error40
      });
    }
    const yamlDoc = stringifyYaml(record2, {
      indent: 2,
      lineWidth: 0
    });
    documents.push(normalizeLineEndings(yamlDoc));
  });
  // The first document is prefixed with "---\n"; later ones with "\n---\n".
  const yamlOutput = documents.length > 0 ? `---\n${documents.join("\n---\n")}` : "";
  writeFileSync(outputPath, yamlOutput);
  return documents.length;
}
39834
/**
 * `convert` subcommand: converts evaluation results from JSONL to YAML.
 *
 * Takes the input path as a positional argument and an optional `--out`/`-o`
 * output path, which defaults to the input path with `.jsonl` replaced by
 * `.yaml`. The process exits with status 1 when the input does not end in
 * `.jsonl` or when the conversion throws.
 */
var convertCommand = command({
  name: "convert",
  description: "Convert evaluation results from JSONL to YAML format",
  args: {
    input: positional({
      type: string4,
      displayName: "input",
      description: "Path to input JSONL file"
    }),
    out: option({
      type: optional2(string4),
      long: "out",
      short: "o",
      description: "Output file path (defaults to input path with .yaml extension)"
    })
  },
  handler: async ({ input, out }) => {
    if (!input.endsWith(".jsonl")) {
      console.error("Error: Input file must be a .jsonl file");
      process.exit(1);
    }
    const outputPath = out ?? input.replace(/\.jsonl$/, ".yaml");
    try {
      const count = convertJsonlToYaml(input, outputPath);
      console.log(`Converted ${count} records to ${path14.resolve(outputPath)}`);
    } catch (error40) {
      // Anything can be thrown; reading `.message` off a non-Error would
      // print "undefined", and off null would throw inside this handler.
      const reason = error40 instanceof Error ? error40.message : String(error40);
      console.error(`Error: ${reason}`);
      process.exit(1);
    }
  }
});
39865
+
39866
+ // src/commands/eval/index.ts
39867
+ import { stat as stat4 } from "node:fs/promises";
39868
+ import path21 from "node:path";
39869
+ import {
39870
+ command as command2,
39871
+ flag,
39872
+ number as number4,
39873
+ option as option2,
39874
+ optional as optional3,
39875
+ restPositionals,
39876
+ string as string5
39877
+ } from "cmd-ts";
39878
+ import fg from "fast-glob";
39879
+
39880
+ // src/commands/eval/run-eval.ts
39881
+ import { constants as constants6 } from "node:fs";
39882
+ import { access as access6, mkdir as mkdir7 } from "node:fs/promises";
39883
+ import path20 from "node:path";
39884
+ import { pathToFileURL } from "node:url";
39885
+
39565
39886
  // src/commands/eval/env.ts
39566
39887
  import { constants as constants4 } from "node:fs";
39567
39888
  import { access as access4 } from "node:fs/promises";
39568
- import path14 from "node:path";
39889
+ import path15 from "node:path";
39569
39890
  import { config as loadDotenv } from "dotenv";
39570
39891
  function uniqueDirs(directories) {
39571
39892
  const seen = /* @__PURE__ */ new Set();
39572
39893
  const result = [];
39573
39894
  for (const dir of directories) {
39574
- const absolute = path14.resolve(dir);
39895
+ const absolute = path15.resolve(dir);
39575
39896
  if (seen.has(absolute)) {
39576
39897
  continue;
39577
39898
  }
@@ -39590,14 +39911,14 @@ async function fileExists4(filePath) {
39590
39911
  }
39591
39912
  function collectAncestorDirectories(start, boundary) {
39592
39913
  const directories = [];
39593
- const boundaryDir = path14.resolve(boundary);
39594
- let current = path14.resolve(start);
39914
+ const boundaryDir = path15.resolve(boundary);
39915
+ let current = path15.resolve(start);
39595
39916
  while (current !== void 0) {
39596
39917
  directories.push(current);
39597
39918
  if (current === boundaryDir) {
39598
39919
  break;
39599
39920
  }
39600
- const parent = path14.dirname(current);
39921
+ const parent = path15.dirname(current);
39601
39922
  if (parent === current) {
39602
39923
  break;
39603
39924
  }
@@ -39607,12 +39928,12 @@ function collectAncestorDirectories(start, boundary) {
39607
39928
  }
39608
39929
  async function loadEnvFromHierarchy(options) {
39609
39930
  const { testFilePath, repoRoot, verbose } = options;
39610
- const testDir = path14.dirname(path14.resolve(testFilePath));
39931
+ const testDir = path15.dirname(path15.resolve(testFilePath));
39611
39932
  const cwd = process.cwd();
39612
39933
  const searchDirs = uniqueDirs([...collectAncestorDirectories(testDir, repoRoot), repoRoot, cwd]);
39613
39934
  const envFiles = [];
39614
39935
  for (const dir of searchDirs) {
39615
- const candidate = path14.join(dir, ".env");
39936
+ const candidate = path15.join(dir, ".env");
39616
39937
  if (await fileExists4(candidate)) {
39617
39938
  envFiles.push(candidate);
39618
39939
  }
@@ -39636,7 +39957,7 @@ async function loadEnvFromHierarchy(options) {
39636
39957
  // src/commands/eval/jsonl-writer.ts
39637
39958
  import { createWriteStream as createWriteStream2 } from "node:fs";
39638
39959
  import { mkdir as mkdir5 } from "node:fs/promises";
39639
- import path15 from "node:path";
39960
+ import path16 from "node:path";
39640
39961
  import { finished } from "node:stream/promises";
39641
39962
 
39642
39963
  // ../../node_modules/.bun/async-mutex@0.5.0/node_modules/async-mutex/index.mjs
@@ -39854,7 +40175,7 @@ var JsonlWriter = class _JsonlWriter {
39854
40175
  this.stream = stream;
39855
40176
  }
39856
40177
  static async open(filePath) {
39857
- await mkdir5(path15.dirname(filePath), { recursive: true });
40178
+ await mkdir5(path16.dirname(filePath), { recursive: true });
39858
40179
  const stream = createWriteStream2(filePath, { flags: "w", encoding: "utf8" });
39859
40180
  return new _JsonlWriter(stream);
39860
40181
  }
@@ -39886,9 +40207,9 @@ var JsonlWriter = class _JsonlWriter {
39886
40207
  // src/commands/eval/yaml-writer.ts
39887
40208
  import { createWriteStream as createWriteStream3 } from "node:fs";
39888
40209
  import { mkdir as mkdir6 } from "node:fs/promises";
39889
- import path16 from "node:path";
40210
+ import path17 from "node:path";
39890
40211
  import { finished as finished2 } from "node:stream/promises";
39891
- import { stringify as stringifyYaml } from "yaml";
40212
+ import { stringify as stringifyYaml2 } from "yaml";
39892
40213
  var YamlWriter = class _YamlWriter {
39893
40214
  stream;
39894
40215
  mutex = new Mutex();
@@ -39898,7 +40219,7 @@ var YamlWriter = class _YamlWriter {
39898
40219
  this.stream = stream;
39899
40220
  }
39900
40221
  static async open(filePath) {
39901
- await mkdir6(path16.dirname(filePath), { recursive: true });
40222
+ await mkdir6(path17.dirname(filePath), { recursive: true });
39902
40223
  const stream = createWriteStream3(filePath, { flags: "w", encoding: "utf8" });
39903
40224
  return new _YamlWriter(stream);
39904
40225
  }
@@ -39907,7 +40228,7 @@ var YamlWriter = class _YamlWriter {
39907
40228
  if (this.closed) {
39908
40229
  throw new Error("Cannot write to closed YAML writer");
39909
40230
  }
39910
- const yamlDoc = stringifyYaml(record2, {
40231
+ const yamlDoc = stringifyYaml2(record2, {
39911
40232
  indent: 2,
39912
40233
  lineWidth: 0
39913
40234
  // Disable line wrapping
@@ -39963,196 +40284,86 @@ function getDefaultExtension(format) {
39963
40284
  }
39964
40285
 
39965
40286
  // src/commands/eval/progress-display.ts
39966
- import { stripVTControlCharacters } from "node:util";
39967
- var ESC = "\x1B[";
39968
- var CLEAR_LINE = `${ESC}K`;
39969
- var MOVE_CURSOR_UP = `${ESC}1A`;
39970
40287
  var ProgressDisplay = class {
39971
40288
  workers = /* @__PURE__ */ new Map();
39972
- maxWorkers;
39973
40289
  totalTests = 0;
39974
40290
  completedTests = 0;
39975
- renderTimer;
39976
- renderScheduled = false;
39977
- isInteractive;
39978
40291
  logPaths = [];
39979
40292
  logPathSet = /* @__PURE__ */ new Set();
39980
40293
  hasPrintedLogHeader = false;
39981
- windowHeight = 0;
39982
40294
  started = false;
39983
40295
  finished = false;
39984
- constructor(maxWorkers) {
39985
- this.maxWorkers = maxWorkers;
39986
- this.isInteractive = process.stdout.isTTY && !process.env.CI;
40296
+ verbose;
40297
+ constructor(_maxWorkers, options) {
40298
+ this.verbose = options?.verbose ?? false;
39987
40299
  }
39988
40300
  isInteractiveMode() {
39989
- return this.isInteractive;
40301
+ return false;
39990
40302
  }
39991
40303
  start() {
39992
40304
  this.started = true;
39993
40305
  this.finished = false;
39994
- if (this.isInteractive) {
39995
- this.write("\n");
39996
- this.renderTimer = setInterval(() => {
39997
- this.scheduleRender();
39998
- }, 1e3);
39999
- this.renderTimer.unref?.();
40000
- }
40001
40306
  }
40002
40307
  setTotalTests(count) {
40003
40308
  this.totalTests = count;
40004
40309
  }
40005
40310
  updateWorker(progress) {
40311
+ const previous = this.workers.get(progress.workerId);
40006
40312
  this.workers.set(progress.workerId, progress);
40007
40313
  if (progress.status === "completed" || progress.status === "failed") {
40008
40314
  this.completedTests++;
40009
40315
  }
40010
- if (this.isInteractive) {
40011
- this.scheduleRender();
40012
- } else {
40013
- const targetSuffix = progress.targetLabel ? ` | ${progress.targetLabel}` : "";
40014
- if (progress.status === "completed") {
40015
- console.log(`\u2713 Eval ${progress.evalId}${targetSuffix} completed`);
40016
- } else if (progress.status === "failed") {
40316
+ const targetSuffix = progress.targetLabel ? ` | ${progress.targetLabel}` : "";
40317
+ const countPrefix = `${this.completedTests}/${this.totalTests}`;
40318
+ switch (progress.status) {
40319
+ case "pending":
40320
+ if (this.verbose && !previous) {
40321
+ console.log(`${countPrefix} \u23F3 ${progress.evalId}${targetSuffix}`);
40322
+ }
40323
+ break;
40324
+ case "running":
40325
+ if (!previous || previous.status === "pending") {
40326
+ console.log(`${countPrefix} \u{1F504} ${progress.evalId}${targetSuffix}`);
40327
+ }
40328
+ break;
40329
+ case "completed":
40330
+ console.log(`${countPrefix} \u2705 ${progress.evalId}${targetSuffix}`);
40331
+ break;
40332
+ case "failed":
40017
40333
  console.log(
40018
- `\u2717 Eval ${progress.evalId}${targetSuffix} failed${progress.error ? `: ${progress.error}` : ""}`
40334
+ `${countPrefix} \u274C ${progress.evalId}${targetSuffix}${progress.error ? `: ${progress.error}` : ""}`
40019
40335
  );
40020
- }
40336
+ break;
40021
40337
  }
40022
40338
  }
40023
40339
  addLogPaths(paths) {
40024
40340
  const newPaths = [];
40025
- for (const path27 of paths) {
40026
- if (this.logPathSet.has(path27)) {
40341
+ for (const path28 of paths) {
40342
+ if (this.logPathSet.has(path28)) {
40027
40343
  continue;
40028
40344
  }
40029
- this.logPathSet.add(path27);
40030
- newPaths.push(path27);
40345
+ this.logPathSet.add(path28);
40346
+ newPaths.push(path28);
40031
40347
  }
40032
40348
  if (newPaths.length === 0) {
40033
40349
  return;
40034
40350
  }
40035
40351
  this.logPaths.push(...newPaths);
40036
- if (this.isInteractive) {
40037
- this.scheduleRender();
40038
- return;
40039
- }
40040
40352
  if (!this.hasPrintedLogHeader) {
40041
40353
  console.log("");
40042
40354
  console.log("Codex CLI logs:");
40043
40355
  this.hasPrintedLogHeader = true;
40044
40356
  }
40045
40357
  const startIndex = this.logPaths.length - newPaths.length;
40046
- newPaths.forEach((path27, offset) => {
40047
- console.log(`${startIndex + offset + 1}. ${path27}`);
40358
+ newPaths.forEach((path28, offset) => {
40359
+ console.log(`${startIndex + offset + 1}. ${path28}`);
40048
40360
  });
40049
40361
  }
40050
- scheduleRender() {
40051
- if (this.renderScheduled || this.finished) {
40052
- return;
40053
- }
40054
- this.renderScheduled = true;
40055
- setTimeout(() => {
40056
- this.renderScheduled = false;
40057
- this.render();
40058
- }, 100);
40059
- }
40060
- write(content) {
40061
- process.stdout.write(content);
40062
- }
40063
- clearWindow() {
40064
- if (this.windowHeight === 0) {
40065
- return;
40066
- }
40067
- this.write(`\r${CLEAR_LINE}`);
40068
- for (let i = 1; i < this.windowHeight; i++) {
40069
- this.write(`${MOVE_CURSOR_UP}\r${CLEAR_LINE}`);
40070
- }
40071
- this.windowHeight = 0;
40072
- }
40073
- getRenderedRowCount(rows) {
40074
- const columns = process.stdout.columns || 80;
40075
- let count = 0;
40076
- for (const row of rows) {
40077
- const text2 = stripVTControlCharacters(row);
40078
- count += Math.max(1, Math.ceil(text2.length / columns));
40079
- }
40080
- return count;
40081
- }
40082
- render() {
40083
- if (!this.isInteractive || !this.started || this.finished) {
40084
- return;
40085
- }
40086
- const lines = [];
40087
- const sortedWorkers = Array.from(this.workers.values()).sort((a, b) => a.workerId - b.workerId);
40088
- for (const worker of sortedWorkers) {
40089
- const line2 = this.formatWorkerLine(worker);
40090
- lines.push(line2);
40091
- }
40092
- if (this.logPaths.length > 0) {
40093
- lines.push("");
40094
- lines.push("Codex CLI logs:");
40095
- this.logPaths.forEach((path27, index) => {
40096
- lines.push(`${index + 1}. ${path27}`);
40097
- });
40098
- }
40099
- const rowCount = this.getRenderedRowCount(lines);
40100
- this.clearWindow();
40101
- if (lines.length > 0) {
40102
- this.write(lines.join("\n"));
40103
- }
40104
- this.windowHeight = rowCount;
40105
- }
40106
- formatWorkerLine(worker) {
40107
- const workerLabel = `${worker.workerId}.`.padEnd(4);
40108
- const statusIcon = this.getStatusIcon(worker.status);
40109
- const targetLabel = worker.targetLabel ? ` | ${worker.targetLabel}` : "";
40110
- const columns = process.stdout.columns || 80;
40111
- const maxLineLength = Math.max(40, columns - 4);
40112
- const reservedLength = workerLabel.length + statusIcon.length + targetLabel.length + 4;
40113
- const availableLabelLength = Math.max(15, maxLineLength - reservedLength);
40114
- let testLabel = worker.evalId;
40115
- if (testLabel.length > availableLabelLength) {
40116
- testLabel = `${testLabel.substring(0, Math.max(0, availableLabelLength - 3))}...`;
40117
- }
40118
- return `${workerLabel} ${statusIcon} ${testLabel}${targetLabel}`;
40119
- }
40120
- getStatusIcon(status) {
40121
- switch (status) {
40122
- case "pending":
40123
- return "\u23F3";
40124
- case "running":
40125
- return "\u{1F504}";
40126
- case "completed":
40127
- return "\u2705";
40128
- case "failed":
40129
- return "\u274C";
40130
- default:
40131
- return " ";
40132
- }
40133
- }
40134
40362
  finish() {
40135
- if (this.renderTimer) {
40136
- clearInterval(this.renderTimer);
40137
- this.renderTimer = void 0;
40138
- }
40139
40363
  this.finished = true;
40140
- if (this.isInteractive && this.started) {
40141
- this.clearWindow();
40142
- const sortedWorkers = Array.from(this.workers.values()).sort(
40143
- (a, b) => a.workerId - b.workerId
40144
- );
40145
- for (const worker of sortedWorkers) {
40146
- this.write(`${this.formatWorkerLine(worker)}
40147
- `);
40148
- }
40149
- this.write("\n");
40150
- }
40364
+ console.log("");
40151
40365
  }
40152
40366
  clear() {
40153
- if (this.isInteractive) {
40154
- this.clearWindow();
40155
- }
40156
40367
  }
40157
40368
  };
40158
40369
 
@@ -40300,7 +40511,7 @@ function formatEvaluationSummary(summary) {
40300
40511
 
40301
40512
  // ../../packages/core/dist/evaluation/validation/index.js
40302
40513
  import { readFile as readFile7 } from "node:fs/promises";
40303
- import path17 from "node:path";
40514
+ import path18 from "node:path";
40304
40515
  import { parse as parse6 } from "yaml";
40305
40516
  import { readFile as readFile23 } from "node:fs/promises";
40306
40517
  import path23 from "node:path";
@@ -40343,8 +40554,8 @@ async function detectFileType(filePath) {
40343
40554
  }
40344
40555
  }
40345
40556
  function inferFileTypeFromPath(filePath) {
40346
- const normalized = path17.normalize(filePath).replace(/\\/g, "/");
40347
- const basename = path17.basename(filePath);
40557
+ const normalized = path18.normalize(filePath).replace(/\\/g, "/");
40558
+ const basename = path18.basename(filePath);
40348
40559
  if (normalized.includes("/.agentv/")) {
40349
40560
  if (basename === "config.yaml" || basename === "config.yml") {
40350
40561
  return "config";
@@ -40656,6 +40867,9 @@ var CLI_SETTINGS = /* @__PURE__ */ new Set([
40656
40867
  ...COMMON_SETTINGS,
40657
40868
  "command_template",
40658
40869
  "commandTemplate",
40870
+ "verbose",
40871
+ "cli_verbose",
40872
+ "cliVerbose",
40659
40873
  "files_format",
40660
40874
  "filesFormat",
40661
40875
  "attachments_format",
@@ -40664,7 +40878,11 @@ var CLI_SETTINGS = /* @__PURE__ */ new Set([
40664
40878
  "env",
40665
40879
  "timeout_seconds",
40666
40880
  "timeoutSeconds",
40667
- "healthcheck"
40881
+ "healthcheck",
40882
+ "keep_temp_files",
40883
+ "keepTempFiles",
40884
+ "keep_output_files",
40885
+ "keepOutputFiles"
40668
40886
  ]);
40669
40887
  function getKnownSettings(provider) {
40670
40888
  const normalizedProvider = provider.toLowerCase();
@@ -40789,6 +41007,15 @@ async function validateTargetsFile(filePath) {
40789
41007
  if (healthcheck !== void 0) {
40790
41008
  validateCliHealthcheck(healthcheck, absolutePath2, `${location}.healthcheck`, errors2);
40791
41009
  }
41010
+ const verbose = target.verbose ?? target.cli_verbose ?? target.cliVerbose;
41011
+ if (verbose !== void 0 && typeof verbose !== "boolean") {
41012
+ errors2.push({
41013
+ severity: "error",
41014
+ filePath: absolutePath2,
41015
+ location: `${location}.verbose`,
41016
+ message: "'verbose' must be a boolean when provided"
41017
+ });
41018
+ }
40792
41019
  }
40793
41020
  function validateCliHealthcheck(healthcheck, absolutePath2, location, errors2) {
40794
41021
  if (!isObject22(healthcheck)) {
@@ -41173,12 +41400,12 @@ async function validateMessagesFileRefs(messages, location, searchRoots, filePat
41173
41400
  // src/utils/targets.ts
41174
41401
  import { constants as constants5 } from "node:fs";
41175
41402
  import { access as access5 } from "node:fs/promises";
41176
- import path18 from "node:path";
41403
+ import path19 from "node:path";
41177
41404
  var TARGET_FILE_CANDIDATES = [
41178
41405
  "targets.yaml",
41179
41406
  "targets.yml",
41180
- path18.join(".agentv", "targets.yaml"),
41181
- path18.join(".agentv", "targets.yml")
41407
+ path19.join(".agentv", "targets.yaml"),
41408
+ path19.join(".agentv", "targets.yml")
41182
41409
  ];
41183
41410
  async function fileExists5(filePath) {
41184
41411
  try {
@@ -41191,12 +41418,12 @@ async function fileExists5(filePath) {
41191
41418
  async function discoverTargetsFile(options) {
41192
41419
  const { explicitPath, testFilePath, repoRoot, cwd } = options;
41193
41420
  if (explicitPath) {
41194
- const resolvedExplicit = path18.resolve(explicitPath);
41421
+ const resolvedExplicit = path19.resolve(explicitPath);
41195
41422
  if (await fileExists5(resolvedExplicit)) {
41196
41423
  return resolvedExplicit;
41197
41424
  }
41198
41425
  for (const candidate of TARGET_FILE_CANDIDATES) {
41199
- const nested = path18.join(resolvedExplicit, candidate);
41426
+ const nested = path19.join(resolvedExplicit, candidate);
41200
41427
  if (await fileExists5(nested)) {
41201
41428
  return nested;
41202
41429
  }
@@ -41204,13 +41431,13 @@ async function discoverTargetsFile(options) {
41204
41431
  throw new Error(`targets.yaml not found at provided path: ${resolvedExplicit}`);
41205
41432
  }
41206
41433
  const directories = [...buildDirectoryChain(testFilePath, repoRoot)];
41207
- const resolvedCwd = path18.resolve(cwd);
41434
+ const resolvedCwd = path19.resolve(cwd);
41208
41435
  if (!directories.includes(resolvedCwd)) {
41209
41436
  directories.push(resolvedCwd);
41210
41437
  }
41211
41438
  for (const directory of directories) {
41212
41439
  for (const candidate of TARGET_FILE_CANDIDATES) {
41213
- const fullPath = path18.join(directory, candidate);
41440
+ const fullPath = path19.join(directory, candidate);
41214
41441
  if (await fileExists5(fullPath)) {
41215
41442
  return fullPath;
41216
41443
  }
@@ -41389,15 +41616,15 @@ async function ensureFileExists(filePath, description) {
41389
41616
  }
41390
41617
  }
41391
41618
  async function findRepoRoot(start) {
41392
- const fallback = path19.resolve(start);
41619
+ const fallback = path20.resolve(start);
41393
41620
  let current = fallback;
41394
41621
  while (current !== void 0) {
41395
- const candidate = path19.join(current, ".git");
41622
+ const candidate = path20.join(current, ".git");
41396
41623
  try {
41397
41624
  await access6(candidate, constants6.F_OK);
41398
41625
  return current;
41399
41626
  } catch {
41400
- const parent = path19.dirname(current);
41627
+ const parent = path20.dirname(current);
41401
41628
  if (parent === current) {
41402
41629
  break;
41403
41630
  }
@@ -41410,16 +41637,16 @@ function buildDefaultOutputPath(cwd, format) {
41410
41637
  const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
41411
41638
  const baseName = "eval";
41412
41639
  const extension = getDefaultExtension(format);
41413
- return path19.join(cwd, ".agentv", "results", `${baseName}_${timestamp}${extension}`);
41640
+ return path20.join(cwd, ".agentv", "results", `${baseName}_${timestamp}${extension}`);
41414
41641
  }
41415
- function resolvePromptDirectory(option4, cwd) {
41416
- if (option4 === void 0) {
41642
+ function resolvePromptDirectory(option5, cwd) {
41643
+ if (option5 === void 0) {
41417
41644
  return void 0;
41418
41645
  }
41419
- if (typeof option4 === "string" && option4.trim().length > 0) {
41420
- return path19.resolve(cwd, option4);
41646
+ if (typeof option5 === "string" && option5.trim().length > 0) {
41647
+ return path20.resolve(cwd, option5);
41421
41648
  }
41422
- return path19.join(cwd, ".agentv", "prompts");
41649
+ return path20.join(cwd, ".agentv", "prompts");
41423
41650
  }
41424
41651
  function createEvaluationCache() {
41425
41652
  const store = /* @__PURE__ */ new Map();
@@ -41432,8 +41659,8 @@ function createEvaluationCache() {
41432
41659
  }
41433
41660
  };
41434
41661
  }
41435
- function createProgressReporter(maxWorkers) {
41436
- const display = new ProgressDisplay(maxWorkers);
41662
+ function createProgressReporter(maxWorkers, options) {
41663
+ const display = new ProgressDisplay(maxWorkers, options);
41437
41664
  return {
41438
41665
  isInteractive: display.isInteractiveMode(),
41439
41666
  start: () => display.start(),
@@ -41444,7 +41671,7 @@ function createProgressReporter(maxWorkers) {
41444
41671
  };
41445
41672
  }
41446
41673
  function makeEvalKey(testFilePath, evalId) {
41447
- return `${path19.resolve(testFilePath)}::${evalId}`;
41674
+ return `${path20.resolve(testFilePath)}::${evalId}`;
41448
41675
  }
41449
41676
  function createDisplayIdTracker() {
41450
41677
  const map2 = /* @__PURE__ */ new Map();
@@ -41461,6 +41688,22 @@ function createDisplayIdTracker() {
41461
41688
  }
41462
41689
  };
41463
41690
  }
41691
+ function applyVerboseOverride(selection, cliVerbose) {
41692
+ const { resolvedTarget } = selection;
41693
+ if (resolvedTarget.kind !== "cli") {
41694
+ return selection;
41695
+ }
41696
+ return {
41697
+ ...selection,
41698
+ resolvedTarget: {
41699
+ ...resolvedTarget,
41700
+ config: {
41701
+ ...resolvedTarget.config,
41702
+ verbose: cliVerbose
41703
+ }
41704
+ }
41705
+ };
41706
+ }
41464
41707
  async function prepareFileMetadata(params) {
41465
41708
  const { testFilePath, repoRoot, cwd, options } = params;
41466
41709
  await ensureFileExists(testFilePath, "Test file");
@@ -41520,7 +41763,7 @@ async function runSingleEvalFile(params) {
41520
41763
  evalCases
41521
41764
  } = params;
41522
41765
  await ensureFileExists(testFilePath, "Test file");
41523
- const resolvedTargetSelection = selection;
41766
+ const resolvedTargetSelection = applyVerboseOverride(selection, options.verbose);
41524
41767
  const providerLabel = options.dryRun ? `${resolvedTargetSelection.resolvedTarget.kind} (dry-run)` : resolvedTargetSelection.resolvedTarget.kind;
41525
41768
  const targetMessage = options.verbose ? `Using target (${resolvedTargetSelection.targetSource}): ${resolvedTargetSelection.targetName} [provider=${providerLabel}] via ${resolvedTargetSelection.targetsFilePath}` : `Using target: ${inlineTargetLabel}`;
41526
41769
  if (!progressReporter.isInteractive || options.verbose) {
@@ -41600,7 +41843,7 @@ async function runEvalCommand(input) {
41600
41843
  if (options.verbose) {
41601
41844
  console.log(`Repository root: ${repoRoot}`);
41602
41845
  }
41603
- const outputPath = options.outPath ? path19.resolve(options.outPath) : buildDefaultOutputPath(cwd, options.format);
41846
+ const outputPath = options.outPath ? path20.resolve(options.outPath) : buildDefaultOutputPath(cwd, options.format);
41604
41847
  console.log(`Output path: ${outputPath}`);
41605
41848
  const outputWriter = await createOutputWriter(outputPath, options.format);
41606
41849
  const cache = options.cache ? createEvaluationCache() : void 0;
@@ -41608,7 +41851,7 @@ async function runEvalCommand(input) {
41608
41851
  const allResults = [];
41609
41852
  let lastPromptDumpDir;
41610
41853
  const seenEvalCases = /* @__PURE__ */ new Set();
41611
- const resolvedTestFiles = input.testFiles.map((file2) => path19.resolve(file2));
41854
+ const resolvedTestFiles = input.testFiles.map((file2) => path20.resolve(file2));
41612
41855
  const displayIdTracker = createDisplayIdTracker();
41613
41856
  const totalWorkers = options.workers ?? DEFAULT_WORKERS;
41614
41857
  const fileConcurrency = Math.min(
@@ -41633,7 +41876,7 @@ async function runEvalCommand(input) {
41633
41876
  if (totalEvalCount === 0) {
41634
41877
  throw new Error("No eval cases matched the provided filters.");
41635
41878
  }
41636
- const progressReporter = createProgressReporter(totalWorkers);
41879
+ const progressReporter = createProgressReporter(totalWorkers, { verbose: options.verbose });
41637
41880
  progressReporter.start();
41638
41881
  progressReporter.setTotal(totalEvalCount);
41639
41882
  const seenCodexLogPaths = /* @__PURE__ */ new Set();
@@ -41704,7 +41947,7 @@ async function resolveEvaluationRunner() {
41704
41947
  if (!overridePath) {
41705
41948
  return runEvaluation;
41706
41949
  }
41707
- const resolved = path19.isAbsolute(overridePath) ? overridePath : path19.resolve(process.cwd(), overridePath);
41950
+ const resolved = path20.isAbsolute(overridePath) ? overridePath : path20.resolve(process.cwd(), overridePath);
41708
41951
  const moduleUrl = pathToFileURL(resolved).href;
41709
41952
  const mod = await import(moduleUrl);
41710
41953
  const candidate = mod.runEvaluation;
@@ -41717,44 +41960,44 @@ async function resolveEvaluationRunner() {
41717
41960
  }
41718
41961
 
41719
41962
  // src/commands/eval/index.ts
41720
- var evalCommand = command({
41963
+ var evalCommand = command2({
41721
41964
  name: "eval",
41722
41965
  description: "Run eval suites and report results",
41723
41966
  args: {
41724
41967
  evalPaths: restPositionals({
41725
- type: string4,
41968
+ type: string5,
41726
41969
  displayName: "eval-paths",
41727
41970
  description: "Path(s) or glob(s) to evaluation .yaml file(s)"
41728
41971
  }),
41729
- target: option({
41730
- type: string4,
41972
+ target: option2({
41973
+ type: string5,
41731
41974
  long: "target",
41732
41975
  description: "Override target name from targets.yaml",
41733
41976
  defaultValue: () => "default"
41734
41977
  }),
41735
- targets: option({
41736
- type: optional2(string4),
41978
+ targets: option2({
41979
+ type: optional3(string5),
41737
41980
  long: "targets",
41738
41981
  description: "Path to targets.yaml (overrides discovery)"
41739
41982
  }),
41740
- evalId: option({
41741
- type: optional2(string4),
41983
+ evalId: option2({
41984
+ type: optional3(string5),
41742
41985
  long: "eval-id",
41743
41986
  description: "Run only the eval case with this identifier"
41744
41987
  }),
41745
- workers: option({
41988
+ workers: option2({
41746
41989
  type: number4,
41747
41990
  long: "workers",
41748
41991
  description: "Number of parallel workers (default: 3, max: 50). Can also be set per-target in targets.yaml",
41749
41992
  defaultValue: () => 3
41750
41993
  }),
41751
- out: option({
41752
- type: optional2(string4),
41994
+ out: option2({
41995
+ type: optional3(string5),
41753
41996
  long: "out",
41754
41997
  description: "Write results to the specified path"
41755
41998
  }),
41756
- outputFormat: option({
41757
- type: string4,
41999
+ outputFormat: option2({
42000
+ type: string5,
41758
42001
  long: "output-format",
41759
42002
  description: "Output format: 'jsonl' or 'yaml' (default: jsonl)",
41760
42003
  defaultValue: () => "jsonl"
@@ -41763,31 +42006,31 @@ var evalCommand = command({
41763
42006
  long: "dry-run",
41764
42007
  description: "Use mock provider responses instead of real LLM calls"
41765
42008
  }),
41766
- dryRunDelay: option({
42009
+ dryRunDelay: option2({
41767
42010
  type: number4,
41768
42011
  long: "dry-run-delay",
41769
42012
  description: "Fixed delay in milliseconds for dry-run mode (overridden by delay range if specified)",
41770
42013
  defaultValue: () => 0
41771
42014
  }),
41772
- dryRunDelayMin: option({
42015
+ dryRunDelayMin: option2({
41773
42016
  type: number4,
41774
42017
  long: "dry-run-delay-min",
41775
42018
  description: "Minimum delay in milliseconds for dry-run mode (requires --dry-run-delay-max)",
41776
42019
  defaultValue: () => 0
41777
42020
  }),
41778
- dryRunDelayMax: option({
42021
+ dryRunDelayMax: option2({
41779
42022
  type: number4,
41780
42023
  long: "dry-run-delay-max",
41781
42024
  description: "Maximum delay in milliseconds for dry-run mode (requires --dry-run-delay-min)",
41782
42025
  defaultValue: () => 0
41783
42026
  }),
41784
- agentTimeout: option({
42027
+ agentTimeout: option2({
41785
42028
  type: number4,
41786
42029
  long: "agent-timeout",
41787
42030
  description: "Timeout in seconds for provider responses (default: 120)",
41788
42031
  defaultValue: () => 120
41789
42032
  }),
41790
- maxRetries: option({
42033
+ maxRetries: option2({
41791
42034
  type: number4,
41792
42035
  long: "max-retries",
41793
42036
  description: "Retry count for timeout recoveries (default: 2)",
@@ -41801,8 +42044,8 @@ var evalCommand = command({
41801
42044
  long: "verbose",
41802
42045
  description: "Enable verbose logging"
41803
42046
  }),
41804
- dumpPrompts: option({
41805
- type: optional2(string4),
42047
+ dumpPrompts: option2({
42048
+ type: optional3(string5),
41806
42049
  long: "dump-prompts",
41807
42050
  description: "Directory path for persisting prompt payloads for debugging"
41808
42051
  }),
@@ -41848,7 +42091,7 @@ async function resolveEvalPaths(evalPaths, cwd) {
41848
42091
  const unmatched = [];
41849
42092
  const results = /* @__PURE__ */ new Set();
41850
42093
  for (const pattern of normalizedInputs) {
41851
- const candidatePath = path20.isAbsolute(pattern) ? path20.normalize(pattern) : path20.resolve(cwd, pattern);
42094
+ const candidatePath = path21.isAbsolute(pattern) ? path21.normalize(pattern) : path21.resolve(cwd, pattern);
41852
42095
  try {
41853
42096
  const stats = await stat4(candidatePath);
41854
42097
  if (stats.isFile() && /\.ya?ml$/i.test(candidatePath)) {
@@ -41872,7 +42115,7 @@ async function resolveEvalPaths(evalPaths, cwd) {
41872
42115
  continue;
41873
42116
  }
41874
42117
  for (const filePath of yamlMatches) {
41875
- results.add(path20.normalize(filePath));
42118
+ results.add(path21.normalize(filePath));
41876
42119
  }
41877
42120
  }
41878
42121
  if (unmatched.length > 0) {
@@ -41888,11 +42131,11 @@ async function resolveEvalPaths(evalPaths, cwd) {
41888
42131
  }
41889
42132
 
41890
42133
  // src/commands/generate/index.ts
41891
- import { command as command2, flag as flag2, option as option2, optional as optional3, positional as positional2, string as string5, subcommands } from "cmd-ts";
42134
+ import { command as command3, flag as flag2, option as option3, optional as optional4, positional as positional3, string as string6, subcommands } from "cmd-ts";
41892
42135
 
41893
42136
  // src/commands/generate/rubrics.ts
41894
42137
  import { readFile as readFile8, writeFile as writeFile6 } from "node:fs/promises";
41895
- import path21 from "node:path";
42138
+ import path24 from "node:path";
41896
42139
  import { pathToFileURL as pathToFileURL2 } from "node:url";
41897
42140
  import { isMap, isSeq, parseDocument } from "yaml";
41898
42141
  function isJsonObject3(value) {
@@ -41904,7 +42147,7 @@ function asString6(value) {
41904
42147
  async function loadRubricGenerator() {
41905
42148
  const customGenerator = process.env.AGENTEVO_CLI_RUBRIC_GENERATOR;
41906
42149
  if (customGenerator) {
41907
- const generatorPath = path21.resolve(customGenerator);
42150
+ const generatorPath = path24.resolve(customGenerator);
41908
42151
  const generatorUrl = pathToFileURL2(generatorPath).href;
41909
42152
  const module = await import(generatorUrl);
41910
42153
  return module.generateRubrics;
@@ -41914,7 +42157,7 @@ async function loadRubricGenerator() {
41914
42157
  async function generateRubricsCommand(options) {
41915
42158
  const { file: file2, target: targetOverride, verbose } = options;
41916
42159
  console.log(`Generating rubrics for: ${file2}`);
41917
- const absolutePath = path21.resolve(file2);
42160
+ const absolutePath = path24.resolve(file2);
41918
42161
  const content = await readFile8(absolutePath, "utf8");
41919
42162
  const doc = parseDocument(content);
41920
42163
  const parsed = doc.toJSON();
@@ -42031,17 +42274,17 @@ function extractQuestion(evalCase) {
42031
42274
  }
42032
42275
 
42033
42276
  // src/commands/generate/index.ts
42034
- var rubricsCommand = command2({
42277
+ var rubricsCommand = command3({
42035
42278
  name: "rubrics",
42036
42279
  description: "Generate rubrics from expected_outcome in YAML eval file",
42037
42280
  args: {
42038
- file: positional2({
42039
- type: string5,
42281
+ file: positional3({
42282
+ type: string6,
42040
42283
  displayName: "file",
42041
42284
  description: "Path to YAML eval file"
42042
42285
  }),
42043
- target: option2({
42044
- type: optional3(string5),
42286
+ target: option3({
42287
+ type: optional4(string6),
42045
42288
  long: "target",
42046
42289
  short: "t",
42047
42290
  description: "Override target for rubric generation (default: file target or openai:gpt-4o)"
@@ -42074,14 +42317,14 @@ var generateCommand = subcommands({
42074
42317
  });
42075
42318
 
42076
42319
  // src/commands/init/index.ts
42077
- import { existsSync, mkdirSync, writeFileSync } from "node:fs";
42078
- import path25 from "node:path";
42320
+ import { existsSync, mkdirSync, writeFileSync as writeFileSync2 } from "node:fs";
42321
+ import path26 from "node:path";
42079
42322
  import * as readline from "node:readline/promises";
42080
- import { command as command3, option as option3, optional as optional4, string as string6 } from "cmd-ts";
42323
+ import { command as command4, option as option4, optional as optional5, string as string7 } from "cmd-ts";
42081
42324
 
42082
42325
  // src/templates/index.ts
42083
- import { readFileSync, readdirSync, statSync } from "node:fs";
42084
- import path24 from "node:path";
42326
+ import { readFileSync as readFileSync2, readdirSync, statSync } from "node:fs";
42327
+ import path25 from "node:path";
42085
42328
  import { fileURLToPath } from "node:url";
42086
42329
  function getGithubTemplates() {
42087
42330
  return getTemplatesFromDir(".github");
@@ -42093,12 +42336,12 @@ function getClaudeTemplates() {
42093
42336
  return getTemplatesFromDir(".claude");
42094
42337
  }
42095
42338
  function getTemplatesFromDir(subdir) {
42096
- const currentDir = path24.dirname(fileURLToPath(import.meta.url));
42339
+ const currentDir = path25.dirname(fileURLToPath(import.meta.url));
42097
42340
  let templatesDir;
42098
- if (currentDir.includes(`${path24.sep}dist`)) {
42099
- templatesDir = path24.join(currentDir, "templates", subdir);
42341
+ if (currentDir.includes(`${path25.sep}dist`)) {
42342
+ templatesDir = path25.join(currentDir, "templates", subdir);
42100
42343
  } else {
42101
- templatesDir = path24.join(currentDir, subdir);
42344
+ templatesDir = path25.join(currentDir, subdir);
42102
42345
  }
42103
42346
  return readTemplatesRecursively(templatesDir, "");
42104
42347
  }
@@ -42106,15 +42349,15 @@ function readTemplatesRecursively(dir, relativePath) {
42106
42349
  const templates = [];
42107
42350
  const entries = readdirSync(dir);
42108
42351
  for (const entry of entries) {
42109
- const fullPath = path24.join(dir, entry);
42352
+ const fullPath = path25.join(dir, entry);
42110
42353
  const stat6 = statSync(fullPath);
42111
- const entryRelativePath = relativePath ? path24.join(relativePath, entry) : entry;
42354
+ const entryRelativePath = relativePath ? path25.join(relativePath, entry) : entry;
42112
42355
  if (stat6.isDirectory()) {
42113
42356
  templates.push(...readTemplatesRecursively(fullPath, entryRelativePath));
42114
42357
  } else {
42115
- const content = readFileSync(fullPath, "utf-8");
42358
+ const content = readFileSync2(fullPath, "utf-8");
42116
42359
  templates.push({
42117
- path: entryRelativePath.split(path24.sep).join("/"),
42360
+ path: entryRelativePath.split(path25.sep).join("/"),
42118
42361
  // Normalize to forward slashes
42119
42362
  content
42120
42363
  });
@@ -42137,10 +42380,10 @@ async function promptYesNo(message) {
42137
42380
  }
42138
42381
  }
42139
42382
  async function initCommand(options = {}) {
42140
- const targetPath = path25.resolve(options.targetPath ?? ".");
42141
- const githubDir = path25.join(targetPath, ".github");
42142
- const agentvDir = path25.join(targetPath, ".agentv");
42143
- const claudeDir = path25.join(targetPath, ".claude");
42383
+ const targetPath = path26.resolve(options.targetPath ?? ".");
42384
+ const githubDir = path26.join(targetPath, ".github");
42385
+ const agentvDir = path26.join(targetPath, ".agentv");
42386
+ const claudeDir = path26.join(targetPath, ".claude");
42144
42387
  const githubTemplates = getGithubTemplates();
42145
42388
  const agentvTemplates = getAgentvTemplates();
42146
42389
  const claudeTemplates = getClaudeTemplates();
@@ -42148,32 +42391,32 @@ async function initCommand(options = {}) {
42148
42391
  const otherAgentvTemplates = agentvTemplates.filter((t) => t.path !== ".env.template");
42149
42392
  const existingFiles = [];
42150
42393
  if (envTemplate) {
42151
- const envFilePath = path25.join(targetPath, ".env.template");
42394
+ const envFilePath = path26.join(targetPath, ".env.template");
42152
42395
  if (existsSync(envFilePath)) {
42153
42396
  existingFiles.push(".env.template");
42154
42397
  }
42155
42398
  }
42156
42399
  if (existsSync(githubDir)) {
42157
42400
  for (const template of githubTemplates) {
42158
- const targetFilePath = path25.join(githubDir, template.path);
42401
+ const targetFilePath = path26.join(githubDir, template.path);
42159
42402
  if (existsSync(targetFilePath)) {
42160
- existingFiles.push(path25.relative(targetPath, targetFilePath));
42403
+ existingFiles.push(path26.relative(targetPath, targetFilePath));
42161
42404
  }
42162
42405
  }
42163
42406
  }
42164
42407
  if (existsSync(agentvDir)) {
42165
42408
  for (const template of otherAgentvTemplates) {
42166
- const targetFilePath = path25.join(agentvDir, template.path);
42409
+ const targetFilePath = path26.join(agentvDir, template.path);
42167
42410
  if (existsSync(targetFilePath)) {
42168
- existingFiles.push(path25.relative(targetPath, targetFilePath));
42411
+ existingFiles.push(path26.relative(targetPath, targetFilePath));
42169
42412
  }
42170
42413
  }
42171
42414
  }
42172
42415
  if (existsSync(claudeDir)) {
42173
42416
  for (const template of claudeTemplates) {
42174
- const targetFilePath = path25.join(claudeDir, template.path);
42417
+ const targetFilePath = path26.join(claudeDir, template.path);
42175
42418
  if (existsSync(targetFilePath)) {
42176
- existingFiles.push(path25.relative(targetPath, targetFilePath));
42419
+ existingFiles.push(path26.relative(targetPath, targetFilePath));
42177
42420
  }
42178
42421
  }
42179
42422
  }
@@ -42200,36 +42443,36 @@ async function initCommand(options = {}) {
42200
42443
  mkdirSync(claudeDir, { recursive: true });
42201
42444
  }
42202
42445
  if (envTemplate) {
42203
- const envFilePath = path25.join(targetPath, ".env.template");
42204
- writeFileSync(envFilePath, envTemplate.content, "utf-8");
42446
+ const envFilePath = path26.join(targetPath, ".env.template");
42447
+ writeFileSync2(envFilePath, envTemplate.content, "utf-8");
42205
42448
  console.log("Created .env.template");
42206
42449
  }
42207
42450
  for (const template of githubTemplates) {
42208
- const targetFilePath = path25.join(githubDir, template.path);
42209
- const targetDirPath = path25.dirname(targetFilePath);
42451
+ const targetFilePath = path26.join(githubDir, template.path);
42452
+ const targetDirPath = path26.dirname(targetFilePath);
42210
42453
  if (!existsSync(targetDirPath)) {
42211
42454
  mkdirSync(targetDirPath, { recursive: true });
42212
42455
  }
42213
- writeFileSync(targetFilePath, template.content, "utf-8");
42214
- console.log(`Created ${path25.relative(targetPath, targetFilePath)}`);
42456
+ writeFileSync2(targetFilePath, template.content, "utf-8");
42457
+ console.log(`Created ${path26.relative(targetPath, targetFilePath)}`);
42215
42458
  }
42216
42459
  for (const template of otherAgentvTemplates) {
42217
- const targetFilePath = path25.join(agentvDir, template.path);
42218
- const targetDirPath = path25.dirname(targetFilePath);
42460
+ const targetFilePath = path26.join(agentvDir, template.path);
42461
+ const targetDirPath = path26.dirname(targetFilePath);
42219
42462
  if (!existsSync(targetDirPath)) {
42220
42463
  mkdirSync(targetDirPath, { recursive: true });
42221
42464
  }
42222
- writeFileSync(targetFilePath, template.content, "utf-8");
42223
- console.log(`Created ${path25.relative(targetPath, targetFilePath)}`);
42465
+ writeFileSync2(targetFilePath, template.content, "utf-8");
42466
+ console.log(`Created ${path26.relative(targetPath, targetFilePath)}`);
42224
42467
  }
42225
42468
  for (const template of claudeTemplates) {
42226
- const targetFilePath = path25.join(claudeDir, template.path);
42227
- const targetDirPath = path25.dirname(targetFilePath);
42469
+ const targetFilePath = path26.join(claudeDir, template.path);
42470
+ const targetDirPath = path26.dirname(targetFilePath);
42228
42471
  if (!existsSync(targetDirPath)) {
42229
42472
  mkdirSync(targetDirPath, { recursive: true });
42230
42473
  }
42231
- writeFileSync(targetFilePath, template.content, "utf-8");
42232
- console.log(`Created ${path25.relative(targetPath, targetFilePath)}`);
42474
+ writeFileSync2(targetFilePath, template.content, "utf-8");
42475
+ console.log(`Created ${path26.relative(targetPath, targetFilePath)}`);
42233
42476
  }
42234
42477
  console.log("\nAgentV initialized successfully!");
42235
42478
  console.log("\nFiles installed to root:");
@@ -42237,17 +42480,17 @@ async function initCommand(options = {}) {
42237
42480
  console.log(" - .env.template");
42238
42481
  }
42239
42482
  console.log(`
42240
- Files installed to ${path25.relative(targetPath, githubDir)}:`);
42483
+ Files installed to ${path26.relative(targetPath, githubDir)}:`);
42241
42484
  for (const t of githubTemplates) {
42242
42485
  console.log(` - ${t.path}`);
42243
42486
  }
42244
42487
  console.log(`
42245
- Files installed to ${path25.relative(targetPath, agentvDir)}:`);
42488
+ Files installed to ${path26.relative(targetPath, agentvDir)}:`);
42246
42489
  for (const t of otherAgentvTemplates) {
42247
42490
  console.log(` - ${t.path}`);
42248
42491
  }
42249
42492
  console.log(`
42250
- Files installed to ${path25.relative(targetPath, claudeDir)}:`);
42493
+ Files installed to ${path26.relative(targetPath, claudeDir)}:`);
42251
42494
  for (const t of claudeTemplates) {
42252
42495
  console.log(` - ${t.path}`);
42253
42496
  }
@@ -42256,12 +42499,12 @@ Files installed to ${path25.relative(targetPath, claudeDir)}:`);
42256
42499
  console.log(" 2. Configure targets in .agentv/targets.yaml");
42257
42500
  console.log(" 3. Create eval files using the schema and prompt templates");
42258
42501
  }
42259
- var initCmdTsCommand = command3({
42502
+ var initCmdTsCommand = command4({
42260
42503
  name: "init",
42261
42504
  description: "Initialize AgentV in your project (installs prompt templates and schema to .github)",
42262
42505
  args: {
42263
- path: option3({
42264
- type: optional4(string6),
42506
+ path: option4({
42507
+ type: optional5(string7),
42265
42508
  long: "path",
42266
42509
  description: "Target directory for initialization (default: current directory)"
42267
42510
  })
@@ -42277,7 +42520,7 @@ var initCmdTsCommand = command3({
42277
42520
  });
42278
42521
 
42279
42522
  // src/commands/validate/index.ts
42280
- import { command as command4, restPositionals as restPositionals2, string as string7 } from "cmd-ts";
42523
+ import { command as command5, restPositionals as restPositionals2, string as string8 } from "cmd-ts";
42281
42524
 
42282
42525
  // src/commands/validate/format-output.ts
42283
42526
  var ANSI_RED3 = "\x1B[31m";
@@ -42362,7 +42605,7 @@ function isTTY2() {
42362
42605
  // src/commands/validate/validate-files.ts
42363
42606
  import { constants as constants7 } from "node:fs";
42364
42607
  import { access as access7, readdir as readdir3, stat as stat5 } from "node:fs/promises";
42365
- import path26 from "node:path";
42608
+ import path27 from "node:path";
42366
42609
  async function validateFiles(paths) {
42367
42610
  const filePaths = await expandPaths(paths);
42368
42611
  const results = [];
@@ -42380,7 +42623,7 @@ async function validateFiles(paths) {
42380
42623
  };
42381
42624
  }
42382
42625
  async function validateSingleFile(filePath) {
42383
- const absolutePath = path26.resolve(filePath);
42626
+ const absolutePath = path27.resolve(filePath);
42384
42627
  const fileType = await detectFileType(absolutePath);
42385
42628
  let result;
42386
42629
  if (fileType === "eval") {
@@ -42405,7 +42648,7 @@ async function validateSingleFile(filePath) {
42405
42648
  async function expandPaths(paths) {
42406
42649
  const expanded = [];
42407
42650
  for (const inputPath of paths) {
42408
- const absolutePath = path26.resolve(inputPath);
42651
+ const absolutePath = path27.resolve(inputPath);
42409
42652
  try {
42410
42653
  await access7(absolutePath, constants7.F_OK);
42411
42654
  } catch {
@@ -42429,7 +42672,7 @@ async function findYamlFiles(dirPath) {
42429
42672
  try {
42430
42673
  const entries = await readdir3(dirPath, { withFileTypes: true });
42431
42674
  for (const entry of entries) {
42432
- const fullPath = path26.join(dirPath, entry.name);
42675
+ const fullPath = path27.join(dirPath, entry.name);
42433
42676
  if (entry.isDirectory()) {
42434
42677
  if (entry.name === "node_modules" || entry.name.startsWith(".")) {
42435
42678
  continue;
@@ -42446,7 +42689,7 @@ async function findYamlFiles(dirPath) {
42446
42689
  return results;
42447
42690
  }
42448
42691
  function isYamlFile(filePath) {
42449
- const ext = path26.extname(filePath).toLowerCase();
42692
+ const ext = path27.extname(filePath).toLowerCase();
42450
42693
  return ext === ".yaml" || ext === ".yml";
42451
42694
  }
42452
42695
 
@@ -42463,12 +42706,12 @@ async function runValidateCommand(paths) {
42463
42706
  process.exit(1);
42464
42707
  }
42465
42708
  }
42466
- var validateCommand = command4({
42709
+ var validateCommand = command5({
42467
42710
  name: "validate",
42468
42711
  description: "Validate AgentV eval and targets YAML files",
42469
42712
  args: {
42470
42713
  paths: restPositionals2({
42471
- type: string7,
42714
+ type: string8,
42472
42715
  displayName: "paths",
42473
42716
  description: "Files or directories to validate"
42474
42717
  })
@@ -42484,16 +42727,17 @@ var validateCommand = command4({
42484
42727
  });
42485
42728
 
42486
42729
  // src/index.ts
42487
- var packageJson = JSON.parse(readFileSync2(new URL("../package.json", import.meta.url), "utf8"));
42730
+ var packageJson = JSON.parse(readFileSync3(new URL("../package.json", import.meta.url), "utf8"));
42488
42731
  var app = subcommands2({
42489
42732
  name: "agentv",
42490
42733
  description: "AgentV CLI",
42491
42734
  version: packageJson.version,
42492
42735
  cmds: {
42736
+ convert: convertCommand,
42493
42737
  eval: evalCommand,
42494
- validate: validateCommand,
42495
42738
  generate: generateCommand,
42496
- init: initCmdTsCommand
42739
+ init: initCmdTsCommand,
42740
+ validate: validateCommand
42497
42741
  }
42498
42742
  });
42499
42743
  async function runCli(argv = process.argv) {
@@ -42504,4 +42748,4 @@ export {
42504
42748
  app,
42505
42749
  runCli
42506
42750
  };
42507
- //# sourceMappingURL=chunk-IVIT4U6S.js.map
42751
+ //# sourceMappingURL=chunk-3RYQPI4H.js.map