agentv 1.3.1 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -141,30 +141,14 @@ var require_dist = __commonJS({
141
141
  });
142
142
 
143
143
  // src/index.ts
144
- import { readFileSync as readFileSync2 } from "node:fs";
144
+ import { readFileSync as readFileSync3 } from "node:fs";
145
145
  import { binary, run, subcommands as subcommands2 } from "cmd-ts";
146
146
 
147
- // src/commands/eval/index.ts
148
- import { stat as stat4 } from "node:fs/promises";
149
- import path20 from "node:path";
150
- import {
151
- command,
152
- flag,
153
- number as number4,
154
- option,
155
- optional as optional2,
156
- restPositionals,
157
- string as string4
158
- } from "cmd-ts";
159
- import fg from "fast-glob";
160
-
161
- // src/commands/eval/run-eval.ts
162
- import { constants as constants6 } from "node:fs";
163
- import { access as access6, mkdir as mkdir7 } from "node:fs/promises";
164
- import path19 from "node:path";
165
- import { pathToFileURL } from "node:url";
147
+ // src/commands/convert/index.ts
148
+ import { readFileSync, writeFileSync } from "node:fs";
149
+ import path14 from "node:path";
166
150
 
167
- // ../../packages/core/dist/chunk-4A6L2F6L.js
151
+ // ../../packages/core/dist/chunk-KPHTMTZ3.js
168
152
  import { constants } from "node:fs";
169
153
  import { access, readFile } from "node:fs/promises";
170
154
  import path from "node:path";
@@ -648,8 +632,8 @@ function getErrorMap() {
648
632
 
649
633
  // ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/helpers/parseUtil.js
650
634
  var makeIssue = (params) => {
651
- const { data, path: path27, errorMaps, issueData } = params;
652
- const fullPath = [...path27, ...issueData.path || []];
635
+ const { data, path: path28, errorMaps, issueData } = params;
636
+ const fullPath = [...path28, ...issueData.path || []];
653
637
  const fullIssue = {
654
638
  ...issueData,
655
639
  path: fullPath
@@ -765,11 +749,11 @@ var errorUtil;
765
749
 
766
750
  // ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/types.js
767
751
  var ParseInputLazyPath = class {
768
- constructor(parent, value, path27, key2) {
752
+ constructor(parent, value, path28, key2) {
769
753
  this._cachedPath = [];
770
754
  this.parent = parent;
771
755
  this.data = value;
772
- this._path = path27;
756
+ this._path = path28;
773
757
  this._key = key2;
774
758
  }
775
759
  get path() {
@@ -1049,8 +1033,8 @@ var ZodType = class {
1049
1033
  promise() {
1050
1034
  return ZodPromise.create(this, this._def);
1051
1035
  }
1052
- or(option4) {
1053
- return ZodUnion.create([this, option4], this._def);
1036
+ or(option5) {
1037
+ return ZodUnion.create([this, option5], this._def);
1054
1038
  }
1055
1039
  and(incoming) {
1056
1040
  return ZodIntersection.create(this, incoming, this._def);
@@ -2900,7 +2884,7 @@ var ZodUnion = class extends ZodType {
2900
2884
  return INVALID;
2901
2885
  }
2902
2886
  if (ctx.common.async) {
2903
- return Promise.all(options.map(async (option4) => {
2887
+ return Promise.all(options.map(async (option5) => {
2904
2888
  const childCtx = {
2905
2889
  ...ctx,
2906
2890
  common: {
@@ -2910,7 +2894,7 @@ var ZodUnion = class extends ZodType {
2910
2894
  parent: null
2911
2895
  };
2912
2896
  return {
2913
- result: await option4._parseAsync({
2897
+ result: await option5._parseAsync({
2914
2898
  data: ctx.data,
2915
2899
  path: ctx.path,
2916
2900
  parent: childCtx
@@ -2921,7 +2905,7 @@ var ZodUnion = class extends ZodType {
2921
2905
  } else {
2922
2906
  let dirty = void 0;
2923
2907
  const issues = [];
2924
- for (const option4 of options) {
2908
+ for (const option5 of options) {
2925
2909
  const childCtx = {
2926
2910
  ...ctx,
2927
2911
  common: {
@@ -2930,7 +2914,7 @@ var ZodUnion = class extends ZodType {
2930
2914
  },
2931
2915
  parent: null
2932
2916
  };
2933
- const result = option4._parseSync({
2917
+ const result = option5._parseSync({
2934
2918
  data: ctx.data,
2935
2919
  path: ctx.path,
2936
2920
  parent: childCtx
@@ -3011,8 +2995,8 @@ var ZodDiscriminatedUnion = class _ZodDiscriminatedUnion extends ZodType {
3011
2995
  }
3012
2996
  const discriminator = this.discriminator;
3013
2997
  const discriminatorValue = ctx.data[discriminator];
3014
- const option4 = this.optionsMap.get(discriminatorValue);
3015
- if (!option4) {
2998
+ const option5 = this.optionsMap.get(discriminatorValue);
2999
+ if (!option5) {
3016
3000
  addIssueToContext(ctx, {
3017
3001
  code: ZodIssueCode.invalid_union_discriminator,
3018
3002
  options: Array.from(this.optionsMap.keys()),
@@ -3021,13 +3005,13 @@ var ZodDiscriminatedUnion = class _ZodDiscriminatedUnion extends ZodType {
3021
3005
  return INVALID;
3022
3006
  }
3023
3007
  if (ctx.common.async) {
3024
- return option4._parseAsync({
3008
+ return option5._parseAsync({
3025
3009
  data: ctx.data,
3026
3010
  path: ctx.path,
3027
3011
  parent: ctx
3028
3012
  });
3029
3013
  } else {
3030
- return option4._parseSync({
3014
+ return option5._parseSync({
3031
3015
  data: ctx.data,
3032
3016
  path: ctx.path,
3033
3017
  parent: ctx
@@ -4211,7 +4195,7 @@ var coerce = {
4211
4195
  };
4212
4196
  var NEVER = INVALID;
4213
4197
 
4214
- // ../../packages/core/dist/chunk-4A6L2F6L.js
4198
+ // ../../packages/core/dist/chunk-KPHTMTZ3.js
4215
4199
  async function fileExists(filePath) {
4216
4200
  try {
4217
4201
  await access(filePath, constants.F_OK);
@@ -4227,10 +4211,6 @@ async function readTextFile(filePath) {
4227
4211
  const content = await readFile(filePath, "utf8");
4228
4212
  return normalizeLineEndings(content);
4229
4213
  }
4230
- async function readJsonFile(filePath) {
4231
- const content = await readFile(filePath, "utf8");
4232
- return JSON.parse(content);
4233
- }
4234
4214
  async function findGitRoot(startPath) {
4235
4215
  let currentDir = path.dirname(path.resolve(startPath));
4236
4216
  const root2 = path.parse(currentDir).root;
@@ -4574,8 +4554,7 @@ function normalizeCodexLogFormat(value) {
4574
4554
  }
4575
4555
  function resolveMockConfig(target) {
4576
4556
  const response = typeof target.response === "string" ? target.response : void 0;
4577
- const trace2 = Array.isArray(target.trace) ? target.trace : void 0;
4578
- return { response, trace: trace2 };
4557
+ return { response };
4579
4558
  }
4580
4559
  function resolveVSCodeConfig(target, env, insiders) {
4581
4560
  const workspaceTemplateEnvVar = resolveOptionalLiteralString(
@@ -4595,9 +4574,9 @@ function resolveVSCodeConfig(target, env, insiders) {
4595
4574
  const dryRunSource = target.dry_run ?? target.dryRun;
4596
4575
  const subagentRootSource = target.subagent_root ?? target.subagentRoot;
4597
4576
  const defaultCommand = insiders ? "code-insiders" : "code";
4598
- const command5 = resolveOptionalLiteralString(commandSource) ?? defaultCommand;
4577
+ const command6 = resolveOptionalLiteralString(commandSource) ?? defaultCommand;
4599
4578
  return {
4600
- command: command5,
4579
+ command: command6,
4601
4580
  waitForResponse: resolveOptionalBoolean(waitSource) ?? true,
4602
4581
  dryRun: resolveOptionalBoolean(dryRunSource) ?? false,
4603
4582
  subagentRoot: resolveOptionalString(subagentRootSource, env, `${target.name} subagent root`, {
@@ -4613,6 +4592,9 @@ function resolveCliConfig(target, env, evalFilePath) {
4613
4592
  target.files_format ?? target.filesFormat ?? target.attachments_format ?? target.attachmentsFormat
4614
4593
  );
4615
4594
  const verbose = resolveOptionalBoolean(target.verbose ?? target.cli_verbose ?? target.cliVerbose);
4595
+ const keepTempFiles = resolveOptionalBoolean(
4596
+ target.keep_temp_files ?? target.keepTempFiles ?? target.keep_output_files ?? target.keepOutputFiles
4597
+ );
4616
4598
  let cwd = resolveOptionalString(target.cwd, env, `${target.name} working directory`, {
4617
4599
  allowLiteral: true,
4618
4600
  optionalEnv: true
@@ -4641,7 +4623,8 @@ function resolveCliConfig(target, env, evalFilePath) {
4641
4623
  cwd,
4642
4624
  timeoutMs,
4643
4625
  healthcheck,
4644
- verbose
4626
+ verbose,
4627
+ keepTempFiles
4645
4628
  };
4646
4629
  }
4647
4630
  function resolveTimeoutMs(source2, description) {
@@ -4891,6 +4874,21 @@ var PROVIDER_ALIASES = [
4891
4874
  "vertex"
4892
4875
  // legacy/future support
4893
4876
  ];
4877
+ function extractLastAssistantContent(messages) {
4878
+ if (!messages || messages.length === 0) {
4879
+ return "";
4880
+ }
4881
+ for (let i = messages.length - 1; i >= 0; i--) {
4882
+ const msg = messages[i];
4883
+ if (msg.role === "assistant" && msg.content !== void 0) {
4884
+ if (typeof msg.content === "string") {
4885
+ return msg.content;
4886
+ }
4887
+ return JSON.stringify(msg.content);
4888
+ }
4889
+ }
4890
+ return "";
4891
+ }
4894
4892
  function isAgentProvider(provider) {
4895
4893
  return provider ? AGENT_PROVIDER_KINDS.includes(provider.kind) : false;
4896
4894
  }
@@ -6001,10 +5999,10 @@ function assignProp(target, prop, value) {
6001
5999
  configurable: true
6002
6000
  });
6003
6001
  }
6004
- function getElementAtPath(obj, path27) {
6005
- if (!path27)
6002
+ function getElementAtPath(obj, path28) {
6003
+ if (!path28)
6006
6004
  return obj;
6007
- return path27.reduce((acc, key2) => acc?.[key2], obj);
6005
+ return path28.reduce((acc, key2) => acc?.[key2], obj);
6008
6006
  }
6009
6007
  function promiseAllObject(promisesObj) {
6010
6008
  const keys = Object.keys(promisesObj);
@@ -6324,11 +6322,11 @@ function aborted(x, startIndex = 0) {
6324
6322
  }
6325
6323
  return false;
6326
6324
  }
6327
- function prefixIssues(path27, issues) {
6325
+ function prefixIssues(path28, issues) {
6328
6326
  return issues.map((iss) => {
6329
6327
  var _a17;
6330
6328
  (_a17 = iss).path ?? (_a17.path = []);
6331
- iss.path.unshift(path27);
6329
+ iss.path.unshift(path28);
6332
6330
  return iss;
6333
6331
  });
6334
6332
  }
@@ -6465,7 +6463,7 @@ function treeifyError(error40, _mapper) {
6465
6463
  return issue2.message;
6466
6464
  };
6467
6465
  const result = { errors: [] };
6468
- const processError = (error41, path27 = []) => {
6466
+ const processError = (error41, path28 = []) => {
6469
6467
  var _a17, _b8;
6470
6468
  for (const issue2 of error41.issues) {
6471
6469
  if (issue2.code === "invalid_union" && issue2.errors.length) {
@@ -6475,7 +6473,7 @@ function treeifyError(error40, _mapper) {
6475
6473
  } else if (issue2.code === "invalid_element") {
6476
6474
  processError({ issues: issue2.issues }, issue2.path);
6477
6475
  } else {
6478
- const fullpath = [...path27, ...issue2.path];
6476
+ const fullpath = [...path28, ...issue2.path];
6479
6477
  if (fullpath.length === 0) {
6480
6478
  result.errors.push(mapper(issue2));
6481
6479
  continue;
@@ -6505,9 +6503,9 @@ function treeifyError(error40, _mapper) {
6505
6503
  processError(error40);
6506
6504
  return result;
6507
6505
  }
6508
- function toDotPath(path27) {
6506
+ function toDotPath(path28) {
6509
6507
  const segs = [];
6510
- for (const seg of path27) {
6508
+ for (const seg of path28) {
6511
6509
  if (typeof seg === "number")
6512
6510
  segs.push(`[${seg}]`);
6513
6511
  else if (typeof seg === "symbol")
@@ -8106,7 +8104,7 @@ var $ZodUnion = /* @__PURE__ */ $constructor("$ZodUnion", (inst, def) => {
8106
8104
  defineLazy(inst._zod, "optout", () => def.options.some((o) => o._zod.optout === "optional") ? "optional" : void 0);
8107
8105
  defineLazy(inst._zod, "values", () => {
8108
8106
  if (def.options.every((o) => o._zod.values)) {
8109
- return new Set(def.options.flatMap((option4) => Array.from(option4._zod.values)));
8107
+ return new Set(def.options.flatMap((option5) => Array.from(option5._zod.values)));
8110
8108
  }
8111
8109
  return void 0;
8112
8110
  });
@@ -8120,8 +8118,8 @@ var $ZodUnion = /* @__PURE__ */ $constructor("$ZodUnion", (inst, def) => {
8120
8118
  inst._zod.parse = (payload, ctx) => {
8121
8119
  let async = false;
8122
8120
  const results = [];
8123
- for (const option4 of def.options) {
8124
- const result = option4._zod.run({
8121
+ for (const option5 of def.options) {
8122
+ const result = option5._zod.run({
8125
8123
  value: payload.value,
8126
8124
  issues: []
8127
8125
  }, ctx);
@@ -8146,10 +8144,10 @@ var $ZodDiscriminatedUnion = /* @__PURE__ */ $constructor("$ZodDiscriminatedUnio
8146
8144
  const _super = inst._zod.parse;
8147
8145
  defineLazy(inst._zod, "propValues", () => {
8148
8146
  const propValues = {};
8149
- for (const option4 of def.options) {
8150
- const pv = option4._zod.propValues;
8147
+ for (const option5 of def.options) {
8148
+ const pv = option5._zod.propValues;
8151
8149
  if (!pv || Object.keys(pv).length === 0)
8152
- throw new Error(`Invalid discriminated union option at index "${def.options.indexOf(option4)}"`);
8150
+ throw new Error(`Invalid discriminated union option at index "${def.options.indexOf(option5)}"`);
8153
8151
  for (const [k, v] of Object.entries(pv)) {
8154
8152
  if (!propValues[k])
8155
8153
  propValues[k] = /* @__PURE__ */ new Set();
@@ -15353,8 +15351,8 @@ function isTransforming(_schema, _ctx) {
15353
15351
  return false;
15354
15352
  }
15355
15353
  case "union": {
15356
- for (const option4 of def.options) {
15357
- if (isTransforming(option4, ctx))
15354
+ for (const option5 of def.options) {
15355
+ if (isTransforming(option5, ctx))
15358
15356
  return true;
15359
15357
  }
15360
15358
  return false;
@@ -26060,14 +26058,14 @@ function createAzure(options = {}) {
26060
26058
  description: "Azure OpenAI resource name"
26061
26059
  });
26062
26060
  const apiVersion = (_a17 = options.apiVersion) != null ? _a17 : "v1";
26063
- const url2 = ({ path: path27, modelId }) => {
26061
+ const url2 = ({ path: path28, modelId }) => {
26064
26062
  var _a24;
26065
26063
  const baseUrlPrefix = (_a24 = options.baseURL) != null ? _a24 : `https://${getResourceName()}.openai.azure.com/openai`;
26066
26064
  let fullUrl;
26067
26065
  if (options.useDeploymentBasedUrls) {
26068
- fullUrl = new URL(`${baseUrlPrefix}/deployments/${modelId}${path27}`);
26066
+ fullUrl = new URL(`${baseUrlPrefix}/deployments/${modelId}${path28}`);
26069
26067
  } else {
26070
- fullUrl = new URL(`${baseUrlPrefix}/v1${path27}`);
26068
+ fullUrl = new URL(`${baseUrlPrefix}/v1${path28}`);
26071
26069
  }
26072
26070
  fullUrl.searchParams.set("api-version", apiVersion);
26073
26071
  return fullUrl.toString();
@@ -34595,33 +34593,22 @@ var EVALUATOR_KIND_SET = new Set(EVALUATOR_KIND_VALUES);
34595
34593
  function isEvaluatorKind(value) {
34596
34594
  return typeof value === "string" && EVALUATOR_KIND_SET.has(value);
34597
34595
  }
34598
- function isTraceEventType(value) {
34599
- return typeof value === "string" && ["model_step", "tool_call", "tool_result", "message", "error"].includes(value);
34600
- }
34601
- function isTraceEvent(value) {
34602
- if (typeof value !== "object" || value === null) {
34603
- return false;
34604
- }
34605
- const candidate = value;
34606
- return isTraceEventType(candidate.type) && typeof candidate.timestamp === "string";
34607
- }
34608
- function computeTraceSummary(trace2) {
34596
+ function computeTraceSummary(messages) {
34609
34597
  const toolCallCounts = {};
34610
- let errorCount = 0;
34611
- for (const event of trace2) {
34612
- if (event.type === "tool_call" && event.name) {
34613
- toolCallCounts[event.name] = (toolCallCounts[event.name] ?? 0) + 1;
34614
- }
34615
- if (event.type === "error") {
34616
- errorCount++;
34598
+ let totalToolCalls = 0;
34599
+ for (const message of messages) {
34600
+ if (!message.toolCalls) continue;
34601
+ for (const toolCall of message.toolCalls) {
34602
+ toolCallCounts[toolCall.tool] = (toolCallCounts[toolCall.tool] ?? 0) + 1;
34603
+ totalToolCalls++;
34617
34604
  }
34618
34605
  }
34619
34606
  const toolNames = Object.keys(toolCallCounts).sort();
34620
34607
  return {
34621
- eventCount: trace2.length,
34608
+ eventCount: totalToolCalls,
34622
34609
  toolNames,
34623
34610
  toolCallsByName: toolCallCounts,
34624
- errorCount
34611
+ errorCount: 0
34625
34612
  };
34626
34613
  }
34627
34614
  function extractCodeBlocks(segments) {
@@ -34869,7 +34856,8 @@ var TEMPLATE_VARIABLES = {
34869
34856
  QUESTION: "question",
34870
34857
  EXPECTED_OUTCOME: "expected_outcome",
34871
34858
  REFERENCE_ANSWER: "reference_answer",
34872
- INPUT_MESSAGES: "input_messages"
34859
+ INPUT_MESSAGES: "input_messages",
34860
+ OUTPUT_MESSAGES: "output_messages"
34873
34861
  };
34874
34862
  var VALID_TEMPLATE_VARIABLES = new Set(Object.values(TEMPLATE_VARIABLES));
34875
34863
  var REQUIRED_TEMPLATE_VARIABLES = /* @__PURE__ */ new Set([
@@ -35738,16 +35726,16 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
35738
35726
  }) : [];
35739
35727
  const codeSnippets = extractCodeBlocks(inputSegments);
35740
35728
  let referenceAnswer = "";
35741
- if (outputSegments.length > 1) {
35742
- referenceAnswer = JSON.stringify(outputSegments, null, 2);
35743
- } else if (outputSegments.length === 1) {
35744
- const singleMessage = outputSegments[0];
35745
- if (typeof singleMessage.content === "string") {
35746
- referenceAnswer = singleMessage.content;
35747
- } else if (singleMessage.content) {
35748
- referenceAnswer = JSON.stringify(singleMessage, null, 2);
35749
- } else if (singleMessage.tool_calls) {
35750
- referenceAnswer = JSON.stringify(singleMessage, null, 2);
35729
+ if (outputSegments.length > 0) {
35730
+ const lastMessage = outputSegments[outputSegments.length - 1];
35731
+ const content = lastMessage.content;
35732
+ const toolCalls = lastMessage.tool_calls;
35733
+ if (typeof content === "string") {
35734
+ referenceAnswer = content;
35735
+ } else if (content !== void 0 && content !== null) {
35736
+ referenceAnswer = JSON.stringify(content, null, 2);
35737
+ } else if (toolCalls !== void 0 && toolCalls !== null) {
35738
+ referenceAnswer = JSON.stringify(toolCalls, null, 2);
35751
35739
  }
35752
35740
  }
35753
35741
  const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
@@ -36069,11 +36057,11 @@ async function invokeModel(options) {
36069
36057
  return mapResponse(result);
36070
36058
  }
36071
36059
  function mapResponse(result) {
36060
+ const content = result.text ?? "";
36072
36061
  return {
36073
- text: result.text ?? "",
36074
- reasoning: result.reasoningText ?? void 0,
36075
36062
  raw: result,
36076
- usage: toJsonObject(result.totalUsage ?? result.usage)
36063
+ usage: toJsonObject(result.totalUsage ?? result.usage),
36064
+ outputMessages: [{ role: "assistant", content }]
36077
36065
  };
36078
36066
  }
36079
36067
  function toJsonObject(value) {
@@ -36180,7 +36168,7 @@ async function withRetry(fn, retryConfig, signal) {
36180
36168
  }
36181
36169
  var execAsync2 = promisify2(execWithCallback);
36182
36170
  var DEFAULT_MAX_BUFFER = 10 * 1024 * 1024;
36183
- async function defaultCommandRunner(command5, options) {
36171
+ async function defaultCommandRunner(command6, options) {
36184
36172
  const execOptions = {
36185
36173
  cwd: options.cwd,
36186
36174
  env: options.env,
@@ -36190,7 +36178,7 @@ async function defaultCommandRunner(command5, options) {
36190
36178
  shell: process.platform === "win32" ? "powershell.exe" : void 0
36191
36179
  };
36192
36180
  try {
36193
- const { stdout, stderr } = await execAsync2(command5, execOptions);
36181
+ const { stdout, stderr } = await execAsync2(command6, execOptions);
36194
36182
  return {
36195
36183
  stdout,
36196
36184
  stderr,
@@ -36219,6 +36207,7 @@ var CliProvider = class {
36219
36207
  config;
36220
36208
  runCommand;
36221
36209
  verbose;
36210
+ keepTempFiles;
36222
36211
  healthcheckPromise;
36223
36212
  constructor(targetName, config2, runner = defaultCommandRunner) {
36224
36213
  this.targetName = targetName;
@@ -36226,6 +36215,7 @@ var CliProvider = class {
36226
36215
  this.config = config2;
36227
36216
  this.runCommand = runner;
36228
36217
  this.verbose = config2.verbose ?? false;
36218
+ this.keepTempFiles = config2.keepTempFiles ?? false;
36229
36219
  }
36230
36220
  async invoke(request) {
36231
36221
  if (request.signal?.aborted) {
@@ -36263,8 +36253,7 @@ var CliProvider = class {
36263
36253
  const responseContent = await this.readAndCleanupOutputFile(outputFilePath);
36264
36254
  const parsed = this.parseOutputContent(responseContent);
36265
36255
  return {
36266
- text: parsed.text,
36267
- trace: parsed.trace,
36256
+ outputMessages: parsed.outputMessages,
36268
36257
  raw: {
36269
36258
  command: renderedCommand,
36270
36259
  stderr: result.stderr,
@@ -36343,7 +36332,7 @@ var CliProvider = class {
36343
36332
  const evalCaseId = request.evalCaseId;
36344
36333
  if (!evalCaseId) {
36345
36334
  return {
36346
- text: "",
36335
+ outputMessages: [],
36347
36336
  raw: {
36348
36337
  command: renderedCommand,
36349
36338
  stderr: result.stderr,
@@ -36356,7 +36345,7 @@ var CliProvider = class {
36356
36345
  const parsed = recordsById.get(evalCaseId);
36357
36346
  if (!parsed) {
36358
36347
  return {
36359
- text: "",
36348
+ outputMessages: [],
36360
36349
  raw: {
36361
36350
  command: renderedCommand,
36362
36351
  stderr: result.stderr,
@@ -36367,9 +36356,7 @@ var CliProvider = class {
36367
36356
  };
36368
36357
  }
36369
36358
  return {
36370
- text: parsed.text,
36371
- trace: parsed.trace,
36372
- traceRef: parsed.traceRef,
36359
+ outputMessages: parsed.outputMessages,
36373
36360
  raw: {
36374
36361
  command: renderedCommand,
36375
36362
  stderr: result.stderr,
@@ -36384,28 +36371,81 @@ var CliProvider = class {
36384
36371
  }
36385
36372
  /**
36386
36373
  * Parse output content from CLI.
36387
- * If the content is valid JSON with a 'text' field, extract text and optional trace.
36388
- * Otherwise, treat the entire content as plain text.
36374
+ * If the content is valid JSON with 'output_messages' or 'text' field, extract them.
36375
+ * If only 'text' is provided, wrap it in outputMessages.
36376
+ * Otherwise, treat the entire content as plain text wrapped in outputMessages.
36389
36377
  */
36390
36378
  parseOutputContent(content) {
36391
36379
  try {
36392
36380
  const parsed = JSON.parse(content);
36393
- if (typeof parsed === "object" && parsed !== null && "text" in parsed) {
36381
+ if (typeof parsed === "object" && parsed !== null) {
36394
36382
  const obj = parsed;
36395
- const text2 = typeof obj.text === "string" ? obj.text : String(obj.text);
36396
- const trace2 = this.parseTrace(obj.trace);
36397
- return { text: text2, trace: trace2 };
36383
+ const outputMessages = this.parseOutputMessages(obj.output_messages);
36384
+ if (outputMessages && outputMessages.length > 0) {
36385
+ return { outputMessages };
36386
+ }
36387
+ if ("text" in obj) {
36388
+ const text2 = typeof obj.text === "string" ? obj.text : String(obj.text);
36389
+ return { outputMessages: [{ role: "assistant", content: text2 }] };
36390
+ }
36398
36391
  }
36399
36392
  } catch {
36400
36393
  }
36401
- return { text: content };
36394
+ return { outputMessages: [{ role: "assistant", content }] };
36402
36395
  }
36403
- parseTrace(trace2) {
36404
- if (!Array.isArray(trace2)) {
36396
+ /**
36397
+ * Parse output_messages from JSONL (snake_case) and convert to OutputMessage[] (camelCase).
36398
+ */
36399
+ parseOutputMessages(outputMessages) {
36400
+ if (!Array.isArray(outputMessages)) {
36405
36401
  return void 0;
36406
36402
  }
36407
- const validEvents = trace2.filter(isTraceEvent);
36408
- return validEvents.length > 0 ? validEvents : void 0;
36403
+ const messages = [];
36404
+ for (const msg of outputMessages) {
36405
+ if (typeof msg !== "object" || msg === null) {
36406
+ continue;
36407
+ }
36408
+ const rawMsg = msg;
36409
+ if (typeof rawMsg.role !== "string") {
36410
+ continue;
36411
+ }
36412
+ const message = {
36413
+ role: rawMsg.role,
36414
+ name: typeof rawMsg.name === "string" ? rawMsg.name : void 0,
36415
+ content: rawMsg.content,
36416
+ toolCalls: this.parseToolCalls(rawMsg.tool_calls),
36417
+ timestamp: typeof rawMsg.timestamp === "string" ? rawMsg.timestamp : void 0,
36418
+ metadata: typeof rawMsg.metadata === "object" && rawMsg.metadata !== null ? rawMsg.metadata : void 0
36419
+ };
36420
+ messages.push(message);
36421
+ }
36422
+ return messages.length > 0 ? messages : void 0;
36423
+ }
36424
+ /**
36425
+ * Parse tool_calls from JSONL (snake_case) and convert to ToolCall[] format.
36426
+ */
36427
+ parseToolCalls(toolCalls) {
36428
+ if (!Array.isArray(toolCalls)) {
36429
+ return void 0;
36430
+ }
36431
+ const calls = [];
36432
+ for (const call of toolCalls) {
36433
+ if (typeof call !== "object" || call === null) {
36434
+ continue;
36435
+ }
36436
+ const rawCall = call;
36437
+ if (typeof rawCall.tool !== "string") {
36438
+ continue;
36439
+ }
36440
+ calls.push({
36441
+ tool: rawCall.tool,
36442
+ input: rawCall.input,
36443
+ output: rawCall.output,
36444
+ id: typeof rawCall.id === "string" ? rawCall.id : void 0,
36445
+ timestamp: typeof rawCall.timestamp === "string" ? rawCall.timestamp : void 0
36446
+ });
36447
+ }
36448
+ return calls.length > 0 ? calls : void 0;
36409
36449
  }
36410
36450
  parseJsonlBatchOutput(content) {
36411
36451
  const records = /* @__PURE__ */ new Map();
@@ -36429,12 +36469,16 @@ var CliProvider = class {
36429
36469
  if (records.has(id)) {
36430
36470
  throw new Error(`CLI batch output contains duplicate id: ${id}`);
36431
36471
  }
36432
- const text2 = typeof obj.text === "string" ? obj.text : obj.text === void 0 ? "" : JSON.stringify(obj.text);
36433
- const traceRef = typeof obj.traceRef === "string" ? obj.traceRef : typeof obj.trace_ref === "string" ? obj.trace_ref : void 0;
36472
+ const parsedOutputMessages = this.parseOutputMessages(obj.output_messages);
36473
+ let outputMessages;
36474
+ if (parsedOutputMessages && parsedOutputMessages.length > 0) {
36475
+ outputMessages = parsedOutputMessages;
36476
+ } else {
36477
+ const text2 = typeof obj.text === "string" ? obj.text : obj.text === void 0 ? "" : JSON.stringify(obj.text);
36478
+ outputMessages = text2 ? [{ role: "assistant", content: text2 }] : [];
36479
+ }
36434
36480
  records.set(id, {
36435
- text: text2,
36436
- trace: this.parseTrace(obj.trace),
36437
- traceRef
36481
+ outputMessages
36438
36482
  });
36439
36483
  }
36440
36484
  return records;
@@ -36447,8 +36491,10 @@ var CliProvider = class {
36447
36491
  const errorMsg = error40 instanceof Error ? error40.message : String(error40);
36448
36492
  throw new Error(`Failed to read output file '${filePath}': ${errorMsg}`);
36449
36493
  } finally {
36450
- await fs.unlink(filePath).catch(() => {
36451
- });
36494
+ if (!this.keepTempFiles) {
36495
+ await fs.unlink(filePath).catch(() => {
36496
+ });
36497
+ }
36452
36498
  }
36453
36499
  }
36454
36500
  async ensureHealthy(signal) {
@@ -36768,7 +36814,6 @@ var CodexProvider = class {
36768
36814
  const parsed = parseCodexJson(result.stdout);
36769
36815
  const assistantText = extractAssistantText(parsed);
36770
36816
  return {
36771
- text: assistantText,
36772
36817
  raw: {
36773
36818
  response: parsed,
36774
36819
  stdout: result.stdout,
@@ -36780,7 +36825,8 @@ var CodexProvider = class {
36780
36825
  workspace: workspaceRoot,
36781
36826
  inputFiles,
36782
36827
  logFile: logger?.filePath
36783
- }
36828
+ },
36829
+ outputMessages: [{ role: "assistant", content: assistantText }]
36784
36830
  };
36785
36831
  } finally {
36786
36832
  await logger?.close();
@@ -37400,7 +37446,6 @@ var MockProvider = class {
37400
37446
  delayMs;
37401
37447
  delayMinMs;
37402
37448
  delayMaxMs;
37403
- trace;
37404
37449
  constructor(targetName, config2) {
37405
37450
  this.id = `mock:${targetName}`;
37406
37451
  this.targetName = targetName;
@@ -37408,7 +37453,6 @@ var MockProvider = class {
37408
37453
  this.delayMs = config2.delayMs ?? 0;
37409
37454
  this.delayMinMs = config2.delayMinMs ?? 0;
37410
37455
  this.delayMaxMs = config2.delayMaxMs ?? 0;
37411
- this.trace = config2.trace;
37412
37456
  }
37413
37457
  async invoke(request) {
37414
37458
  const delay2 = this.calculateDelay();
@@ -37416,12 +37460,11 @@ var MockProvider = class {
37416
37460
  await new Promise((resolve2) => setTimeout(resolve2, delay2));
37417
37461
  }
37418
37462
  return {
37419
- text: this.cannedResponse,
37463
+ outputMessages: [{ role: "assistant", content: this.cannedResponse }],
37420
37464
  raw: {
37421
37465
  question: request.question,
37422
37466
  guidelines: request.guidelines
37423
- },
37424
- trace: this.trace
37467
+ }
37425
37468
  };
37426
37469
  }
37427
37470
  calculateDelay() {
@@ -37501,7 +37544,7 @@ var VSCodeProvider = class {
37501
37544
  }
37502
37545
  if (this.config.dryRun) {
37503
37546
  return {
37504
- text: "",
37547
+ outputMessages: [],
37505
37548
  raw: {
37506
37549
  session,
37507
37550
  inputFiles
@@ -37510,7 +37553,7 @@ var VSCodeProvider = class {
37510
37553
  }
37511
37554
  const responseText = await readTextFile(session.responseFile);
37512
37555
  return {
37513
- text: responseText,
37556
+ outputMessages: [{ role: "assistant", content: responseText }],
37514
37557
  raw: {
37515
37558
  session,
37516
37559
  inputFiles
@@ -37548,7 +37591,7 @@ var VSCodeProvider = class {
37548
37591
  }
37549
37592
  if (this.config.dryRun) {
37550
37593
  return normalizedRequests.map(({ inputFiles }) => ({
37551
- text: "",
37594
+ outputMessages: [],
37552
37595
  raw: {
37553
37596
  session,
37554
37597
  inputFiles,
@@ -37565,7 +37608,7 @@ var VSCodeProvider = class {
37565
37608
  for (const [index, responseFile] of session.responseFiles.entries()) {
37566
37609
  const responseText = await readTextFile(responseFile);
37567
37610
  responses.push({
37568
- text: responseText,
37611
+ outputMessages: [{ role: "assistant", content: responseText }],
37569
37612
  raw: {
37570
37613
  session,
37571
37614
  inputFiles: normalizedRequests[index]?.inputFiles,
@@ -37853,6 +37896,7 @@ var LlmJudgeEvaluator = class {
37853
37896
  null,
37854
37897
  2
37855
37898
  ),
37899
+ [TEMPLATE_VARIABLES.OUTPUT_MESSAGES]: JSON.stringify(context.outputMessages ?? [], null, 2),
37856
37900
  [TEMPLATE_VARIABLES.CANDIDATE_ANSWER]: context.candidate.trim(),
37857
37901
  [TEMPLATE_VARIABLES.REFERENCE_ANSWER]: (context.evalCase.reference_answer ?? "").trim(),
37858
37902
  [TEMPLATE_VARIABLES.EXPECTED_OUTCOME]: context.evalCase.expected_outcome.trim(),
@@ -37877,7 +37921,7 @@ var LlmJudgeEvaluator = class {
37877
37921
  const score = clampScore(data.score);
37878
37922
  const hits = Array.isArray(data.hits) ? data.hits.filter(isNonEmptyString).slice(0, 4) : [];
37879
37923
  const misses = Array.isArray(data.misses) ? data.misses.filter(isNonEmptyString).slice(0, 4) : [];
37880
- const reasoning = data.reasoning ?? providerResponse?.reasoning;
37924
+ const reasoning = data.reasoning;
37881
37925
  const expectedAspectCount = Math.max(hits.length + misses.length, 1);
37882
37926
  return {
37883
37927
  score,
@@ -37979,7 +38023,9 @@ var LlmJudgeEvaluator = class {
37979
38023
  maxOutputTokens: this.maxOutputTokens,
37980
38024
  temperature: this.temperature
37981
38025
  });
37982
- const data = schema.parse(parseJsonFromText(response.text ?? ""));
38026
+ const data = schema.parse(
38027
+ parseJsonFromText(extractLastAssistantContent(response.outputMessages))
38028
+ );
37983
38029
  return { data, providerResponse: response };
37984
38030
  } catch (e) {
37985
38031
  lastError = e instanceof Error ? e : new Error(String(e));
@@ -38065,13 +38111,13 @@ var CodeEvaluator = class {
38065
38111
  expected_messages: context.evalCase.expected_messages,
38066
38112
  reference_answer: context.evalCase.reference_answer,
38067
38113
  candidate_answer: context.candidate,
38114
+ output_messages: context.outputMessages ?? null,
38068
38115
  guideline_files: context.evalCase.guideline_paths,
38069
38116
  input_files: context.evalCase.file_paths.filter(
38070
38117
  (path132) => !context.evalCase.guideline_paths.includes(path132)
38071
38118
  ),
38072
38119
  input_messages: context.evalCase.input_messages,
38073
- candidate_trace_file: context.candidateTraceRef ?? null,
38074
- candidate_trace_summary: context.candidateTraceSummary ?? null
38120
+ candidate_trace_summary: context.traceSummary ?? null
38075
38121
  },
38076
38122
  null,
38077
38123
  2
@@ -38198,8 +38244,19 @@ var ToolTrajectoryEvaluator = class {
38198
38244
  this.config = options.config;
38199
38245
  }
38200
38246
  evaluate(context) {
38201
- const { candidateTrace, candidateTraceSummary } = context;
38202
- if (!candidateTrace || !candidateTraceSummary) {
38247
+ const { outputMessages, traceSummary } = context;
38248
+ const toolCalls = this.extractToolCallsFromMessages(outputMessages);
38249
+ if (toolCalls.length === 0 && !traceSummary) {
38250
+ return {
38251
+ score: 0,
38252
+ verdict: "fail",
38253
+ hits: [],
38254
+ misses: ["No trace available for evaluation"],
38255
+ expectedAspectCount: 1
38256
+ };
38257
+ }
38258
+ const summary = toolCalls.length > 0 ? this.buildSummary(toolCalls) : traceSummary;
38259
+ if (!summary) {
38203
38260
  return {
38204
38261
  score: 0,
38205
38262
  verdict: "fail",
@@ -38210,11 +38267,11 @@ var ToolTrajectoryEvaluator = class {
38210
38267
  }
38211
38268
  switch (this.config.mode) {
38212
38269
  case "any_order":
38213
- return this.evaluateAnyOrder(candidateTraceSummary);
38270
+ return this.evaluateAnyOrder(summary);
38214
38271
  case "in_order":
38215
- return this.evaluateInOrder(candidateTrace);
38272
+ return this.evaluateInOrder(toolCalls);
38216
38273
  case "exact":
38217
- return this.evaluateExact(candidateTrace);
38274
+ return this.evaluateExact(toolCalls);
38218
38275
  default:
38219
38276
  return {
38220
38277
  score: 0,
@@ -38225,6 +38282,39 @@ var ToolTrajectoryEvaluator = class {
38225
38282
  };
38226
38283
  }
38227
38284
  }
38285
+ /**
38286
+ * Extract tool calls from output messages.
38287
+ */
38288
+ extractToolCallsFromMessages(messages) {
38289
+ if (!messages) {
38290
+ return [];
38291
+ }
38292
+ const toolCalls = [];
38293
+ for (const message of messages) {
38294
+ if (message.toolCalls) {
38295
+ for (const call of message.toolCalls) {
38296
+ toolCalls.push({ name: call.tool });
38297
+ }
38298
+ }
38299
+ }
38300
+ return toolCalls;
38301
+ }
38302
+ /**
38303
+ * Build a summary from extracted tool calls.
38304
+ */
38305
+ buildSummary(toolCalls) {
38306
+ const toolCallsByName = {};
38307
+ for (const call of toolCalls) {
38308
+ toolCallsByName[call.name] = (toolCallsByName[call.name] ?? 0) + 1;
38309
+ }
38310
+ const toolNames = Object.keys(toolCallsByName).sort();
38311
+ return {
38312
+ eventCount: toolCalls.length,
38313
+ toolNames,
38314
+ toolCallsByName,
38315
+ errorCount: 0
38316
+ };
38317
+ }
38228
38318
  evaluateAnyOrder(summary) {
38229
38319
  const minimums = this.config.minimums ?? {};
38230
38320
  const toolNames = Object.keys(minimums);
@@ -38257,7 +38347,7 @@ var ToolTrajectoryEvaluator = class {
38257
38347
  expectedAspectCount: toolNames.length
38258
38348
  };
38259
38349
  }
38260
- evaluateInOrder(trace2) {
38350
+ evaluateInOrder(toolCalls) {
38261
38351
  const expected = this.config.expected ?? [];
38262
38352
  if (expected.length === 0) {
38263
38353
  return {
@@ -38268,15 +38358,14 @@ var ToolTrajectoryEvaluator = class {
38268
38358
  expectedAspectCount: 0
38269
38359
  };
38270
38360
  }
38271
- const actualToolCalls = trace2.filter((e) => e.type === "tool_call" && e.name);
38272
38361
  const hits = [];
38273
38362
  const misses = [];
38274
38363
  let actualIndex = 0;
38275
38364
  for (let i = 0; i < expected.length; i++) {
38276
38365
  const expectedTool = expected[i].tool;
38277
38366
  let found = false;
38278
- while (actualIndex < actualToolCalls.length) {
38279
- if (actualToolCalls[actualIndex].name === expectedTool) {
38367
+ while (actualIndex < toolCalls.length) {
38368
+ if (toolCalls[actualIndex].name === expectedTool) {
38280
38369
  hits.push(`Found ${expectedTool} at position ${actualIndex}`);
38281
38370
  actualIndex++;
38282
38371
  found = true;
@@ -38297,7 +38386,7 @@ var ToolTrajectoryEvaluator = class {
38297
38386
  expectedAspectCount: expected.length
38298
38387
  };
38299
38388
  }
38300
- evaluateExact(trace2) {
38389
+ evaluateExact(toolCalls) {
38301
38390
  const expected = this.config.expected ?? [];
38302
38391
  if (expected.length === 0) {
38303
38392
  return {
@@ -38308,16 +38397,15 @@ var ToolTrajectoryEvaluator = class {
38308
38397
  expectedAspectCount: 0
38309
38398
  };
38310
38399
  }
38311
- const actualToolCalls = trace2.filter((e) => e.type === "tool_call" && e.name);
38312
38400
  const hits = [];
38313
38401
  const misses = [];
38314
- if (actualToolCalls.length !== expected.length) {
38315
- misses.push(`Expected ${expected.length} tool calls, got ${actualToolCalls.length}`);
38402
+ if (toolCalls.length !== expected.length) {
38403
+ misses.push(`Expected ${expected.length} tool calls, got ${toolCalls.length}`);
38316
38404
  }
38317
- const checkLength = Math.min(expected.length, actualToolCalls.length);
38405
+ const checkLength = Math.min(expected.length, toolCalls.length);
38318
38406
  for (let i = 0; i < checkLength; i++) {
38319
38407
  const expectedTool = expected[i].tool;
38320
- const actualTool = actualToolCalls[i].name;
38408
+ const actualTool = toolCalls[i].name;
38321
38409
  if (actualTool === expectedTool) {
38322
38410
  hits.push(`Position ${i}: ${expectedTool} \u2713`);
38323
38411
  } else {
@@ -38531,11 +38619,13 @@ var CompositeEvaluator = class {
38531
38619
  evalCaseId: context.evalCase.id,
38532
38620
  attempt: context.attempt
38533
38621
  });
38534
- const data = freeformEvaluationSchema.parse(parseJsonFromText(response.text ?? ""));
38622
+ const data = freeformEvaluationSchema.parse(
38623
+ parseJsonFromText(extractLastAssistantContent(response.outputMessages))
38624
+ );
38535
38625
  const score = clampScore(data.score);
38536
38626
  const hits = Array.isArray(data.hits) ? data.hits.filter(isNonEmptyString).slice(0, 4) : [];
38537
38627
  const misses = Array.isArray(data.misses) ? data.misses.filter(isNonEmptyString).slice(0, 4) : [];
38538
- const reasoning = data.reasoning ?? response.reasoning;
38628
+ const reasoning = data.reasoning;
38539
38629
  return {
38540
38630
  score,
38541
38631
  verdict: scoreToVerdict(score),
@@ -38947,11 +39037,14 @@ async function runBatchEvaluation(options) {
38947
39037
  const evalCase = evalCases[i];
38948
39038
  const promptInputs = promptInputsList[i];
38949
39039
  const providerResponse = batchResponse[i];
39040
+ const outputMessages = providerResponse.outputMessages;
39041
+ const traceSummary = outputMessages ? computeTraceSummary(outputMessages) : void 0;
39042
+ const candidate = extractLastAssistantContent(outputMessages);
38950
39043
  let result;
38951
39044
  try {
38952
39045
  result = await evaluateCandidate({
38953
39046
  evalCase,
38954
- candidate: providerResponse.text ?? "",
39047
+ candidate,
38955
39048
  target,
38956
39049
  provider,
38957
39050
  evaluators: evaluatorRegistry,
@@ -38959,7 +39052,9 @@ async function runBatchEvaluation(options) {
38959
39052
  nowFn,
38960
39053
  attempt: 0,
38961
39054
  judgeProvider: await resolveJudgeProvider(target),
38962
- agentTimeoutMs
39055
+ agentTimeoutMs,
39056
+ outputMessages,
39057
+ traceSummary
38963
39058
  });
38964
39059
  } catch (error40) {
38965
39060
  const errorResult = buildErrorResult(
@@ -39063,21 +39158,13 @@ async function runEvalCase(options) {
39063
39158
  if (cacheKey && cache && !cachedResponse) {
39064
39159
  await cache.set(cacheKey, providerResponse);
39065
39160
  }
39066
- let candidateTrace = providerResponse.trace;
39067
- if (!candidateTrace && providerResponse.traceRef) {
39068
- try {
39069
- const rawTrace = await readJsonFile(providerResponse.traceRef);
39070
- if (Array.isArray(rawTrace) && rawTrace.every(isTraceEvent)) {
39071
- candidateTrace = rawTrace;
39072
- }
39073
- } catch {
39074
- }
39075
- }
39076
- const candidateTraceSummary = candidateTrace ? computeTraceSummary(candidateTrace) : void 0;
39161
+ const outputMessages = providerResponse.outputMessages;
39162
+ const traceSummary = outputMessages ? computeTraceSummary(outputMessages) : void 0;
39163
+ const candidate = extractLastAssistantContent(outputMessages);
39077
39164
  try {
39078
39165
  return await evaluateCandidate({
39079
39166
  evalCase,
39080
- candidate: providerResponse.text ?? "",
39167
+ candidate,
39081
39168
  target,
39082
39169
  provider,
39083
39170
  evaluators,
@@ -39086,9 +39173,8 @@ async function runEvalCase(options) {
39086
39173
  attempt,
39087
39174
  judgeProvider,
39088
39175
  agentTimeoutMs,
39089
- candidateTrace,
39090
- candidateTraceRef: providerResponse.traceRef,
39091
- candidateTraceSummary
39176
+ outputMessages,
39177
+ traceSummary
39092
39178
  });
39093
39179
  } catch (error40) {
39094
39180
  return buildErrorResult(evalCase, target.name, nowFn(), error40, promptInputs, provider);
@@ -39106,9 +39192,8 @@ async function evaluateCandidate(options) {
39106
39192
  attempt,
39107
39193
  judgeProvider,
39108
39194
  agentTimeoutMs,
39109
- candidateTrace,
39110
- candidateTraceRef,
39111
- candidateTraceSummary
39195
+ outputMessages,
39196
+ traceSummary
39112
39197
  } = options;
39113
39198
  const gradeTimestamp = nowFn();
39114
39199
  const { score, evaluatorResults } = await runEvaluatorsForCase({
@@ -39122,9 +39207,8 @@ async function evaluateCandidate(options) {
39122
39207
  now: gradeTimestamp,
39123
39208
  judgeProvider,
39124
39209
  agentTimeoutMs,
39125
- candidateTrace,
39126
- candidateTraceRef,
39127
- candidateTraceSummary
39210
+ outputMessages,
39211
+ traceSummary
39128
39212
  });
39129
39213
  const completedAt = nowFn();
39130
39214
  let agentProviderRequest;
@@ -39162,7 +39246,7 @@ async function evaluateCandidate(options) {
39162
39246
  lm_provider_request: lmProviderRequest,
39163
39247
  evaluator_provider_request: evaluatorResults ? void 0 : score.evaluatorRawRequest,
39164
39248
  evaluator_results: evaluatorResults,
39165
- trace_summary: candidateTraceSummary
39249
+ trace_summary: traceSummary
39166
39250
  };
39167
39251
  }
39168
39252
  async function runEvaluatorsForCase(options) {
@@ -39177,9 +39261,8 @@ async function runEvaluatorsForCase(options) {
39177
39261
  now,
39178
39262
  judgeProvider,
39179
39263
  agentTimeoutMs,
39180
- candidateTrace,
39181
- candidateTraceRef,
39182
- candidateTraceSummary
39264
+ outputMessages,
39265
+ traceSummary
39183
39266
  } = options;
39184
39267
  if (evalCase.evaluators && evalCase.evaluators.length > 0) {
39185
39268
  return runEvaluatorList({
@@ -39194,9 +39277,8 @@ async function runEvaluatorsForCase(options) {
39194
39277
  now,
39195
39278
  judgeProvider,
39196
39279
  agentTimeoutMs,
39197
- candidateTrace,
39198
- candidateTraceRef,
39199
- candidateTraceSummary
39280
+ outputMessages,
39281
+ traceSummary
39200
39282
  });
39201
39283
  }
39202
39284
  const evaluatorKind = evalCase.evaluator ?? "llm_judge";
@@ -39213,9 +39295,8 @@ async function runEvaluatorsForCase(options) {
39213
39295
  promptInputs,
39214
39296
  now,
39215
39297
  judgeProvider,
39216
- candidateTrace,
39217
- candidateTraceRef,
39218
- candidateTraceSummary
39298
+ outputMessages,
39299
+ traceSummary
39219
39300
  });
39220
39301
  return { score };
39221
39302
  }
@@ -39232,9 +39313,8 @@ async function runEvaluatorList(options) {
39232
39313
  now,
39233
39314
  judgeProvider,
39234
39315
  agentTimeoutMs,
39235
- candidateTrace,
39236
- candidateTraceRef,
39237
- candidateTraceSummary
39316
+ outputMessages,
39317
+ traceSummary
39238
39318
  } = options;
39239
39319
  const scored = [];
39240
39320
  const evaluatorResults = [];
@@ -39281,8 +39361,8 @@ async function runEvaluatorList(options) {
39281
39361
  attempt,
39282
39362
  promptInputs,
39283
39363
  now,
39284
- candidateTraceRef,
39285
- candidateTraceSummary
39364
+ outputMessages,
39365
+ traceSummary
39286
39366
  });
39287
39367
  const weight = evaluator.weight ?? 1;
39288
39368
  scored.push({ score: score2, name: evaluator.name, type: "code_judge", weight });
@@ -39368,9 +39448,8 @@ async function runEvaluatorList(options) {
39368
39448
  attempt,
39369
39449
  promptInputs,
39370
39450
  now,
39371
- candidateTrace,
39372
- candidateTraceRef,
39373
- candidateTraceSummary
39451
+ outputMessages,
39452
+ traceSummary
39374
39453
  });
39375
39454
  const weight = evaluator.weight ?? 1;
39376
39455
  scored.push({ score: score2, name: evaluator.name, type: evaluator.type, weight });
@@ -39730,16 +39809,90 @@ function buildPrompt(expectedOutcome, question, referenceAnswer) {
39730
39809
  return parts.join("\n");
39731
39810
  }
39732
39811
 
39812
+ // src/commands/convert/index.ts
39813
+ import { command, option, optional as optional2, positional, string as string4 } from "cmd-ts";
39814
+ import { stringify as stringifyYaml } from "yaml";
39815
/**
 * Convert a JSONL file into a multi-document YAML file.
 *
 * Each non-blank input line is parsed as JSON and serialized as one YAML
 * document. The first document is introduced by "---\n" and every following
 * one by "\n---\n". The output file is written only after every line has
 * been parsed and serialized.
 *
 * @param {string} inputPath Path of the JSONL file to read (UTF-8).
 * @param {string} outputPath Path of the YAML file to write.
 * @returns {number} Number of records converted.
 * @throws {Error} When a non-blank line is not valid JSON; the message names
 *   the 1-based line number and the input path. Filesystem errors from
 *   reading or writing propagate unchanged.
 */
function convertJsonlToYaml(inputPath, outputPath) {
  const content = readFileSync(inputPath, "utf8");
  const documents = [];
  // Split before filtering so the reported line number matches the source file.
  content.split("\n").forEach((rawLine, index) => {
    const line = rawLine.trim();
    if (!line) {
      return;
    }
    let record;
    try {
      record = JSON.parse(line);
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      throw new Error(`Invalid JSON on line ${index + 1} of ${inputPath}: ${reason}`, {
        cause: error
      });
    }
    const yamlDoc = stringifyYaml(record, {
      indent: 2,
      // Disable line wrapping
      lineWidth: 0
    });
    documents.push(normalizeLineEndings(yamlDoc));
  });
  const yamlOutput = documents.map((doc, position) => (position === 0 ? "---\n" : "\n---\n") + doc).join("");
  writeFileSync(outputPath, yamlOutput);
  return documents.length;
}
39834
/**
 * `convert` CLI subcommand: turns a JSONL results file into YAML.
 *
 * Arguments:
 *   - `input` (positional): path to the input file; must end in ".jsonl".
 *   - `--out` / `-o` (optional): output path; defaults to the input path with
 *     its trailing ".jsonl" replaced by ".yaml".
 *
 * On a wrong extension or a conversion failure the handler prints an
 * "Error: ..." line to stderr and exits the process with status 1.
 */
var convertCommand = command({
  name: "convert",
  description: "Convert evaluation results from JSONL to YAML format",
  args: {
    input: positional({
      type: string4,
      displayName: "input",
      description: "Path to input JSONL file"
    }),
    out: option({
      type: optional2(string4),
      long: "out",
      short: "o",
      description: "Output file path (defaults to input path with .yaml extension)"
    })
  },
  handler: async ({ input, out }) => {
    if (!input.endsWith(".jsonl")) {
      console.error("Error: Input file must be a .jsonl file");
      process.exit(1);
    }
    const outputPath = out ?? input.replace(/\.jsonl$/, ".yaml");
    try {
      const count = convertJsonlToYaml(input, outputPath);
      console.log(`Converted ${count} records to ${path14.resolve(outputPath)}`);
    } catch (error40) {
      // Any value can be thrown; only Error instances are guaranteed to carry
      // a `.message`, so fall back to the string form instead of "undefined".
      const reason = error40 instanceof Error ? error40.message : String(error40);
      console.error(`Error: ${reason}`);
      process.exit(1);
    }
  }
});
39865
+
39866
+ // src/commands/eval/index.ts
39867
+ import { stat as stat4 } from "node:fs/promises";
39868
+ import path21 from "node:path";
39869
+ import {
39870
+ command as command2,
39871
+ flag,
39872
+ number as number4,
39873
+ option as option2,
39874
+ optional as optional3,
39875
+ restPositionals,
39876
+ string as string5
39877
+ } from "cmd-ts";
39878
+ import fg from "fast-glob";
39879
+
39880
+ // src/commands/eval/run-eval.ts
39881
+ import { constants as constants6 } from "node:fs";
39882
+ import { access as access6, mkdir as mkdir7 } from "node:fs/promises";
39883
+ import path20 from "node:path";
39884
+ import { pathToFileURL } from "node:url";
39885
+
39733
39886
  // src/commands/eval/env.ts
39734
39887
  import { constants as constants4 } from "node:fs";
39735
39888
  import { access as access4 } from "node:fs/promises";
39736
- import path14 from "node:path";
39889
+ import path15 from "node:path";
39737
39890
  import { config as loadDotenv } from "dotenv";
39738
39891
  function uniqueDirs(directories) {
39739
39892
  const seen = /* @__PURE__ */ new Set();
39740
39893
  const result = [];
39741
39894
  for (const dir of directories) {
39742
- const absolute = path14.resolve(dir);
39895
+ const absolute = path15.resolve(dir);
39743
39896
  if (seen.has(absolute)) {
39744
39897
  continue;
39745
39898
  }
@@ -39758,14 +39911,14 @@ async function fileExists4(filePath) {
39758
39911
  }
39759
39912
  function collectAncestorDirectories(start, boundary) {
39760
39913
  const directories = [];
39761
- const boundaryDir = path14.resolve(boundary);
39762
- let current = path14.resolve(start);
39914
+ const boundaryDir = path15.resolve(boundary);
39915
+ let current = path15.resolve(start);
39763
39916
  while (current !== void 0) {
39764
39917
  directories.push(current);
39765
39918
  if (current === boundaryDir) {
39766
39919
  break;
39767
39920
  }
39768
- const parent = path14.dirname(current);
39921
+ const parent = path15.dirname(current);
39769
39922
  if (parent === current) {
39770
39923
  break;
39771
39924
  }
@@ -39775,12 +39928,12 @@ function collectAncestorDirectories(start, boundary) {
39775
39928
  }
39776
39929
  async function loadEnvFromHierarchy(options) {
39777
39930
  const { testFilePath, repoRoot, verbose } = options;
39778
- const testDir = path14.dirname(path14.resolve(testFilePath));
39931
+ const testDir = path15.dirname(path15.resolve(testFilePath));
39779
39932
  const cwd = process.cwd();
39780
39933
  const searchDirs = uniqueDirs([...collectAncestorDirectories(testDir, repoRoot), repoRoot, cwd]);
39781
39934
  const envFiles = [];
39782
39935
  for (const dir of searchDirs) {
39783
- const candidate = path14.join(dir, ".env");
39936
+ const candidate = path15.join(dir, ".env");
39784
39937
  if (await fileExists4(candidate)) {
39785
39938
  envFiles.push(candidate);
39786
39939
  }
@@ -39804,7 +39957,7 @@ async function loadEnvFromHierarchy(options) {
39804
39957
  // src/commands/eval/jsonl-writer.ts
39805
39958
  import { createWriteStream as createWriteStream2 } from "node:fs";
39806
39959
  import { mkdir as mkdir5 } from "node:fs/promises";
39807
- import path15 from "node:path";
39960
+ import path16 from "node:path";
39808
39961
  import { finished } from "node:stream/promises";
39809
39962
 
39810
39963
  // ../../node_modules/.bun/async-mutex@0.5.0/node_modules/async-mutex/index.mjs
@@ -40022,7 +40175,7 @@ var JsonlWriter = class _JsonlWriter {
40022
40175
  this.stream = stream;
40023
40176
  }
40024
40177
  static async open(filePath) {
40025
- await mkdir5(path15.dirname(filePath), { recursive: true });
40178
+ await mkdir5(path16.dirname(filePath), { recursive: true });
40026
40179
  const stream = createWriteStream2(filePath, { flags: "w", encoding: "utf8" });
40027
40180
  return new _JsonlWriter(stream);
40028
40181
  }
@@ -40054,9 +40207,9 @@ var JsonlWriter = class _JsonlWriter {
40054
40207
  // src/commands/eval/yaml-writer.ts
40055
40208
  import { createWriteStream as createWriteStream3 } from "node:fs";
40056
40209
  import { mkdir as mkdir6 } from "node:fs/promises";
40057
- import path16 from "node:path";
40210
+ import path17 from "node:path";
40058
40211
  import { finished as finished2 } from "node:stream/promises";
40059
- import { stringify as stringifyYaml } from "yaml";
40212
+ import { stringify as stringifyYaml2 } from "yaml";
40060
40213
  var YamlWriter = class _YamlWriter {
40061
40214
  stream;
40062
40215
  mutex = new Mutex();
@@ -40066,7 +40219,7 @@ var YamlWriter = class _YamlWriter {
40066
40219
  this.stream = stream;
40067
40220
  }
40068
40221
  static async open(filePath) {
40069
- await mkdir6(path16.dirname(filePath), { recursive: true });
40222
+ await mkdir6(path17.dirname(filePath), { recursive: true });
40070
40223
  const stream = createWriteStream3(filePath, { flags: "w", encoding: "utf8" });
40071
40224
  return new _YamlWriter(stream);
40072
40225
  }
@@ -40075,7 +40228,7 @@ var YamlWriter = class _YamlWriter {
40075
40228
  if (this.closed) {
40076
40229
  throw new Error("Cannot write to closed YAML writer");
40077
40230
  }
40078
- const yamlDoc = stringifyYaml(record2, {
40231
+ const yamlDoc = stringifyYaml2(record2, {
40079
40232
  indent: 2,
40080
40233
  lineWidth: 0
40081
40234
  // Disable line wrapping
@@ -40185,12 +40338,12 @@ var ProgressDisplay = class {
40185
40338
  }
40186
40339
  addLogPaths(paths) {
40187
40340
  const newPaths = [];
40188
- for (const path27 of paths) {
40189
- if (this.logPathSet.has(path27)) {
40341
+ for (const path28 of paths) {
40342
+ if (this.logPathSet.has(path28)) {
40190
40343
  continue;
40191
40344
  }
40192
- this.logPathSet.add(path27);
40193
- newPaths.push(path27);
40345
+ this.logPathSet.add(path28);
40346
+ newPaths.push(path28);
40194
40347
  }
40195
40348
  if (newPaths.length === 0) {
40196
40349
  return;
@@ -40202,8 +40355,8 @@ var ProgressDisplay = class {
40202
40355
  this.hasPrintedLogHeader = true;
40203
40356
  }
40204
40357
  const startIndex = this.logPaths.length - newPaths.length;
40205
- newPaths.forEach((path27, offset) => {
40206
- console.log(`${startIndex + offset + 1}. ${path27}`);
40358
+ newPaths.forEach((path28, offset) => {
40359
+ console.log(`${startIndex + offset + 1}. ${path28}`);
40207
40360
  });
40208
40361
  }
40209
40362
  finish() {
@@ -40358,7 +40511,7 @@ function formatEvaluationSummary(summary) {
40358
40511
 
40359
40512
  // ../../packages/core/dist/evaluation/validation/index.js
40360
40513
  import { readFile as readFile7 } from "node:fs/promises";
40361
- import path17 from "node:path";
40514
+ import path18 from "node:path";
40362
40515
  import { parse as parse6 } from "yaml";
40363
40516
  import { readFile as readFile23 } from "node:fs/promises";
40364
40517
  import path23 from "node:path";
@@ -40401,8 +40554,8 @@ async function detectFileType(filePath) {
40401
40554
  }
40402
40555
  }
40403
40556
  function inferFileTypeFromPath(filePath) {
40404
- const normalized = path17.normalize(filePath).replace(/\\/g, "/");
40405
- const basename = path17.basename(filePath);
40557
+ const normalized = path18.normalize(filePath).replace(/\\/g, "/");
40558
+ const basename = path18.basename(filePath);
40406
40559
  if (normalized.includes("/.agentv/")) {
40407
40560
  if (basename === "config.yaml" || basename === "config.yml") {
40408
40561
  return "config";
@@ -40725,7 +40878,11 @@ var CLI_SETTINGS = /* @__PURE__ */ new Set([
40725
40878
  "env",
40726
40879
  "timeout_seconds",
40727
40880
  "timeoutSeconds",
40728
- "healthcheck"
40881
+ "healthcheck",
40882
+ "keep_temp_files",
40883
+ "keepTempFiles",
40884
+ "keep_output_files",
40885
+ "keepOutputFiles"
40729
40886
  ]);
40730
40887
  function getKnownSettings(provider) {
40731
40888
  const normalizedProvider = provider.toLowerCase();
@@ -41243,12 +41400,12 @@ async function validateMessagesFileRefs(messages, location, searchRoots, filePat
41243
41400
  // src/utils/targets.ts
41244
41401
  import { constants as constants5 } from "node:fs";
41245
41402
  import { access as access5 } from "node:fs/promises";
41246
- import path18 from "node:path";
41403
+ import path19 from "node:path";
41247
41404
  var TARGET_FILE_CANDIDATES = [
41248
41405
  "targets.yaml",
41249
41406
  "targets.yml",
41250
- path18.join(".agentv", "targets.yaml"),
41251
- path18.join(".agentv", "targets.yml")
41407
+ path19.join(".agentv", "targets.yaml"),
41408
+ path19.join(".agentv", "targets.yml")
41252
41409
  ];
41253
41410
  async function fileExists5(filePath) {
41254
41411
  try {
@@ -41261,12 +41418,12 @@ async function fileExists5(filePath) {
41261
41418
  async function discoverTargetsFile(options) {
41262
41419
  const { explicitPath, testFilePath, repoRoot, cwd } = options;
41263
41420
  if (explicitPath) {
41264
- const resolvedExplicit = path18.resolve(explicitPath);
41421
+ const resolvedExplicit = path19.resolve(explicitPath);
41265
41422
  if (await fileExists5(resolvedExplicit)) {
41266
41423
  return resolvedExplicit;
41267
41424
  }
41268
41425
  for (const candidate of TARGET_FILE_CANDIDATES) {
41269
- const nested = path18.join(resolvedExplicit, candidate);
41426
+ const nested = path19.join(resolvedExplicit, candidate);
41270
41427
  if (await fileExists5(nested)) {
41271
41428
  return nested;
41272
41429
  }
@@ -41274,13 +41431,13 @@ async function discoverTargetsFile(options) {
41274
41431
  throw new Error(`targets.yaml not found at provided path: ${resolvedExplicit}`);
41275
41432
  }
41276
41433
  const directories = [...buildDirectoryChain(testFilePath, repoRoot)];
41277
- const resolvedCwd = path18.resolve(cwd);
41434
+ const resolvedCwd = path19.resolve(cwd);
41278
41435
  if (!directories.includes(resolvedCwd)) {
41279
41436
  directories.push(resolvedCwd);
41280
41437
  }
41281
41438
  for (const directory of directories) {
41282
41439
  for (const candidate of TARGET_FILE_CANDIDATES) {
41283
- const fullPath = path18.join(directory, candidate);
41440
+ const fullPath = path19.join(directory, candidate);
41284
41441
  if (await fileExists5(fullPath)) {
41285
41442
  return fullPath;
41286
41443
  }
@@ -41459,15 +41616,15 @@ async function ensureFileExists(filePath, description) {
41459
41616
  }
41460
41617
  }
41461
41618
  async function findRepoRoot(start) {
41462
- const fallback = path19.resolve(start);
41619
+ const fallback = path20.resolve(start);
41463
41620
  let current = fallback;
41464
41621
  while (current !== void 0) {
41465
- const candidate = path19.join(current, ".git");
41622
+ const candidate = path20.join(current, ".git");
41466
41623
  try {
41467
41624
  await access6(candidate, constants6.F_OK);
41468
41625
  return current;
41469
41626
  } catch {
41470
- const parent = path19.dirname(current);
41627
+ const parent = path20.dirname(current);
41471
41628
  if (parent === current) {
41472
41629
  break;
41473
41630
  }
@@ -41480,16 +41637,16 @@ function buildDefaultOutputPath(cwd, format) {
41480
41637
  const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
41481
41638
  const baseName = "eval";
41482
41639
  const extension = getDefaultExtension(format);
41483
- return path19.join(cwd, ".agentv", "results", `${baseName}_${timestamp}${extension}`);
41640
+ return path20.join(cwd, ".agentv", "results", `${baseName}_${timestamp}${extension}`);
41484
41641
  }
41485
- function resolvePromptDirectory(option4, cwd) {
41486
- if (option4 === void 0) {
41642
+ function resolvePromptDirectory(option5, cwd) {
41643
+ if (option5 === void 0) {
41487
41644
  return void 0;
41488
41645
  }
41489
- if (typeof option4 === "string" && option4.trim().length > 0) {
41490
- return path19.resolve(cwd, option4);
41646
+ if (typeof option5 === "string" && option5.trim().length > 0) {
41647
+ return path20.resolve(cwd, option5);
41491
41648
  }
41492
- return path19.join(cwd, ".agentv", "prompts");
41649
+ return path20.join(cwd, ".agentv", "prompts");
41493
41650
  }
41494
41651
  function createEvaluationCache() {
41495
41652
  const store = /* @__PURE__ */ new Map();
@@ -41514,7 +41671,7 @@ function createProgressReporter(maxWorkers, options) {
41514
41671
  };
41515
41672
  }
41516
41673
  function makeEvalKey(testFilePath, evalId) {
41517
- return `${path19.resolve(testFilePath)}::${evalId}`;
41674
+ return `${path20.resolve(testFilePath)}::${evalId}`;
41518
41675
  }
41519
41676
  function createDisplayIdTracker() {
41520
41677
  const map2 = /* @__PURE__ */ new Map();
@@ -41686,7 +41843,7 @@ async function runEvalCommand(input) {
41686
41843
  if (options.verbose) {
41687
41844
  console.log(`Repository root: ${repoRoot}`);
41688
41845
  }
41689
- const outputPath = options.outPath ? path19.resolve(options.outPath) : buildDefaultOutputPath(cwd, options.format);
41846
+ const outputPath = options.outPath ? path20.resolve(options.outPath) : buildDefaultOutputPath(cwd, options.format);
41690
41847
  console.log(`Output path: ${outputPath}`);
41691
41848
  const outputWriter = await createOutputWriter(outputPath, options.format);
41692
41849
  const cache = options.cache ? createEvaluationCache() : void 0;
@@ -41694,7 +41851,7 @@ async function runEvalCommand(input) {
41694
41851
  const allResults = [];
41695
41852
  let lastPromptDumpDir;
41696
41853
  const seenEvalCases = /* @__PURE__ */ new Set();
41697
- const resolvedTestFiles = input.testFiles.map((file2) => path19.resolve(file2));
41854
+ const resolvedTestFiles = input.testFiles.map((file2) => path20.resolve(file2));
41698
41855
  const displayIdTracker = createDisplayIdTracker();
41699
41856
  const totalWorkers = options.workers ?? DEFAULT_WORKERS;
41700
41857
  const fileConcurrency = Math.min(
@@ -41790,7 +41947,7 @@ async function resolveEvaluationRunner() {
41790
41947
  if (!overridePath) {
41791
41948
  return runEvaluation;
41792
41949
  }
41793
- const resolved = path19.isAbsolute(overridePath) ? overridePath : path19.resolve(process.cwd(), overridePath);
41950
+ const resolved = path20.isAbsolute(overridePath) ? overridePath : path20.resolve(process.cwd(), overridePath);
41794
41951
  const moduleUrl = pathToFileURL(resolved).href;
41795
41952
  const mod = await import(moduleUrl);
41796
41953
  const candidate = mod.runEvaluation;
@@ -41803,44 +41960,44 @@ async function resolveEvaluationRunner() {
41803
41960
  }
41804
41961
 
41805
41962
  // src/commands/eval/index.ts
41806
- var evalCommand = command({
41963
+ var evalCommand = command2({
41807
41964
  name: "eval",
41808
41965
  description: "Run eval suites and report results",
41809
41966
  args: {
41810
41967
  evalPaths: restPositionals({
41811
- type: string4,
41968
+ type: string5,
41812
41969
  displayName: "eval-paths",
41813
41970
  description: "Path(s) or glob(s) to evaluation .yaml file(s)"
41814
41971
  }),
41815
- target: option({
41816
- type: string4,
41972
+ target: option2({
41973
+ type: string5,
41817
41974
  long: "target",
41818
41975
  description: "Override target name from targets.yaml",
41819
41976
  defaultValue: () => "default"
41820
41977
  }),
41821
- targets: option({
41822
- type: optional2(string4),
41978
+ targets: option2({
41979
+ type: optional3(string5),
41823
41980
  long: "targets",
41824
41981
  description: "Path to targets.yaml (overrides discovery)"
41825
41982
  }),
41826
- evalId: option({
41827
- type: optional2(string4),
41983
+ evalId: option2({
41984
+ type: optional3(string5),
41828
41985
  long: "eval-id",
41829
41986
  description: "Run only the eval case with this identifier"
41830
41987
  }),
41831
- workers: option({
41988
+ workers: option2({
41832
41989
  type: number4,
41833
41990
  long: "workers",
41834
41991
  description: "Number of parallel workers (default: 3, max: 50). Can also be set per-target in targets.yaml",
41835
41992
  defaultValue: () => 3
41836
41993
  }),
41837
- out: option({
41838
- type: optional2(string4),
41994
+ out: option2({
41995
+ type: optional3(string5),
41839
41996
  long: "out",
41840
41997
  description: "Write results to the specified path"
41841
41998
  }),
41842
- outputFormat: option({
41843
- type: string4,
41999
+ outputFormat: option2({
42000
+ type: string5,
41844
42001
  long: "output-format",
41845
42002
  description: "Output format: 'jsonl' or 'yaml' (default: jsonl)",
41846
42003
  defaultValue: () => "jsonl"
@@ -41849,31 +42006,31 @@ var evalCommand = command({
41849
42006
  long: "dry-run",
41850
42007
  description: "Use mock provider responses instead of real LLM calls"
41851
42008
  }),
41852
- dryRunDelay: option({
42009
+ dryRunDelay: option2({
41853
42010
  type: number4,
41854
42011
  long: "dry-run-delay",
41855
42012
  description: "Fixed delay in milliseconds for dry-run mode (overridden by delay range if specified)",
41856
42013
  defaultValue: () => 0
41857
42014
  }),
41858
- dryRunDelayMin: option({
42015
+ dryRunDelayMin: option2({
41859
42016
  type: number4,
41860
42017
  long: "dry-run-delay-min",
41861
42018
  description: "Minimum delay in milliseconds for dry-run mode (requires --dry-run-delay-max)",
41862
42019
  defaultValue: () => 0
41863
42020
  }),
41864
- dryRunDelayMax: option({
42021
+ dryRunDelayMax: option2({
41865
42022
  type: number4,
41866
42023
  long: "dry-run-delay-max",
41867
42024
  description: "Maximum delay in milliseconds for dry-run mode (requires --dry-run-delay-min)",
41868
42025
  defaultValue: () => 0
41869
42026
  }),
41870
- agentTimeout: option({
42027
+ agentTimeout: option2({
41871
42028
  type: number4,
41872
42029
  long: "agent-timeout",
41873
42030
  description: "Timeout in seconds for provider responses (default: 120)",
41874
42031
  defaultValue: () => 120
41875
42032
  }),
41876
- maxRetries: option({
42033
+ maxRetries: option2({
41877
42034
  type: number4,
41878
42035
  long: "max-retries",
41879
42036
  description: "Retry count for timeout recoveries (default: 2)",
@@ -41887,8 +42044,8 @@ var evalCommand = command({
41887
42044
  long: "verbose",
41888
42045
  description: "Enable verbose logging"
41889
42046
  }),
41890
- dumpPrompts: option({
41891
- type: optional2(string4),
42047
+ dumpPrompts: option2({
42048
+ type: optional3(string5),
41892
42049
  long: "dump-prompts",
41893
42050
  description: "Directory path for persisting prompt payloads for debugging"
41894
42051
  }),
@@ -41934,7 +42091,7 @@ async function resolveEvalPaths(evalPaths, cwd) {
41934
42091
  const unmatched = [];
41935
42092
  const results = /* @__PURE__ */ new Set();
41936
42093
  for (const pattern of normalizedInputs) {
41937
- const candidatePath = path20.isAbsolute(pattern) ? path20.normalize(pattern) : path20.resolve(cwd, pattern);
42094
+ const candidatePath = path21.isAbsolute(pattern) ? path21.normalize(pattern) : path21.resolve(cwd, pattern);
41938
42095
  try {
41939
42096
  const stats = await stat4(candidatePath);
41940
42097
  if (stats.isFile() && /\.ya?ml$/i.test(candidatePath)) {
@@ -41958,7 +42115,7 @@ async function resolveEvalPaths(evalPaths, cwd) {
41958
42115
  continue;
41959
42116
  }
41960
42117
  for (const filePath of yamlMatches) {
41961
- results.add(path20.normalize(filePath));
42118
+ results.add(path21.normalize(filePath));
41962
42119
  }
41963
42120
  }
41964
42121
  if (unmatched.length > 0) {
@@ -41974,11 +42131,11 @@ async function resolveEvalPaths(evalPaths, cwd) {
41974
42131
  }
41975
42132
 
41976
42133
  // src/commands/generate/index.ts
41977
- import { command as command2, flag as flag2, option as option2, optional as optional3, positional as positional2, string as string5, subcommands } from "cmd-ts";
42134
+ import { command as command3, flag as flag2, option as option3, optional as optional4, positional as positional3, string as string6, subcommands } from "cmd-ts";
41978
42135
 
41979
42136
  // src/commands/generate/rubrics.ts
41980
42137
  import { readFile as readFile8, writeFile as writeFile6 } from "node:fs/promises";
41981
- import path21 from "node:path";
42138
+ import path24 from "node:path";
41982
42139
  import { pathToFileURL as pathToFileURL2 } from "node:url";
41983
42140
  import { isMap, isSeq, parseDocument } from "yaml";
41984
42141
  function isJsonObject3(value) {
@@ -41990,7 +42147,7 @@ function asString6(value) {
41990
42147
  async function loadRubricGenerator() {
41991
42148
  const customGenerator = process.env.AGENTEVO_CLI_RUBRIC_GENERATOR;
41992
42149
  if (customGenerator) {
41993
- const generatorPath = path21.resolve(customGenerator);
42150
+ const generatorPath = path24.resolve(customGenerator);
41994
42151
  const generatorUrl = pathToFileURL2(generatorPath).href;
41995
42152
  const module = await import(generatorUrl);
41996
42153
  return module.generateRubrics;
@@ -42000,7 +42157,7 @@ async function loadRubricGenerator() {
42000
42157
  async function generateRubricsCommand(options) {
42001
42158
  const { file: file2, target: targetOverride, verbose } = options;
42002
42159
  console.log(`Generating rubrics for: ${file2}`);
42003
- const absolutePath = path21.resolve(file2);
42160
+ const absolutePath = path24.resolve(file2);
42004
42161
  const content = await readFile8(absolutePath, "utf8");
42005
42162
  const doc = parseDocument(content);
42006
42163
  const parsed = doc.toJSON();
@@ -42117,17 +42274,17 @@ function extractQuestion(evalCase) {
42117
42274
  }
42118
42275
 
42119
42276
  // src/commands/generate/index.ts
42120
- var rubricsCommand = command2({
42277
+ var rubricsCommand = command3({
42121
42278
  name: "rubrics",
42122
42279
  description: "Generate rubrics from expected_outcome in YAML eval file",
42123
42280
  args: {
42124
- file: positional2({
42125
- type: string5,
42281
+ file: positional3({
42282
+ type: string6,
42126
42283
  displayName: "file",
42127
42284
  description: "Path to YAML eval file"
42128
42285
  }),
42129
- target: option2({
42130
- type: optional3(string5),
42286
+ target: option3({
42287
+ type: optional4(string6),
42131
42288
  long: "target",
42132
42289
  short: "t",
42133
42290
  description: "Override target for rubric generation (default: file target or openai:gpt-4o)"
@@ -42160,14 +42317,14 @@ var generateCommand = subcommands({
42160
42317
  });
42161
42318
 
42162
42319
  // src/commands/init/index.ts
42163
- import { existsSync, mkdirSync, writeFileSync } from "node:fs";
42164
- import path25 from "node:path";
42320
+ import { existsSync, mkdirSync, writeFileSync as writeFileSync2 } from "node:fs";
42321
+ import path26 from "node:path";
42165
42322
  import * as readline from "node:readline/promises";
42166
- import { command as command3, option as option3, optional as optional4, string as string6 } from "cmd-ts";
42323
+ import { command as command4, option as option4, optional as optional5, string as string7 } from "cmd-ts";
42167
42324
 
42168
42325
  // src/templates/index.ts
42169
- import { readFileSync, readdirSync, statSync } from "node:fs";
42170
- import path24 from "node:path";
42326
+ import { readFileSync as readFileSync2, readdirSync, statSync } from "node:fs";
42327
+ import path25 from "node:path";
42171
42328
  import { fileURLToPath } from "node:url";
42172
42329
  function getGithubTemplates() {
42173
42330
  return getTemplatesFromDir(".github");
@@ -42179,12 +42336,12 @@ function getClaudeTemplates() {
42179
42336
  return getTemplatesFromDir(".claude");
42180
42337
  }
42181
42338
  function getTemplatesFromDir(subdir) {
42182
- const currentDir = path24.dirname(fileURLToPath(import.meta.url));
42339
+ const currentDir = path25.dirname(fileURLToPath(import.meta.url));
42183
42340
  let templatesDir;
42184
- if (currentDir.includes(`${path24.sep}dist`)) {
42185
- templatesDir = path24.join(currentDir, "templates", subdir);
42341
+ if (currentDir.includes(`${path25.sep}dist`)) {
42342
+ templatesDir = path25.join(currentDir, "templates", subdir);
42186
42343
  } else {
42187
- templatesDir = path24.join(currentDir, subdir);
42344
+ templatesDir = path25.join(currentDir, subdir);
42188
42345
  }
42189
42346
  return readTemplatesRecursively(templatesDir, "");
42190
42347
  }
@@ -42192,15 +42349,15 @@ function readTemplatesRecursively(dir, relativePath) {
42192
42349
  const templates = [];
42193
42350
  const entries = readdirSync(dir);
42194
42351
  for (const entry of entries) {
42195
- const fullPath = path24.join(dir, entry);
42352
+ const fullPath = path25.join(dir, entry);
42196
42353
  const stat6 = statSync(fullPath);
42197
- const entryRelativePath = relativePath ? path24.join(relativePath, entry) : entry;
42354
+ const entryRelativePath = relativePath ? path25.join(relativePath, entry) : entry;
42198
42355
  if (stat6.isDirectory()) {
42199
42356
  templates.push(...readTemplatesRecursively(fullPath, entryRelativePath));
42200
42357
  } else {
42201
- const content = readFileSync(fullPath, "utf-8");
42358
+ const content = readFileSync2(fullPath, "utf-8");
42202
42359
  templates.push({
42203
- path: entryRelativePath.split(path24.sep).join("/"),
42360
+ path: entryRelativePath.split(path25.sep).join("/"),
42204
42361
  // Normalize to forward slashes
42205
42362
  content
42206
42363
  });
@@ -42223,10 +42380,10 @@ async function promptYesNo(message) {
42223
42380
  }
42224
42381
  }
42225
42382
  async function initCommand(options = {}) {
42226
- const targetPath = path25.resolve(options.targetPath ?? ".");
42227
- const githubDir = path25.join(targetPath, ".github");
42228
- const agentvDir = path25.join(targetPath, ".agentv");
42229
- const claudeDir = path25.join(targetPath, ".claude");
42383
+ const targetPath = path26.resolve(options.targetPath ?? ".");
42384
+ const githubDir = path26.join(targetPath, ".github");
42385
+ const agentvDir = path26.join(targetPath, ".agentv");
42386
+ const claudeDir = path26.join(targetPath, ".claude");
42230
42387
  const githubTemplates = getGithubTemplates();
42231
42388
  const agentvTemplates = getAgentvTemplates();
42232
42389
  const claudeTemplates = getClaudeTemplates();
@@ -42234,32 +42391,32 @@ async function initCommand(options = {}) {
42234
42391
  const otherAgentvTemplates = agentvTemplates.filter((t) => t.path !== ".env.template");
42235
42392
  const existingFiles = [];
42236
42393
  if (envTemplate) {
42237
- const envFilePath = path25.join(targetPath, ".env.template");
42394
+ const envFilePath = path26.join(targetPath, ".env.template");
42238
42395
  if (existsSync(envFilePath)) {
42239
42396
  existingFiles.push(".env.template");
42240
42397
  }
42241
42398
  }
42242
42399
  if (existsSync(githubDir)) {
42243
42400
  for (const template of githubTemplates) {
42244
- const targetFilePath = path25.join(githubDir, template.path);
42401
+ const targetFilePath = path26.join(githubDir, template.path);
42245
42402
  if (existsSync(targetFilePath)) {
42246
- existingFiles.push(path25.relative(targetPath, targetFilePath));
42403
+ existingFiles.push(path26.relative(targetPath, targetFilePath));
42247
42404
  }
42248
42405
  }
42249
42406
  }
42250
42407
  if (existsSync(agentvDir)) {
42251
42408
  for (const template of otherAgentvTemplates) {
42252
- const targetFilePath = path25.join(agentvDir, template.path);
42409
+ const targetFilePath = path26.join(agentvDir, template.path);
42253
42410
  if (existsSync(targetFilePath)) {
42254
- existingFiles.push(path25.relative(targetPath, targetFilePath));
42411
+ existingFiles.push(path26.relative(targetPath, targetFilePath));
42255
42412
  }
42256
42413
  }
42257
42414
  }
42258
42415
  if (existsSync(claudeDir)) {
42259
42416
  for (const template of claudeTemplates) {
42260
- const targetFilePath = path25.join(claudeDir, template.path);
42417
+ const targetFilePath = path26.join(claudeDir, template.path);
42261
42418
  if (existsSync(targetFilePath)) {
42262
- existingFiles.push(path25.relative(targetPath, targetFilePath));
42419
+ existingFiles.push(path26.relative(targetPath, targetFilePath));
42263
42420
  }
42264
42421
  }
42265
42422
  }
@@ -42286,36 +42443,36 @@ async function initCommand(options = {}) {
42286
42443
  mkdirSync(claudeDir, { recursive: true });
42287
42444
  }
42288
42445
  if (envTemplate) {
42289
- const envFilePath = path25.join(targetPath, ".env.template");
42290
- writeFileSync(envFilePath, envTemplate.content, "utf-8");
42446
+ const envFilePath = path26.join(targetPath, ".env.template");
42447
+ writeFileSync2(envFilePath, envTemplate.content, "utf-8");
42291
42448
  console.log("Created .env.template");
42292
42449
  }
42293
42450
  for (const template of githubTemplates) {
42294
- const targetFilePath = path25.join(githubDir, template.path);
42295
- const targetDirPath = path25.dirname(targetFilePath);
42451
+ const targetFilePath = path26.join(githubDir, template.path);
42452
+ const targetDirPath = path26.dirname(targetFilePath);
42296
42453
  if (!existsSync(targetDirPath)) {
42297
42454
  mkdirSync(targetDirPath, { recursive: true });
42298
42455
  }
42299
- writeFileSync(targetFilePath, template.content, "utf-8");
42300
- console.log(`Created ${path25.relative(targetPath, targetFilePath)}`);
42456
+ writeFileSync2(targetFilePath, template.content, "utf-8");
42457
+ console.log(`Created ${path26.relative(targetPath, targetFilePath)}`);
42301
42458
  }
42302
42459
  for (const template of otherAgentvTemplates) {
42303
- const targetFilePath = path25.join(agentvDir, template.path);
42304
- const targetDirPath = path25.dirname(targetFilePath);
42460
+ const targetFilePath = path26.join(agentvDir, template.path);
42461
+ const targetDirPath = path26.dirname(targetFilePath);
42305
42462
  if (!existsSync(targetDirPath)) {
42306
42463
  mkdirSync(targetDirPath, { recursive: true });
42307
42464
  }
42308
- writeFileSync(targetFilePath, template.content, "utf-8");
42309
- console.log(`Created ${path25.relative(targetPath, targetFilePath)}`);
42465
+ writeFileSync2(targetFilePath, template.content, "utf-8");
42466
+ console.log(`Created ${path26.relative(targetPath, targetFilePath)}`);
42310
42467
  }
42311
42468
  for (const template of claudeTemplates) {
42312
- const targetFilePath = path25.join(claudeDir, template.path);
42313
- const targetDirPath = path25.dirname(targetFilePath);
42469
+ const targetFilePath = path26.join(claudeDir, template.path);
42470
+ const targetDirPath = path26.dirname(targetFilePath);
42314
42471
  if (!existsSync(targetDirPath)) {
42315
42472
  mkdirSync(targetDirPath, { recursive: true });
42316
42473
  }
42317
- writeFileSync(targetFilePath, template.content, "utf-8");
42318
- console.log(`Created ${path25.relative(targetPath, targetFilePath)}`);
42474
+ writeFileSync2(targetFilePath, template.content, "utf-8");
42475
+ console.log(`Created ${path26.relative(targetPath, targetFilePath)}`);
42319
42476
  }
42320
42477
  console.log("\nAgentV initialized successfully!");
42321
42478
  console.log("\nFiles installed to root:");
@@ -42323,17 +42480,17 @@ async function initCommand(options = {}) {
42323
42480
  console.log(" - .env.template");
42324
42481
  }
42325
42482
  console.log(`
42326
- Files installed to ${path25.relative(targetPath, githubDir)}:`);
42483
+ Files installed to ${path26.relative(targetPath, githubDir)}:`);
42327
42484
  for (const t of githubTemplates) {
42328
42485
  console.log(` - ${t.path}`);
42329
42486
  }
42330
42487
  console.log(`
42331
- Files installed to ${path25.relative(targetPath, agentvDir)}:`);
42488
+ Files installed to ${path26.relative(targetPath, agentvDir)}:`);
42332
42489
  for (const t of otherAgentvTemplates) {
42333
42490
  console.log(` - ${t.path}`);
42334
42491
  }
42335
42492
  console.log(`
42336
- Files installed to ${path25.relative(targetPath, claudeDir)}:`);
42493
+ Files installed to ${path26.relative(targetPath, claudeDir)}:`);
42337
42494
  for (const t of claudeTemplates) {
42338
42495
  console.log(` - ${t.path}`);
42339
42496
  }
@@ -42342,12 +42499,12 @@ Files installed to ${path25.relative(targetPath, claudeDir)}:`);
42342
42499
  console.log(" 2. Configure targets in .agentv/targets.yaml");
42343
42500
  console.log(" 3. Create eval files using the schema and prompt templates");
42344
42501
  }
42345
- var initCmdTsCommand = command3({
42502
+ var initCmdTsCommand = command4({
42346
42503
  name: "init",
42347
42504
  description: "Initialize AgentV in your project (installs prompt templates and schema to .github)",
42348
42505
  args: {
42349
- path: option3({
42350
- type: optional4(string6),
42506
+ path: option4({
42507
+ type: optional5(string7),
42351
42508
  long: "path",
42352
42509
  description: "Target directory for initialization (default: current directory)"
42353
42510
  })
@@ -42363,7 +42520,7 @@ var initCmdTsCommand = command3({
42363
42520
  });
42364
42521
 
42365
42522
  // src/commands/validate/index.ts
42366
- import { command as command4, restPositionals as restPositionals2, string as string7 } from "cmd-ts";
42523
+ import { command as command5, restPositionals as restPositionals2, string as string8 } from "cmd-ts";
42367
42524
 
42368
42525
  // src/commands/validate/format-output.ts
42369
42526
  var ANSI_RED3 = "\x1B[31m";
@@ -42448,7 +42605,7 @@ function isTTY2() {
42448
42605
  // src/commands/validate/validate-files.ts
42449
42606
  import { constants as constants7 } from "node:fs";
42450
42607
  import { access as access7, readdir as readdir3, stat as stat5 } from "node:fs/promises";
42451
- import path26 from "node:path";
42608
+ import path27 from "node:path";
42452
42609
  async function validateFiles(paths) {
42453
42610
  const filePaths = await expandPaths(paths);
42454
42611
  const results = [];
@@ -42466,7 +42623,7 @@ async function validateFiles(paths) {
42466
42623
  };
42467
42624
  }
42468
42625
  async function validateSingleFile(filePath) {
42469
- const absolutePath = path26.resolve(filePath);
42626
+ const absolutePath = path27.resolve(filePath);
42470
42627
  const fileType = await detectFileType(absolutePath);
42471
42628
  let result;
42472
42629
  if (fileType === "eval") {
@@ -42491,7 +42648,7 @@ async function validateSingleFile(filePath) {
42491
42648
  async function expandPaths(paths) {
42492
42649
  const expanded = [];
42493
42650
  for (const inputPath of paths) {
42494
- const absolutePath = path26.resolve(inputPath);
42651
+ const absolutePath = path27.resolve(inputPath);
42495
42652
  try {
42496
42653
  await access7(absolutePath, constants7.F_OK);
42497
42654
  } catch {
@@ -42515,7 +42672,7 @@ async function findYamlFiles(dirPath) {
42515
42672
  try {
42516
42673
  const entries = await readdir3(dirPath, { withFileTypes: true });
42517
42674
  for (const entry of entries) {
42518
- const fullPath = path26.join(dirPath, entry.name);
42675
+ const fullPath = path27.join(dirPath, entry.name);
42519
42676
  if (entry.isDirectory()) {
42520
42677
  if (entry.name === "node_modules" || entry.name.startsWith(".")) {
42521
42678
  continue;
@@ -42532,7 +42689,7 @@ async function findYamlFiles(dirPath) {
42532
42689
  return results;
42533
42690
  }
42534
42691
  function isYamlFile(filePath) {
42535
- const ext = path26.extname(filePath).toLowerCase();
42692
+ const ext = path27.extname(filePath).toLowerCase();
42536
42693
  return ext === ".yaml" || ext === ".yml";
42537
42694
  }
42538
42695
 
@@ -42549,12 +42706,12 @@ async function runValidateCommand(paths) {
42549
42706
  process.exit(1);
42550
42707
  }
42551
42708
  }
42552
- var validateCommand = command4({
42709
+ var validateCommand = command5({
42553
42710
  name: "validate",
42554
42711
  description: "Validate AgentV eval and targets YAML files",
42555
42712
  args: {
42556
42713
  paths: restPositionals2({
42557
- type: string7,
42714
+ type: string8,
42558
42715
  displayName: "paths",
42559
42716
  description: "Files or directories to validate"
42560
42717
  })
@@ -42570,16 +42727,17 @@ var validateCommand = command4({
42570
42727
  });
42571
42728
 
42572
42729
  // src/index.ts
42573
- var packageJson = JSON.parse(readFileSync2(new URL("../package.json", import.meta.url), "utf8"));
42730
+ var packageJson = JSON.parse(readFileSync3(new URL("../package.json", import.meta.url), "utf8"));
42574
42731
  var app = subcommands2({
42575
42732
  name: "agentv",
42576
42733
  description: "AgentV CLI",
42577
42734
  version: packageJson.version,
42578
42735
  cmds: {
42736
+ convert: convertCommand,
42579
42737
  eval: evalCommand,
42580
- validate: validateCommand,
42581
42738
  generate: generateCommand,
42582
- init: initCmdTsCommand
42739
+ init: initCmdTsCommand,
42740
+ validate: validateCommand
42583
42741
  }
42584
42742
  });
42585
42743
  async function runCli(argv = process.argv) {
@@ -42590,4 +42748,4 @@ export {
42590
42748
  app,
42591
42749
  runCli
42592
42750
  };
42593
- //# sourceMappingURL=chunk-6R2YRXCQ.js.map
42751
+ //# sourceMappingURL=chunk-3RYQPI4H.js.map