agentv 0.20.1 → 0.21.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -142,11 +142,20 @@ var require_dist = __commonJS({
142
142
 
143
143
  // src/index.ts
144
144
  import { readFileSync as readFileSync2 } from "node:fs";
145
- import { Command } from "commander";
145
+ import { binary, run, subcommands as subcommands2 } from "cmd-ts";
146
146
 
147
147
  // src/commands/eval/index.ts
148
148
  import { stat as stat4 } from "node:fs/promises";
149
149
  import path19 from "node:path";
150
+ import {
151
+ command,
152
+ flag,
153
+ number as number4,
154
+ option,
155
+ optional as optional2,
156
+ restPositionals,
157
+ string as string4
158
+ } from "cmd-ts";
150
159
  import fg from "fast-glob";
151
160
 
152
161
  // src/commands/eval/run-eval.ts
@@ -155,7 +164,7 @@ import { access as access6, mkdir as mkdir7 } from "node:fs/promises";
155
164
  import path18 from "node:path";
156
165
  import { pathToFileURL } from "node:url";
157
166
 
158
- // ../../packages/core/dist/chunk-SVY324GN.js
167
+ // ../../packages/core/dist/chunk-B2J23S7D.js
159
168
  import { constants } from "node:fs";
160
169
  import { access, readFile } from "node:fs/promises";
161
170
  import path from "node:path";
@@ -638,8 +647,8 @@ function getErrorMap() {
638
647
 
639
648
  // ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/helpers/parseUtil.js
640
649
  var makeIssue = (params) => {
641
- const { data, path: path25, errorMaps, issueData } = params;
642
- const fullPath = [...path25, ...issueData.path || []];
650
+ const { data, path: path26, errorMaps, issueData } = params;
651
+ const fullPath = [...path26, ...issueData.path || []];
643
652
  const fullIssue = {
644
653
  ...issueData,
645
654
  path: fullPath
@@ -755,11 +764,11 @@ var errorUtil;
755
764
 
756
765
  // ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/types.js
757
766
  var ParseInputLazyPath = class {
758
- constructor(parent, value, path25, key2) {
767
+ constructor(parent, value, path26, key2) {
759
768
  this._cachedPath = [];
760
769
  this.parent = parent;
761
770
  this.data = value;
762
- this._path = path25;
771
+ this._path = path26;
763
772
  this._key = key2;
764
773
  }
765
774
  get path() {
@@ -1039,8 +1048,8 @@ var ZodType = class {
1039
1048
  promise() {
1040
1049
  return ZodPromise.create(this, this._def);
1041
1050
  }
1042
- or(option) {
1043
- return ZodUnion.create([this, option], this._def);
1051
+ or(option4) {
1052
+ return ZodUnion.create([this, option4], this._def);
1044
1053
  }
1045
1054
  and(incoming) {
1046
1055
  return ZodIntersection.create(this, incoming, this._def);
@@ -2890,7 +2899,7 @@ var ZodUnion = class extends ZodType {
2890
2899
  return INVALID;
2891
2900
  }
2892
2901
  if (ctx.common.async) {
2893
- return Promise.all(options.map(async (option) => {
2902
+ return Promise.all(options.map(async (option4) => {
2894
2903
  const childCtx = {
2895
2904
  ...ctx,
2896
2905
  common: {
@@ -2900,7 +2909,7 @@ var ZodUnion = class extends ZodType {
2900
2909
  parent: null
2901
2910
  };
2902
2911
  return {
2903
- result: await option._parseAsync({
2912
+ result: await option4._parseAsync({
2904
2913
  data: ctx.data,
2905
2914
  path: ctx.path,
2906
2915
  parent: childCtx
@@ -2911,7 +2920,7 @@ var ZodUnion = class extends ZodType {
2911
2920
  } else {
2912
2921
  let dirty = void 0;
2913
2922
  const issues = [];
2914
- for (const option of options) {
2923
+ for (const option4 of options) {
2915
2924
  const childCtx = {
2916
2925
  ...ctx,
2917
2926
  common: {
@@ -2920,7 +2929,7 @@ var ZodUnion = class extends ZodType {
2920
2929
  },
2921
2930
  parent: null
2922
2931
  };
2923
- const result = option._parseSync({
2932
+ const result = option4._parseSync({
2924
2933
  data: ctx.data,
2925
2934
  path: ctx.path,
2926
2935
  parent: childCtx
@@ -3001,8 +3010,8 @@ var ZodDiscriminatedUnion = class _ZodDiscriminatedUnion extends ZodType {
3001
3010
  }
3002
3011
  const discriminator = this.discriminator;
3003
3012
  const discriminatorValue = ctx.data[discriminator];
3004
- const option = this.optionsMap.get(discriminatorValue);
3005
- if (!option) {
3013
+ const option4 = this.optionsMap.get(discriminatorValue);
3014
+ if (!option4) {
3006
3015
  addIssueToContext(ctx, {
3007
3016
  code: ZodIssueCode.invalid_union_discriminator,
3008
3017
  options: Array.from(this.optionsMap.keys()),
@@ -3011,13 +3020,13 @@ var ZodDiscriminatedUnion = class _ZodDiscriminatedUnion extends ZodType {
3011
3020
  return INVALID;
3012
3021
  }
3013
3022
  if (ctx.common.async) {
3014
- return option._parseAsync({
3023
+ return option4._parseAsync({
3015
3024
  data: ctx.data,
3016
3025
  path: ctx.path,
3017
3026
  parent: ctx
3018
3027
  });
3019
3028
  } else {
3020
- return option._parseSync({
3029
+ return option4._parseSync({
3021
3030
  data: ctx.data,
3022
3031
  path: ctx.path,
3023
3032
  parent: ctx
@@ -4201,7 +4210,7 @@ var coerce = {
4201
4210
  };
4202
4211
  var NEVER = INVALID;
4203
4212
 
4204
- // ../../packages/core/dist/chunk-SVY324GN.js
4213
+ // ../../packages/core/dist/chunk-B2J23S7D.js
4205
4214
  async function fileExists(filePath) {
4206
4215
  try {
4207
4216
  await access(filePath, constants.F_OK);
@@ -4577,9 +4586,9 @@ function resolveVSCodeConfig(target, env, insiders) {
4577
4586
  const dryRunSource = target.dry_run ?? target.dryRun;
4578
4587
  const subagentRootSource = target.subagent_root ?? target.subagentRoot;
4579
4588
  const defaultCommand = insiders ? "code-insiders" : "code";
4580
- const command = resolveOptionalLiteralString(commandSource) ?? defaultCommand;
4589
+ const command5 = resolveOptionalLiteralString(commandSource) ?? defaultCommand;
4581
4590
  return {
4582
- command,
4591
+ command: command5,
4583
4592
  waitForResponse: resolveOptionalBoolean(waitSource) ?? true,
4584
4593
  dryRun: resolveOptionalBoolean(dryRunSource) ?? false,
4585
4594
  subagentRoot: resolveOptionalString(subagentRootSource, env, `${target.name} subagent root`, {
@@ -5976,10 +5985,10 @@ function assignProp(target, prop, value) {
5976
5985
  configurable: true
5977
5986
  });
5978
5987
  }
5979
- function getElementAtPath(obj, path25) {
5980
- if (!path25)
5988
+ function getElementAtPath(obj, path26) {
5989
+ if (!path26)
5981
5990
  return obj;
5982
- return path25.reduce((acc, key2) => acc?.[key2], obj);
5991
+ return path26.reduce((acc, key2) => acc?.[key2], obj);
5983
5992
  }
5984
5993
  function promiseAllObject(promisesObj) {
5985
5994
  const keys = Object.keys(promisesObj);
@@ -6299,11 +6308,11 @@ function aborted(x, startIndex = 0) {
6299
6308
  }
6300
6309
  return false;
6301
6310
  }
6302
- function prefixIssues(path25, issues) {
6311
+ function prefixIssues(path26, issues) {
6303
6312
  return issues.map((iss) => {
6304
6313
  var _a17;
6305
6314
  (_a17 = iss).path ?? (_a17.path = []);
6306
- iss.path.unshift(path25);
6315
+ iss.path.unshift(path26);
6307
6316
  return iss;
6308
6317
  });
6309
6318
  }
@@ -6440,7 +6449,7 @@ function treeifyError(error40, _mapper) {
6440
6449
  return issue2.message;
6441
6450
  };
6442
6451
  const result = { errors: [] };
6443
- const processError = (error41, path25 = []) => {
6452
+ const processError = (error41, path26 = []) => {
6444
6453
  var _a17, _b8;
6445
6454
  for (const issue2 of error41.issues) {
6446
6455
  if (issue2.code === "invalid_union" && issue2.errors.length) {
@@ -6450,7 +6459,7 @@ function treeifyError(error40, _mapper) {
6450
6459
  } else if (issue2.code === "invalid_element") {
6451
6460
  processError({ issues: issue2.issues }, issue2.path);
6452
6461
  } else {
6453
- const fullpath = [...path25, ...issue2.path];
6462
+ const fullpath = [...path26, ...issue2.path];
6454
6463
  if (fullpath.length === 0) {
6455
6464
  result.errors.push(mapper(issue2));
6456
6465
  continue;
@@ -6480,9 +6489,9 @@ function treeifyError(error40, _mapper) {
6480
6489
  processError(error40);
6481
6490
  return result;
6482
6491
  }
6483
- function toDotPath(path25) {
6492
+ function toDotPath(path26) {
6484
6493
  const segs = [];
6485
- for (const seg of path25) {
6494
+ for (const seg of path26) {
6486
6495
  if (typeof seg === "number")
6487
6496
  segs.push(`[${seg}]`);
6488
6497
  else if (typeof seg === "symbol")
@@ -8081,7 +8090,7 @@ var $ZodUnion = /* @__PURE__ */ $constructor("$ZodUnion", (inst, def) => {
8081
8090
  defineLazy(inst._zod, "optout", () => def.options.some((o) => o._zod.optout === "optional") ? "optional" : void 0);
8082
8091
  defineLazy(inst._zod, "values", () => {
8083
8092
  if (def.options.every((o) => o._zod.values)) {
8084
- return new Set(def.options.flatMap((option) => Array.from(option._zod.values)));
8093
+ return new Set(def.options.flatMap((option4) => Array.from(option4._zod.values)));
8085
8094
  }
8086
8095
  return void 0;
8087
8096
  });
@@ -8095,8 +8104,8 @@ var $ZodUnion = /* @__PURE__ */ $constructor("$ZodUnion", (inst, def) => {
8095
8104
  inst._zod.parse = (payload, ctx) => {
8096
8105
  let async = false;
8097
8106
  const results = [];
8098
- for (const option of def.options) {
8099
- const result = option._zod.run({
8107
+ for (const option4 of def.options) {
8108
+ const result = option4._zod.run({
8100
8109
  value: payload.value,
8101
8110
  issues: []
8102
8111
  }, ctx);
@@ -8121,10 +8130,10 @@ var $ZodDiscriminatedUnion = /* @__PURE__ */ $constructor("$ZodDiscriminatedUnio
8121
8130
  const _super = inst._zod.parse;
8122
8131
  defineLazy(inst._zod, "propValues", () => {
8123
8132
  const propValues = {};
8124
- for (const option of def.options) {
8125
- const pv = option._zod.propValues;
8133
+ for (const option4 of def.options) {
8134
+ const pv = option4._zod.propValues;
8126
8135
  if (!pv || Object.keys(pv).length === 0)
8127
- throw new Error(`Invalid discriminated union option at index "${def.options.indexOf(option)}"`);
8136
+ throw new Error(`Invalid discriminated union option at index "${def.options.indexOf(option4)}"`);
8128
8137
  for (const [k, v] of Object.entries(pv)) {
8129
8138
  if (!propValues[k])
8130
8139
  propValues[k] = /* @__PURE__ */ new Set();
@@ -15328,8 +15337,8 @@ function isTransforming(_schema, _ctx) {
15328
15337
  return false;
15329
15338
  }
15330
15339
  case "union": {
15331
- for (const option of def.options) {
15332
- if (isTransforming(option, ctx))
15340
+ for (const option4 of def.options) {
15341
+ if (isTransforming(option4, ctx))
15333
15342
  return true;
15334
15343
  }
15335
15344
  return false;
@@ -26035,14 +26044,14 @@ function createAzure(options = {}) {
26035
26044
  description: "Azure OpenAI resource name"
26036
26045
  });
26037
26046
  const apiVersion = (_a17 = options.apiVersion) != null ? _a17 : "v1";
26038
- const url2 = ({ path: path25, modelId }) => {
26047
+ const url2 = ({ path: path26, modelId }) => {
26039
26048
  var _a24;
26040
26049
  const baseUrlPrefix = (_a24 = options.baseURL) != null ? _a24 : `https://${getResourceName()}.openai.azure.com/openai`;
26041
26050
  let fullUrl;
26042
26051
  if (options.useDeploymentBasedUrls) {
26043
- fullUrl = new URL(`${baseUrlPrefix}/deployments/${modelId}${path25}`);
26052
+ fullUrl = new URL(`${baseUrlPrefix}/deployments/${modelId}${path26}`);
26044
26053
  } else {
26045
- fullUrl = new URL(`${baseUrlPrefix}/v1${path25}`);
26054
+ fullUrl = new URL(`${baseUrlPrefix}/v1${path26}`);
26046
26055
  }
26047
26056
  fullUrl.searchParams.set("api-version", apiVersion);
26048
26057
  return fullUrl.toString();
@@ -34553,7 +34562,7 @@ function isTestMessage(value) {
34553
34562
  }
34554
34563
  return candidate.content.every(isJsonObject);
34555
34564
  }
34556
- var EVALUATOR_KIND_VALUES = ["code", "llm_judge"];
34565
+ var EVALUATOR_KIND_VALUES = ["code", "llm_judge", "rubric"];
34557
34566
  var EVALUATOR_KIND_SET = new Set(EVALUATOR_KIND_VALUES);
34558
34567
  function isEvaluatorKind(value) {
34559
34568
  return typeof value === "string" && EVALUATOR_KIND_SET.has(value);
@@ -34920,6 +34929,29 @@ async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId
34920
34929
  }
34921
34930
  }
34922
34931
  const _model = asString2(rawEvaluator.model);
34932
+ if (typeValue === "rubric") {
34933
+ const rubrics = rawEvaluator.rubrics;
34934
+ if (!Array.isArray(rubrics)) {
34935
+ logWarning2(`Skipping rubric evaluator '${name16}' in '${evalId}': missing rubrics array`);
34936
+ continue;
34937
+ }
34938
+ const parsedRubrics = rubrics.filter((r) => isJsonObject2(r)).map((rubric, index) => ({
34939
+ id: asString2(rubric.id) ?? `rubric-${index + 1}`,
34940
+ description: asString2(rubric.description) ?? "",
34941
+ weight: typeof rubric.weight === "number" ? rubric.weight : 1,
34942
+ required: typeof rubric.required === "boolean" ? rubric.required : true
34943
+ })).filter((r) => r.description.length > 0);
34944
+ if (parsedRubrics.length === 0) {
34945
+ logWarning2(`Skipping rubric evaluator '${name16}' in '${evalId}': no valid rubrics found`);
34946
+ continue;
34947
+ }
34948
+ evaluators.push({
34949
+ name: name16,
34950
+ type: "rubric",
34951
+ rubrics: parsedRubrics
34952
+ });
34953
+ continue;
34954
+ }
34923
34955
  evaluators.push({
34924
34956
  name: name16,
34925
34957
  type: "llm_judge",
@@ -35390,7 +35422,7 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
35390
35422
  continue;
35391
35423
  }
35392
35424
  const conversationId = asString5(evalcase.conversation_id);
35393
- const outcome = asString5(evalcase.outcome);
35425
+ const outcome = asString5(evalcase.expected_outcome) ?? asString5(evalcase.outcome);
35394
35426
  const inputMessagesValue = evalcase.input_messages;
35395
35427
  const expectedMessagesValue = evalcase.expected_messages;
35396
35428
  if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
@@ -35444,6 +35476,33 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
35444
35476
  logError(`Skipping eval case '${id}': ${message}`);
35445
35477
  continue;
35446
35478
  }
35479
+ const inlineRubrics = evalcase.rubrics;
35480
+ if (inlineRubrics !== void 0 && Array.isArray(inlineRubrics)) {
35481
+ const rubricItems = inlineRubrics.filter((r) => isJsonObject(r) || typeof r === "string").map((rubric, index) => {
35482
+ if (typeof rubric === "string") {
35483
+ return {
35484
+ id: `rubric-${index + 1}`,
35485
+ description: rubric,
35486
+ weight: 1,
35487
+ required: true
35488
+ };
35489
+ }
35490
+ return {
35491
+ id: asString5(rubric.id) ?? `rubric-${index + 1}`,
35492
+ description: asString5(rubric.description) ?? "",
35493
+ weight: typeof rubric.weight === "number" ? rubric.weight : 1,
35494
+ required: typeof rubric.required === "boolean" ? rubric.required : true
35495
+ };
35496
+ }).filter((r) => r.description.length > 0);
35497
+ if (rubricItems.length > 0) {
35498
+ const rubricEvaluator = {
35499
+ name: "rubric",
35500
+ type: "rubric",
35501
+ rubrics: rubricItems
35502
+ };
35503
+ evaluators = evaluators ? [rubricEvaluator, ...evaluators] : [rubricEvaluator];
35504
+ }
35505
+ }
35447
35506
  const userFilePaths = [];
35448
35507
  for (const segment of inputSegments) {
35449
35508
  if (segment.type === "file" && typeof segment.resolvedPath === "string") {
@@ -35536,6 +35595,9 @@ var AzureProvider = class {
35536
35595
  retryConfig: this.retryConfig
35537
35596
  });
35538
35597
  }
35598
+ asLanguageModel() {
  // Hands back this provider's `model` field unchanged. Elsewhere in this file the
  // result of `asLanguageModel?.()` is passed as `model` to generateText().
  // NOTE(review): where `this.model` is assigned is outside this hunk - confirm it is always set.
35599
+ return this.model;
35600
+ }
35539
35601
  };
35540
35602
  var AnthropicProvider = class {
35541
35603
  constructor(targetName, config2) {
@@ -35569,6 +35631,9 @@ var AnthropicProvider = class {
35569
35631
  providerOptions
35570
35632
  });
35571
35633
  }
35634
+ asLanguageModel() {
  // Hands back this provider's `model` field unchanged. Elsewhere in this file the
  // result of `asLanguageModel?.()` is passed as `model` to generateText().
  // NOTE(review): where `this.model` is assigned is outside this hunk - confirm it is always set.
35635
+ return this.model;
35636
+ }
35572
35637
  };
35573
35638
  var GeminiProvider = class {
35574
35639
  constructor(targetName, config2) {
@@ -35599,6 +35664,9 @@ var GeminiProvider = class {
35599
35664
  retryConfig: this.retryConfig
35600
35665
  });
35601
35666
  }
35667
+ asLanguageModel() {
  // Hands back this provider's `model` field unchanged. Elsewhere in this file the
  // result of `asLanguageModel?.()` is passed as `model` to generateText().
  // NOTE(review): where `this.model` is assigned is outside this hunk - confirm it is always set.
35668
+ return this.model;
35669
+ }
35602
35670
  };
35603
35671
  function buildAzureOptions(config2) {
35604
35672
  const options = {
@@ -35828,7 +35896,7 @@ async function withRetry(fn, retryConfig, signal) {
35828
35896
  }
35829
35897
  var execAsync2 = promisify2(execWithCallback);
35830
35898
  var DEFAULT_MAX_BUFFER = 10 * 1024 * 1024;
35831
- async function defaultCommandRunner(command, options) {
35899
+ async function defaultCommandRunner(command5, options) {
35832
35900
  const execOptions = {
35833
35901
  cwd: options.cwd,
35834
35902
  env: options.env,
@@ -35838,7 +35906,7 @@ async function defaultCommandRunner(command, options) {
35838
35906
  shell: process.platform === "win32" ? "powershell.exe" : void 0
35839
35907
  };
35840
35908
  try {
35841
- const { stdout, stderr } = await execAsync2(command, execOptions);
35909
+ const { stdout, stderr } = await execAsync2(command5, execOptions);
35842
35910
  return {
35843
35911
  stdout,
35844
35912
  stderr,
@@ -37262,6 +37330,144 @@ function createProvider(target) {
37262
37330
  }
37263
37331
  }
37264
37332
  }
37333
+ var rubricCheckResultSchema = external_exports.object({
  // Shape of one per-rubric verdict expected in the judge's JSON reply:
  // the rubric id, a boolean `satisfied` flag and a short free-text reasoning.
37334
+ id: external_exports.string().describe("The ID of the rubric item being checked"),
37335
+ satisfied: external_exports.boolean().describe("Whether this rubric requirement is met"),
37336
+ reasoning: external_exports.string().describe("Brief explanation (1-2 sentences) for this check")
37337
+ });
37338
+ var rubricEvaluationSchema = external_exports.object({
  // Shape of the whole judge reply: an array of per-rubric checks plus an overall summary.
  // RubricEvaluator.evaluate() parses the judge's cleaned JSON text with this schema.
37339
+ checks: external_exports.array(rubricCheckResultSchema).describe("Results for each rubric item"),
37340
+ overall_reasoning: external_exports.string().describe("Overall assessment summary (1-2 sentences)")
37341
+ });
37342
/**
 * Evaluator that grades a candidate answer against a list of rubric items.
 *
 * A judge language model is asked for a per-rubric verdict (as JSON); the
 * verdicts are then folded into a weighted score, a pass/borderline/fail
 * verdict, and human-readable hit/miss lists.
 */
var RubricEvaluator = class {
  kind = "rubric";
  config;
  resolveJudgeProvider;

  /**
   * @param {object} options
   * @param {object} options.config Evaluator config; `name` and `rubrics` are read.
   * @param {Function} options.resolveJudgeProvider Async callback receiving the
   *   evaluation context and returning the judge provider (or a falsy value).
   */
  constructor(options) {
    this.config = options.config;
    this.resolveJudgeProvider = options.resolveJudgeProvider;
  }

  /**
   * Runs the rubric evaluation for one candidate answer.
   *
   * @param {object} context Evaluation context; `evalCase` and `candidate` are
   *   read when building the prompt.
   * @returns {Promise<object>} `{ score, verdict, hits, misses,
   *   expectedAspectCount, reasoning, evaluatorRawRequest }`.
   * @throws {Error} When no judge provider or rubrics are available, when the
   *   provider exposes no language model, or when the judge reply could not be
   *   obtained/parsed in three attempts (the last failure is attached as `cause`).
   */
  async evaluate(context) {
    const judgeProvider = await this.resolveJudgeProvider(context);
    if (!judgeProvider) {
      throw new Error("No judge provider available for rubric evaluation");
    }
    if (!this.config.rubrics || this.config.rubrics.length === 0) {
      throw new Error(
        `No rubrics found for evaluator "${this.config.name}". Run "agentv generate rubrics" first.`
      );
    }
    const prompt = this.buildPrompt(context, this.config.rubrics);
    const model = judgeProvider.asLanguageModel?.();
    if (!model) {
      throw new Error("Judge provider does not support language model interface");
    }
    const system = `You are an expert evaluator. Evaluate the candidate answer against each rubric item.
You must return a valid JSON object matching this schema:
{
"checks": [
{
"id": "string (rubric id)",
"satisfied": boolean,
"reasoning": "string (brief explanation)"
}
],
"overall_reasoning": "string (summary)"
}`;
    let result;
    let lastError;
    // Every failure (request error, invalid JSON, schema mismatch) triggers a retry.
    for (let attempt = 1; attempt <= 3; attempt++) {
      try {
        const { text: text2 } = await generateText({
          model,
          system,
          prompt
        });
        // Judges frequently wrap JSON in a Markdown code fence; strip it before parsing.
        const cleaned = text2.replace(/```json\n?|```/g, "").trim();
        result = rubricEvaluationSchema.parse(JSON.parse(cleaned));
        break;
      } catch (e) {
        lastError = e instanceof Error ? e : new Error(String(e));
      }
    }
    if (!result) {
      throw new Error(
        `Failed to parse rubric evaluation result after 3 attempts: ${lastError?.message}`,
        { cause: lastError }
      );
    }
    const { score, verdict, hits, misses } = this.calculateScore(result, this.config.rubrics);
    return {
      score,
      verdict,
      hits,
      misses,
      expectedAspectCount: this.config.rubrics.length,
      reasoning: result.overall_reasoning,
      evaluatorRawRequest: {
        prompt
      }
    };
  }

  /**
   * Builds the user prompt: question, expected outcome, optional reference
   * answer, the candidate answer, and one bullet per rubric item.
   *
   * @param {object} context Evaluation context (`evalCase`, `candidate`).
   * @param {Array<object>} rubrics Items with `id`, `description`, `weight`, `required`.
   * @returns {string} Newline-joined prompt text.
   */
  buildPrompt(context, rubrics) {
    const parts = [
      "You are an expert evaluator. Evaluate the candidate answer against each rubric item below.",
      "",
      "[[ ## question ## ]]",
      context.evalCase.question,
      "",
      "[[ ## expected_outcome ## ]]",
      context.evalCase.expected_outcome,
      ""
    ];
    if (context.evalCase.reference_answer && context.evalCase.reference_answer.trim().length > 0) {
      parts.push("[[ ## reference_answer ## ]]", context.evalCase.reference_answer, "");
    }
    parts.push("[[ ## candidate_answer ## ]]", context.candidate, "", "[[ ## rubrics ## ]]");
    for (const rubric of rubrics) {
      const requiredLabel = rubric.required ? " (REQUIRED)" : "";
      // The weight is only mentioned when it differs from the default of 1.
      const weightLabel = rubric.weight !== 1 ? ` (weight: ${rubric.weight})` : "";
      parts.push(`- [${rubric.id}]${requiredLabel}${weightLabel}: ${rubric.description}`);
    }
    parts.push("", "For each rubric, determine if it is satisfied and provide brief reasoning.");
    return parts.join("\n");
  }

  /**
   * Aggregates the judge's checks into a weighted score and verdict.
   *
   * Scoring is driven by the configured rubrics rather than by whatever the
   * judge returned: a rubric with no matching check counts as a miss (and as a
   * failure when it is required), checks for unknown ids are ignored, and when
   * the same id appears more than once only the first check is used.
   *
   * @param {object} result Parsed judge reply; `checks` is read.
   * @param {Array<object>} rubrics Configured rubric items.
   * @returns {{score: number, verdict: string, hits: string[], misses: string[]}}
   *   `score` is earned weight / total weight clamped to [0, 1] (0 when the
   *   total weight is not positive); `verdict` is "fail" if any required rubric
   *   is unmet, otherwise "pass" (>= 0.8), "borderline" (>= 0.6) or "fail".
   */
  calculateScore(result, rubrics) {
    const checksById = new Map();
    for (const check2 of result.checks) {
      if (!checksById.has(check2.id)) {
        checksById.set(check2.id, check2);
      }
    }
    const hits = [];
    const misses = [];
    let totalWeight = 0;
    let earnedWeight = 0;
    let failedRequired = false;
    for (const rubric of rubrics) {
      totalWeight += rubric.weight;
      const check2 = checksById.get(rubric.id);
      if (check2?.satisfied) {
        earnedWeight += rubric.weight;
        hits.push(`[${rubric.id}] ${rubric.description}: ${check2.reasoning}`);
        continue;
      }
      const reasoning = check2 ? check2.reasoning : "No check returned by the judge for this rubric";
      misses.push(`[${rubric.id}] ${rubric.description}: ${reasoning}`);
      if (rubric.required) {
        failedRequired = true;
      }
    }
    const score = totalWeight > 0 ? Math.min(1, Math.max(0, earnedWeight / totalWeight)) : 0;
    let verdict;
    if (failedRequired) {
      verdict = "fail";
    } else if (score >= 0.8) {
      verdict = "pass";
    } else if (score >= 0.6) {
      verdict = "borderline";
    } else {
      verdict = "fail";
    }
    return { score, verdict, hits, misses };
  }
};
37265
37471
  var DEFAULT_EVALUATOR_TEMPLATE = `You are an expert evaluator. Your goal is to grade the candidate_answer based on how well it achieves the expected_outcome for the original task.
37266
37472
 
37267
37473
  Use the reference_answer as a gold standard for a high-quality response (if provided). The candidate_answer does not need to match it verbatim, but should capture the key points and follow the same spirit.
@@ -37624,7 +37830,7 @@ function pLimit(concurrency) {
37624
37830
  activeCount--;
37625
37831
  resumeNext();
37626
37832
  };
37627
- const run = async (function_, resolve2, arguments_) => {
37833
+ const run2 = async (function_, resolve2, arguments_) => {
37628
37834
  const result = (async () => function_(...arguments_))();
37629
37835
  resolve2(result);
37630
37836
  try {
@@ -37637,7 +37843,7 @@ function pLimit(concurrency) {
37637
37843
  new Promise((internalResolve) => {
37638
37844
  queue.enqueue(internalResolve);
37639
37845
  }).then(
37640
- run.bind(void 0, function_, resolve2, arguments_)
37846
+ run2.bind(void 0, function_, resolve2, arguments_)
37641
37847
  );
37642
37848
  (async () => {
37643
37849
  await Promise.resolve();
@@ -38214,6 +38420,7 @@ async function runEvaluatorList(options) {
38214
38420
  name: evaluator.name,
38215
38421
  type: evaluator.type,
38216
38422
  score: score2.score,
38423
+ verdict: score2.verdict,
38217
38424
  hits: score2.hits,
38218
38425
  misses: score2.misses,
38219
38426
  reasoning: score2.reasoning,
@@ -38241,6 +38448,40 @@ async function runEvaluatorList(options) {
38241
38448
  name: evaluator.name,
38242
38449
  type: evaluator.type,
38243
38450
  score: score2.score,
38451
+ verdict: score2.verdict,
38452
+ hits: score2.hits,
38453
+ misses: score2.misses,
38454
+ reasoning: score2.reasoning,
38455
+ evaluator_provider_request: score2.evaluatorRawRequest
38456
+ });
38457
+ continue;
38458
+ }
38459
+ if (evaluator.type === "rubric") {
38460
+ const rubricEvaluator = new RubricEvaluator({
38461
+ config: evaluator,
38462
+ resolveJudgeProvider: async (context) => {
38463
+ if (context.judgeProvider) {
38464
+ return context.judgeProvider;
38465
+ }
38466
+ return judgeProvider;
38467
+ }
38468
+ });
38469
+ const score2 = await rubricEvaluator.evaluate({
38470
+ evalCase,
38471
+ candidate,
38472
+ target,
38473
+ provider,
38474
+ attempt,
38475
+ promptInputs,
38476
+ now,
38477
+ judgeProvider
38478
+ });
38479
+ scored.push({ score: score2, name: evaluator.name, type: evaluator.type });
38480
+ evaluatorResults.push({
38481
+ name: evaluator.name,
38482
+ type: evaluator.type,
38483
+ score: score2.score,
38484
+ verdict: score2.verdict,
38244
38485
  hits: score2.hits,
38245
38486
  misses: score2.misses,
38246
38487
  reasoning: score2.reasoning,
@@ -38470,8 +38711,81 @@ function isTimeoutLike(error40) {
38470
38711
  const value = String(error40).toLowerCase();
38471
38712
  return value.includes("timeout");
38472
38713
  }
38473
- function createAgentKernel() {
38474
- return { status: "stub" };
38714
+ var rubricItemSchema = external_exports.object({
  // Shape of one generated rubric item. `weight` falls back to 1 and `required`
  // to true when the model leaves them out (see the .default() calls below).
38715
+ id: external_exports.string().describe("Short identifier for this rubric (e.g., clarity, completeness)"),
38716
+ description: external_exports.string().describe("What this rubric checks for"),
38717
+ weight: external_exports.number().default(1).describe("Relative importance (default 1.0)"),
38718
+ required: external_exports.boolean().default(true).describe("Whether this is a mandatory requirement")
38719
+ });
38720
+ var rubricGenerationSchema = external_exports.object({
  // Shape of the whole rubric-generation reply; generateRubrics() parses the
  // model's cleaned JSON text with this schema and returns its `rubrics` array.
38721
+ rubrics: external_exports.array(rubricItemSchema).describe("List of evaluation rubrics")
38722
+ });
38723
/**
 * Asks a language model to propose evaluation rubrics for an expected outcome.
 *
 * @param {object} options
 * @param {string} options.expectedOutcome Outcome the rubrics must cover.
 * @param {string} [options.question] Optional original question.
 * @param {string} [options.referenceAnswer] Optional gold-standard answer.
 * @param {object} options.provider Provider exposing `asLanguageModel()`.
 * @returns {Promise<Array<object>>} The `rubrics` array of the parsed model reply.
 * @throws {Error} When the provider is missing or exposes no language model, or
 *   when no valid reply was obtained in three attempts (the last failure is
 *   attached as `cause`).
 */
async function generateRubrics(options) {
  const { expectedOutcome, question, referenceAnswer, provider } = options;
  // Check the provider first so a missing/unsupported provider yields the
  // descriptive error below instead of a TypeError, before any prompt work.
  const model = provider?.asLanguageModel?.();
  if (!model) {
    throw new Error("Provider does not support language model interface");
  }
  const prompt = buildPrompt(expectedOutcome, question, referenceAnswer);
  const system = `You are an expert at creating evaluation rubrics.
You must return a valid JSON object matching this schema:
{
"rubrics": [
{
"id": "string (short identifier)",
"description": "string (what to check)",
"weight": number (default 1.0),
"required": boolean (default true)
}
]
}`;
  let result;
  let lastError;
  // Every failure (request error, invalid JSON, schema mismatch) triggers a retry.
  for (let attempt = 1; attempt <= 3; attempt++) {
    try {
      const { text: text2 } = await generateText({
        model,
        system,
        prompt
      });
      // Models frequently wrap JSON in a Markdown code fence; strip it before parsing.
      const cleaned = text2.replace(/```json\n?|```/g, "").trim();
      result = rubricGenerationSchema.parse(JSON.parse(cleaned));
      break;
    } catch (e) {
      lastError = e instanceof Error ? e : new Error(String(e));
    }
  }
  if (!result) {
    throw new Error(`Failed to parse generated rubrics after 3 attempts: ${lastError?.message}`, {
      cause: lastError
    });
  }
  return result.rubrics;
}
38763
/**
 * Renders the rubric-generation prompt.
 *
 * The fixed instructions are followed by the expected outcome; the question
 * and reference answer sections are appended only when they contain
 * non-whitespace text.
 *
 * @param {string} expectedOutcome Outcome the rubrics must cover.
 * @param {string} [question] Optional original question.
 * @param {string} [referenceAnswer] Optional gold-standard answer.
 * @returns {string} Newline-joined prompt text.
 */
function buildPrompt(expectedOutcome, question, referenceAnswer) {
  const hasText = (value) => Boolean(value) && value.trim().length > 0;
  // Each section is a header line, its body, and a blank separator line.
  const section = (header, body) => [header, body, ""];

  const lines = [
    "You are an expert at creating evaluation rubrics.",
    "Given the expected outcome (and optionally the question and reference answer),",
    "generate a list of specific, measurable rubric items to evaluate whether an answer meets the expected outcome.",
    "",
    "Each rubric should:",
    "- Be specific and testable",
    "- Have a short, descriptive ID",
    "- Include a clear description of what to check",
    "- Indicate if it is required (mandatory) or optional",
    "- Have an appropriate weight (default 1.0, use higher values for more important aspects)",
    "",
    "Generate 3-7 rubric items that comprehensively cover the expected outcome.",
    "",
    ...section("[[ ## expected_outcome ## ]]", expectedOutcome)
  ];
  if (hasText(question)) {
    lines.push(...section("[[ ## question ## ]]", question));
  }
  if (hasText(referenceAnswer)) {
    lines.push(...section("[[ ## reference_answer ## ]]", referenceAnswer));
  }
  return lines.join("\n");
}
38476
38790
 
38477
38791
  // src/commands/eval/env.ts
@@ -38927,12 +39241,12 @@ var ProgressDisplay = class {
38927
39241
  }
38928
39242
  addLogPaths(paths) {
38929
39243
  const newPaths = [];
38930
- for (const path25 of paths) {
38931
- if (this.logPathSet.has(path25)) {
39244
+ for (const path26 of paths) {
39245
+ if (this.logPathSet.has(path26)) {
38932
39246
  continue;
38933
39247
  }
38934
- this.logPathSet.add(path25);
38935
- newPaths.push(path25);
39248
+ this.logPathSet.add(path26);
39249
+ newPaths.push(path26);
38936
39250
  }
38937
39251
  if (newPaths.length === 0) {
38938
39252
  return;
@@ -38948,8 +39262,8 @@ var ProgressDisplay = class {
38948
39262
  this.hasPrintedLogHeader = true;
38949
39263
  }
38950
39264
  const startIndex = this.logPaths.length - newPaths.length;
38951
- newPaths.forEach((path25, offset) => {
38952
- console.log(`${startIndex + offset + 1}. ${path25}`);
39265
+ newPaths.forEach((path26, offset) => {
39266
+ console.log(`${startIndex + offset + 1}. ${path26}`);
38953
39267
  });
38954
39268
  }
38955
39269
  scheduleRender() {
@@ -38997,8 +39311,8 @@ var ProgressDisplay = class {
38997
39311
  if (this.logPaths.length > 0) {
38998
39312
  lines.push("");
38999
39313
  lines.push("Codex CLI logs:");
39000
- this.logPaths.forEach((path25, index) => {
39001
- lines.push(`${index + 1}. ${path25}`);
39314
+ this.logPaths.forEach((path26, index) => {
39315
+ lines.push(`${index + 1}. ${path26}`);
39002
39316
  });
39003
39317
  }
39004
39318
  const rowCount = this.getRenderedRowCount(lines);
@@ -39203,24 +39517,20 @@ function formatEvaluationSummary(summary) {
39203
39517
  return lines.join("\n");
39204
39518
  }
39205
39519
 
39206
- // src/commands/eval/targets.ts
39207
- import { constants as constants5 } from "node:fs";
39208
- import { access as access5 } from "node:fs/promises";
39209
- import path17 from "node:path";
39210
-
39211
39520
  // ../../packages/core/dist/evaluation/validation/index.js
39212
39521
  import { readFile as readFile7 } from "node:fs/promises";
39522
+ import path16 from "node:path";
39213
39523
  import { parse as parse6 } from "yaml";
39214
39524
  import { readFile as readFile23 } from "node:fs/promises";
39215
- import path16 from "node:path";
39525
+ import path23 from "node:path";
39216
39526
  import { parse as parse23 } from "yaml";
39217
39527
  import { readFile as readFile33 } from "node:fs/promises";
39218
- import path23 from "node:path";
39528
+ import path33 from "node:path";
39219
39529
  import { parse as parse33 } from "yaml";
39220
39530
  import { readFile as readFile43 } from "node:fs/promises";
39221
39531
  import { parse as parse42 } from "yaml";
39222
39532
  import { readFile as readFile52 } from "node:fs/promises";
39223
- import path33 from "node:path";
39533
+ import path43 from "node:path";
39224
39534
  import { parse as parse52 } from "yaml";
39225
39535
  var SCHEMA_EVAL_V2 = "agentv-eval-v2";
39226
39536
  var SCHEMA_TARGETS_V2 = "agentv-targets-v2.2";
@@ -39230,12 +39540,12 @@ async function detectFileType(filePath) {
39230
39540
  const content = await readFile7(filePath, "utf8");
39231
39541
  const parsed = parse6(content);
39232
39542
  if (typeof parsed !== "object" || parsed === null) {
39233
- return "unknown";
39543
+ return inferFileTypeFromPath(filePath);
39234
39544
  }
39235
39545
  const record2 = parsed;
39236
39546
  const schema = record2.$schema;
39237
39547
  if (typeof schema !== "string") {
39238
- return "unknown";
39548
+ return inferFileTypeFromPath(filePath);
39239
39549
  }
39240
39550
  switch (schema) {
39241
39551
  case SCHEMA_EVAL_V2:
@@ -39245,18 +39555,31 @@ async function detectFileType(filePath) {
39245
39555
  case SCHEMA_CONFIG_V22:
39246
39556
  return "config";
39247
39557
  default:
39248
- return "unknown";
39558
+ return inferFileTypeFromPath(filePath);
39249
39559
  }
39250
39560
  } catch {
39251
- return "unknown";
39561
+ return inferFileTypeFromPath(filePath);
39252
39562
  }
39253
39563
  }
39564
+ function inferFileTypeFromPath(filePath) {
39565
+ const normalized = path16.normalize(filePath).replace(/\\/g, "/");
39566
+ const basename = path16.basename(filePath);
39567
+ if (normalized.includes("/.agentv/")) {
39568
+ if (basename === "config.yaml" || basename === "config.yml") {
39569
+ return "config";
39570
+ }
39571
+ if (basename === "targets.yaml" || basename === "targets.yml") {
39572
+ return "targets";
39573
+ }
39574
+ }
39575
+ return "eval";
39576
+ }
39254
39577
  function isObject2(value) {
39255
39578
  return typeof value === "object" && value !== null && !Array.isArray(value);
39256
39579
  }
39257
39580
  async function validateEvalFile(filePath) {
39258
39581
  const errors = [];
39259
- const absolutePath = path16.resolve(filePath);
39582
+ const absolutePath = path23.resolve(filePath);
39260
39583
  let parsed;
39261
39584
  try {
39262
39585
  const content = await readFile23(absolutePath, "utf8");
@@ -39323,13 +39646,13 @@ async function validateEvalFile(filePath) {
39323
39646
  message: "Missing or invalid 'id' field (must be a non-empty string)"
39324
39647
  });
39325
39648
  }
39326
- const outcome = evalCase.outcome;
39327
- if (typeof outcome !== "string" || outcome.trim().length === 0) {
39649
+ const expectedOutcome = evalCase.expected_outcome ?? evalCase.outcome;
39650
+ if (expectedOutcome !== void 0 && (typeof expectedOutcome !== "string" || expectedOutcome.trim().length === 0)) {
39328
39651
  errors.push({
39329
39652
  severity: "error",
39330
39653
  filePath: absolutePath,
39331
- location: `${location}.outcome`,
39332
- message: "Missing or invalid 'outcome' field (must be a non-empty string)"
39654
+ location: `${location}.expected_outcome`,
39655
+ message: "Invalid 'expected_outcome' or 'outcome' field (must be a non-empty string if provided)"
39333
39656
  });
39334
39657
  }
39335
39658
  const inputMessages = evalCase.input_messages;
@@ -39605,7 +39928,7 @@ function validateUnknownSettings(target, provider, absolutePath, location, error
39605
39928
  }
39606
39929
  async function validateTargetsFile(filePath) {
39607
39930
  const errors = [];
39608
- const absolutePath = path23.resolve(filePath);
39931
+ const absolutePath = path33.resolve(filePath);
39609
39932
  let parsed;
39610
39933
  try {
39611
39934
  const content = await readFile33(absolutePath, "utf8");
@@ -39884,8 +40207,8 @@ async function validateConfigFile(filePath) {
39884
40207
  }
39885
40208
  const config2 = parsed;
39886
40209
  const schema = config2.$schema;
39887
- if (schema !== SCHEMA_CONFIG_V222) {
39888
- const message = typeof schema === "string" ? `Invalid $schema value '${schema}'. Expected '${SCHEMA_CONFIG_V222}'` : `Missing required field '$schema'. Please add '$schema: ${SCHEMA_CONFIG_V222}' at the top of the file.`;
40210
+ if (schema !== void 0 && schema !== SCHEMA_CONFIG_V222) {
40211
+ const message = `Invalid $schema value '${schema}'. Expected '${SCHEMA_CONFIG_V222}' or omit the field.`;
39889
40212
  errors.push({
39890
40213
  severity: "error",
39891
40214
  filePath,
@@ -39947,7 +40270,7 @@ function isObject3(value) {
39947
40270
  }
39948
40271
  async function validateFileReferences(evalFilePath) {
39949
40272
  const errors = [];
39950
- const absolutePath = path33.resolve(evalFilePath);
40273
+ const absolutePath = path43.resolve(evalFilePath);
39951
40274
  const gitRoot = await findGitRoot(absolutePath);
39952
40275
  if (!gitRoot) {
39953
40276
  errors.push({
@@ -40064,19 +40387,16 @@ async function validateMessagesFileRefs(messages, location, searchRoots, filePat
40064
40387
  }
40065
40388
  }
40066
40389
 
40067
- // src/commands/eval/targets.ts
40390
+ // src/utils/targets.ts
40391
+ import { constants as constants5 } from "node:fs";
40392
+ import { access as access5 } from "node:fs/promises";
40393
+ import path17 from "node:path";
40068
40394
  var TARGET_FILE_CANDIDATES = [
40069
40395
  "targets.yaml",
40070
40396
  "targets.yml",
40071
40397
  path17.join(".agentv", "targets.yaml"),
40072
40398
  path17.join(".agentv", "targets.yml")
40073
40399
  ];
40074
- var ANSI_YELLOW7 = "\x1B[33m";
40075
- var ANSI_RED2 = "\x1B[31m";
40076
- var ANSI_RESET7 = "\x1B[0m";
40077
- function isTTY() {
40078
- return process.stdout.isTTY ?? false;
40079
- }
40080
40400
  async function fileExists5(filePath) {
40081
40401
  try {
40082
40402
  await access5(filePath, constants5.F_OK);
@@ -40085,10 +40405,6 @@ async function fileExists5(filePath) {
40085
40405
  return false;
40086
40406
  }
40087
40407
  }
40088
- async function readTestSuiteTarget(testFilePath) {
40089
- const metadata = await readTestSuiteMetadata(testFilePath);
40090
- return metadata.target;
40091
- }
40092
40408
  async function discoverTargetsFile(options) {
40093
40409
  const { explicitPath, testFilePath, repoRoot, cwd } = options;
40094
40410
  if (explicitPath) {
@@ -40119,6 +40435,18 @@ async function discoverTargetsFile(options) {
40119
40435
  }
40120
40436
  throw new Error("Unable to locate targets.yaml. Use --targets to specify the file explicitly.");
40121
40437
  }
40438
+
40439
+ // src/commands/eval/targets.ts
40440
+ var ANSI_YELLOW7 = "\x1B[33m";
40441
+ var ANSI_RED2 = "\x1B[31m";
40442
+ var ANSI_RESET7 = "\x1B[0m";
40443
+ function isTTY() {
40444
+ return process.stdout.isTTY ?? false;
40445
+ }
40446
+ async function readTestSuiteTarget(testFilePath) {
40447
+ const metadata = await readTestSuiteMetadata(testFilePath);
40448
+ return metadata.target;
40449
+ }
40122
40450
  function pickTargetName(options) {
40123
40451
  const cliName = options.cliTargetName?.trim();
40124
40452
  if (cliName && cliName !== "default") {
@@ -40299,12 +40627,12 @@ function buildDefaultOutputPath(cwd, format) {
40299
40627
  const extension = getDefaultExtension(format);
40300
40628
  return path18.join(cwd, ".agentv", "results", `${baseName}_${timestamp}${extension}`);
40301
40629
  }
40302
- function resolvePromptDirectory(option, cwd) {
40303
- if (option === void 0) {
40630
+ function resolvePromptDirectory(option4, cwd) {
40631
+ if (option4 === void 0) {
40304
40632
  return void 0;
40305
40633
  }
40306
- if (typeof option === "string" && option.trim().length > 0) {
40307
- return path18.resolve(cwd, option);
40634
+ if (typeof option4 === "string" && option4.trim().length > 0) {
40635
+ return path18.resolve(cwd, option4);
40308
40636
  }
40309
40637
  return path18.join(cwd, ".agentv", "prompts");
40310
40638
  }
@@ -40608,56 +40936,119 @@ async function resolveEvaluationRunner() {
40608
40936
  }
40609
40937
 
40610
40938
  // src/commands/eval/index.ts
40611
- function parseInteger(value, fallback) {
40612
- const parsed = Number.parseInt(value, 10);
40613
- if (Number.isNaN(parsed)) {
40614
- return fallback;
40615
- }
40616
- return parsed;
40617
- }
40618
- function registerEvalCommand(program) {
40619
- program.command("eval").description("Run eval suites and report results").argument("<eval-paths...>", "Path(s) or glob(s) to evaluation .yaml file(s)").option("--target <name>", "Override target name from targets.yaml", "default").option("--targets <path>", "Path to targets.yaml (overrides discovery)").option("--eval-id <id>", "Run only the eval case with this identifier").option(
40620
- "--workers <count>",
40621
- "Number of parallel workers (default: 1, max: 50). Can also be set per-target in targets.yaml",
40622
- (value) => parseInteger(value, 1)
40623
- ).option("--out <path>", "Write results to the specified path").option(
40624
- "--output-format <format>",
40625
- "Output format: 'jsonl' or 'yaml' (default: jsonl)",
40626
- "jsonl"
40627
- ).option("--dry-run", "Use mock provider responses instead of real LLM calls", false).option(
40628
- "--dry-run-delay <ms>",
40629
- "Fixed delay in milliseconds for dry-run mode (overridden by delay range if specified)",
40630
- (value) => parseInteger(value, 0),
40631
- 0
40632
- ).option(
40633
- "--dry-run-delay-min <ms>",
40634
- "Minimum delay in milliseconds for dry-run mode (requires --dry-run-delay-max)",
40635
- (value) => parseInteger(value, 0),
40636
- 0
40637
- ).option(
40638
- "--dry-run-delay-max <ms>",
40639
- "Maximum delay in milliseconds for dry-run mode (requires --dry-run-delay-min)",
40640
- (value) => parseInteger(value, 0),
40641
- 0
40642
- ).option(
40643
- "--agent-timeout <seconds>",
40644
- "Timeout in seconds for provider responses (default: 120)",
40645
- (value) => parseInteger(value, 120),
40646
- 120
40647
- ).option(
40648
- "--max-retries <count>",
40649
- "Retry count for timeout recoveries (default: 2)",
40650
- (value) => parseInteger(value, 2),
40651
- 2
40652
- ).option("--cache", "Enable in-memory provider response cache", false).option("--verbose", "Enable verbose logging", false).option(
40653
- "--dump-prompts [dir]",
40654
- "Persist prompt payloads for debugging (optional custom directory)"
40655
- ).action(async (evalPaths, rawOptions) => {
40656
- const resolvedPaths = await resolveEvalPaths(evalPaths, process.cwd());
40939
+ var evalCommand = command({
40940
+ name: "eval",
40941
+ description: "Run eval suites and report results",
40942
+ args: {
40943
+ evalPaths: restPositionals({
40944
+ type: string4,
40945
+ displayName: "eval-paths",
40946
+ description: "Path(s) or glob(s) to evaluation .yaml file(s)"
40947
+ }),
40948
+ target: option({
40949
+ type: string4,
40950
+ long: "target",
40951
+ description: "Override target name from targets.yaml",
40952
+ defaultValue: () => "default"
40953
+ }),
40954
+ targets: option({
40955
+ type: optional2(string4),
40956
+ long: "targets",
40957
+ description: "Path to targets.yaml (overrides discovery)"
40958
+ }),
40959
+ evalId: option({
40960
+ type: optional2(string4),
40961
+ long: "eval-id",
40962
+ description: "Run only the eval case with this identifier"
40963
+ }),
40964
+ workers: option({
40965
+ type: number4,
40966
+ long: "workers",
40967
+ description: "Number of parallel workers (default: 1, max: 50). Can also be set per-target in targets.yaml",
40968
+ defaultValue: () => 1
40969
+ }),
40970
+ out: option({
40971
+ type: optional2(string4),
40972
+ long: "out",
40973
+ description: "Write results to the specified path"
40974
+ }),
40975
+ outputFormat: option({
40976
+ type: string4,
40977
+ long: "output-format",
40978
+ description: "Output format: 'jsonl' or 'yaml' (default: jsonl)",
40979
+ defaultValue: () => "jsonl"
40980
+ }),
40981
+ dryRun: flag({
40982
+ long: "dry-run",
40983
+ description: "Use mock provider responses instead of real LLM calls"
40984
+ }),
40985
+ dryRunDelay: option({
40986
+ type: number4,
40987
+ long: "dry-run-delay",
40988
+ description: "Fixed delay in milliseconds for dry-run mode (overridden by delay range if specified)",
40989
+ defaultValue: () => 0
40990
+ }),
40991
+ dryRunDelayMin: option({
40992
+ type: number4,
40993
+ long: "dry-run-delay-min",
40994
+ description: "Minimum delay in milliseconds for dry-run mode (requires --dry-run-delay-max)",
40995
+ defaultValue: () => 0
40996
+ }),
40997
+ dryRunDelayMax: option({
40998
+ type: number4,
40999
+ long: "dry-run-delay-max",
41000
+ description: "Maximum delay in milliseconds for dry-run mode (requires --dry-run-delay-min)",
41001
+ defaultValue: () => 0
41002
+ }),
41003
+ agentTimeout: option({
41004
+ type: number4,
41005
+ long: "agent-timeout",
41006
+ description: "Timeout in seconds for provider responses (default: 120)",
41007
+ defaultValue: () => 120
41008
+ }),
41009
+ maxRetries: option({
41010
+ type: number4,
41011
+ long: "max-retries",
41012
+ description: "Retry count for timeout recoveries (default: 2)",
41013
+ defaultValue: () => 2
41014
+ }),
41015
+ cache: flag({
41016
+ long: "cache",
41017
+ description: "Enable in-memory provider response cache"
41018
+ }),
41019
+ verbose: flag({
41020
+ long: "verbose",
41021
+ description: "Enable verbose logging"
41022
+ }),
41023
+ dumpPrompts: option({
41024
+ type: optional2(string4),
41025
+ long: "dump-prompts",
41026
+ description: "Directory path for persisting prompt payloads for debugging"
41027
+ })
41028
+ },
41029
+ handler: async (args) => {
41030
+ const resolvedPaths = await resolveEvalPaths(args.evalPaths, process.cwd());
41031
+ const dumpPrompts = args.dumpPrompts !== void 0 ? args.dumpPrompts === "." ? true : args.dumpPrompts : void 0;
41032
+ const rawOptions = {
41033
+ target: args.target,
41034
+ targets: args.targets,
41035
+ evalId: args.evalId,
41036
+ workers: args.workers,
41037
+ out: args.out,
41038
+ outputFormat: args.outputFormat,
41039
+ dryRun: args.dryRun,
41040
+ dryRunDelay: args.dryRunDelay,
41041
+ dryRunDelayMin: args.dryRunDelayMin,
41042
+ dryRunDelayMax: args.dryRunDelayMax,
41043
+ agentTimeout: args.agentTimeout,
41044
+ maxRetries: args.maxRetries,
41045
+ cache: args.cache,
41046
+ verbose: args.verbose,
41047
+ dumpPrompts
41048
+ };
40657
41049
  await runEvalCommand({ testFiles: resolvedPaths, rawOptions });
40658
- });
40659
- return program;
40660
- }
41050
+ }
41051
+ });
40661
41052
  async function resolveEvalPaths(evalPaths, cwd) {
40662
41053
  const normalizedInputs = evalPaths.map((value) => value?.trim()).filter((value) => value);
40663
41054
  if (normalizedInputs.length === 0) {
@@ -40705,14 +41096,201 @@ async function resolveEvalPaths(evalPaths, cwd) {
40705
41096
  return sorted;
40706
41097
  }
40707
41098
 
41099
+ // src/commands/generate/index.ts
41100
+ import { command as command2, flag as flag2, option as option2, optional as optional3, positional as positional2, string as string5, subcommands } from "cmd-ts";
41101
+
41102
+ // src/commands/generate/rubrics.ts
41103
+ import { readFile as readFile8, writeFile as writeFile6 } from "node:fs/promises";
41104
+ import path20 from "node:path";
41105
+ import { pathToFileURL as pathToFileURL2 } from "node:url";
41106
+ import { isMap, isSeq, parseDocument } from "yaml";
41107
+ function isJsonObject3(value) {
41108
+ return typeof value === "object" && value !== null && !Array.isArray(value);
41109
+ }
41110
+ function asString6(value) {
41111
+ return typeof value === "string" ? value : void 0;
41112
+ }
41113
+ async function loadRubricGenerator() {
41114
+ const customGenerator = process.env.AGENTEVO_CLI_RUBRIC_GENERATOR;
41115
+ if (customGenerator) {
41116
+ const generatorPath = path20.resolve(customGenerator);
41117
+ const generatorUrl = pathToFileURL2(generatorPath).href;
41118
+ const module = await import(generatorUrl);
41119
+ return module.generateRubrics;
41120
+ }
41121
+ return generateRubrics;
41122
+ }
41123
+ async function generateRubricsCommand(options) {
41124
+ const { file: file2, target: targetOverride, verbose } = options;
41125
+ console.log(`Generating rubrics for: ${file2}`);
41126
+ const absolutePath = path20.resolve(file2);
41127
+ const content = await readFile8(absolutePath, "utf8");
41128
+ const doc = parseDocument(content);
41129
+ const parsed = doc.toJSON();
41130
+ if (!isJsonObject3(parsed)) {
41131
+ throw new Error(`Invalid YAML file format: ${file2}`);
41132
+ }
41133
+ const suite = parsed;
41134
+ const evalcases = suite.evalcases;
41135
+ if (!Array.isArray(evalcases)) {
41136
+ throw new Error(`No evalcases found in ${file2}`);
41137
+ }
41138
+ const targetSelection = await selectTarget({
41139
+ testFilePath: absolutePath,
41140
+ repoRoot: process.cwd(),
41141
+ cwd: process.cwd(),
41142
+ cliTargetName: targetOverride,
41143
+ dryRun: false,
41144
+ dryRunDelay: 0,
41145
+ dryRunDelayMin: 0,
41146
+ dryRunDelayMax: 0,
41147
+ env: process.env
41148
+ });
41149
+ if (verbose) {
41150
+ console.log(`Using target: ${targetSelection.targetName}`);
41151
+ }
41152
+ const provider = createProvider(targetSelection.resolvedTarget);
41153
+ const generateRubricsFunc = await loadRubricGenerator();
41154
+ let updatedCount = 0;
41155
+ let skippedCount = 0;
41156
+ const evalcasesNode = doc.getIn(["evalcases"]);
41157
+ if (!evalcasesNode || !isSeq(evalcasesNode)) {
41158
+ throw new Error("evalcases must be a sequence");
41159
+ }
41160
+ for (let i = 0; i < evalcases.length; i++) {
41161
+ const rawCase = evalcases[i];
41162
+ if (!isJsonObject3(rawCase)) {
41163
+ continue;
41164
+ }
41165
+ const evalCase = rawCase;
41166
+ const id = asString6(evalCase.id) ?? "unknown";
41167
+ const expectedOutcome = asString6(evalCase.expected_outcome) ?? asString6(evalCase.outcome);
41168
+ if (!expectedOutcome) {
41169
+ if (verbose) {
41170
+ console.log(` Skipping ${id}: no expected_outcome`);
41171
+ }
41172
+ skippedCount++;
41173
+ continue;
41174
+ }
41175
+ if (evalCase.rubrics !== void 0) {
41176
+ if (verbose) {
41177
+ console.log(` Skipping ${id}: rubrics already defined`);
41178
+ }
41179
+ skippedCount++;
41180
+ continue;
41181
+ }
41182
+ console.log(` Generating rubrics for: ${id}`);
41183
+ const question = extractQuestion(evalCase);
41184
+ const referenceAnswer = asString6(evalCase.reference_answer);
41185
+ const rubrics = await generateRubricsFunc({
41186
+ expectedOutcome,
41187
+ question,
41188
+ referenceAnswer,
41189
+ provider
41190
+ });
41191
+ const caseNode = evalcasesNode.items[i];
41192
+ if (caseNode && isMap(caseNode)) {
41193
+ caseNode.set(
41194
+ "rubrics",
41195
+ rubrics.map(
41196
+ (r) => ({
41197
+ id: r.id,
41198
+ description: r.description,
41199
+ weight: r.weight,
41200
+ required: r.required
41201
+ })
41202
+ )
41203
+ );
41204
+ }
41205
+ updatedCount++;
41206
+ if (verbose) {
41207
+ console.log(` Generated ${rubrics.length} rubric(s)`);
41208
+ }
41209
+ }
41210
+ if (updatedCount > 0) {
41211
+ const output = doc.toString();
41212
+ await writeFile6(absolutePath, output, "utf8");
41213
+ console.log(`
41214
+ Updated ${updatedCount} eval case(s) with generated rubrics`);
41215
+ if (skippedCount > 0) {
41216
+ console.log(`Skipped ${skippedCount} eval case(s)`);
41217
+ }
41218
+ } else {
41219
+ console.log("\nNo eval cases updated (all already have rubrics or missing expected_outcome)");
41220
+ }
41221
+ }
41222
+ function extractQuestion(evalCase) {
41223
+ const explicitQuestion = asString6(evalCase.question);
41224
+ if (explicitQuestion) {
41225
+ return explicitQuestion;
41226
+ }
41227
+ const inputMessages = evalCase.input_messages;
41228
+ if (!Array.isArray(inputMessages)) {
41229
+ return void 0;
41230
+ }
41231
+ for (const msg of inputMessages) {
41232
+ if (!isJsonObject3(msg)) {
41233
+ continue;
41234
+ }
41235
+ if (msg.role === "user" && typeof msg.content === "string") {
41236
+ return msg.content;
41237
+ }
41238
+ }
41239
+ return void 0;
41240
+ }
41241
+
41242
+ // src/commands/generate/index.ts
41243
+ var rubricsCommand = command2({
41244
+ name: "rubrics",
41245
+ description: "Generate rubrics from expected_outcome in YAML eval file",
41246
+ args: {
41247
+ file: positional2({
41248
+ type: string5,
41249
+ displayName: "file",
41250
+ description: "Path to YAML eval file"
41251
+ }),
41252
+ target: option2({
41253
+ type: optional3(string5),
41254
+ long: "target",
41255
+ short: "t",
41256
+ description: "Override target for rubric generation (default: file target or openai:gpt-4o)"
41257
+ }),
41258
+ verbose: flag2({
41259
+ long: "verbose",
41260
+ short: "v",
41261
+ description: "Show detailed progress"
41262
+ })
41263
+ },
41264
+ handler: async ({ file: file2, target, verbose }) => {
41265
+ try {
41266
+ await generateRubricsCommand({
41267
+ file: file2,
41268
+ target,
41269
+ verbose
41270
+ });
41271
+ } catch (error40) {
41272
+ console.error(`Error: ${error40.message}`);
41273
+ process.exit(1);
41274
+ }
41275
+ }
41276
+ });
41277
+ var generateCommand = subcommands({
41278
+ name: "generate",
41279
+ description: "Generate evaluation artifacts",
41280
+ cmds: {
41281
+ rubrics: rubricsCommand
41282
+ }
41283
+ });
41284
+
40708
41285
  // src/commands/init/index.ts
40709
41286
  import { existsSync, mkdirSync, writeFileSync } from "node:fs";
40710
- import path21 from "node:path";
41287
+ import path24 from "node:path";
40711
41288
  import * as readline from "node:readline/promises";
41289
+ import { command as command3, option as option3, optional as optional4, string as string6 } from "cmd-ts";
40712
41290
 
40713
41291
  // src/templates/index.ts
40714
41292
  import { readFileSync, readdirSync, statSync } from "node:fs";
40715
- import path20 from "node:path";
41293
+ import path21 from "node:path";
40716
41294
  import { fileURLToPath } from "node:url";
40717
41295
  function getGithubTemplates() {
40718
41296
  return getTemplatesFromDir(".github");
@@ -40724,12 +41302,12 @@ function getClaudeTemplates() {
40724
41302
  return getTemplatesFromDir(".claude");
40725
41303
  }
40726
41304
  function getTemplatesFromDir(subdir) {
40727
- const currentDir = path20.dirname(fileURLToPath(import.meta.url));
41305
+ const currentDir = path21.dirname(fileURLToPath(import.meta.url));
40728
41306
  let templatesDir;
40729
- if (currentDir.includes(`${path20.sep}dist`)) {
40730
- templatesDir = path20.join(currentDir, "templates", subdir);
41307
+ if (currentDir.includes(`${path21.sep}dist`)) {
41308
+ templatesDir = path21.join(currentDir, "templates", subdir);
40731
41309
  } else {
40732
- templatesDir = path20.join(currentDir, subdir);
41310
+ templatesDir = path21.join(currentDir, subdir);
40733
41311
  }
40734
41312
  return readTemplatesRecursively(templatesDir, "");
40735
41313
  }
@@ -40737,15 +41315,15 @@ function readTemplatesRecursively(dir, relativePath) {
40737
41315
  const templates = [];
40738
41316
  const entries = readdirSync(dir);
40739
41317
  for (const entry of entries) {
40740
- const fullPath = path20.join(dir, entry);
41318
+ const fullPath = path21.join(dir, entry);
40741
41319
  const stat6 = statSync(fullPath);
40742
- const entryRelativePath = relativePath ? path20.join(relativePath, entry) : entry;
41320
+ const entryRelativePath = relativePath ? path21.join(relativePath, entry) : entry;
40743
41321
  if (stat6.isDirectory()) {
40744
41322
  templates.push(...readTemplatesRecursively(fullPath, entryRelativePath));
40745
41323
  } else {
40746
41324
  const content = readFileSync(fullPath, "utf-8");
40747
41325
  templates.push({
40748
- path: entryRelativePath.split(path20.sep).join("/"),
41326
+ path: entryRelativePath.split(path21.sep).join("/"),
40749
41327
  // Normalize to forward slashes
40750
41328
  content
40751
41329
  });
@@ -40768,10 +41346,10 @@ async function promptYesNo(message) {
40768
41346
  }
40769
41347
  }
40770
41348
  async function initCommand(options = {}) {
40771
- const targetPath = path21.resolve(options.targetPath ?? ".");
40772
- const githubDir = path21.join(targetPath, ".github");
40773
- const agentvDir = path21.join(targetPath, ".agentv");
40774
- const claudeDir = path21.join(targetPath, ".claude");
41349
+ const targetPath = path24.resolve(options.targetPath ?? ".");
41350
+ const githubDir = path24.join(targetPath, ".github");
41351
+ const agentvDir = path24.join(targetPath, ".agentv");
41352
+ const claudeDir = path24.join(targetPath, ".claude");
40775
41353
  const githubTemplates = getGithubTemplates();
40776
41354
  const agentvTemplates = getAgentvTemplates();
40777
41355
  const claudeTemplates = getClaudeTemplates();
@@ -40779,32 +41357,32 @@ async function initCommand(options = {}) {
40779
41357
  const otherAgentvTemplates = agentvTemplates.filter((t) => t.path !== ".env.template");
40780
41358
  const existingFiles = [];
40781
41359
  if (envTemplate) {
40782
- const envFilePath = path21.join(targetPath, ".env.template");
41360
+ const envFilePath = path24.join(targetPath, ".env.template");
40783
41361
  if (existsSync(envFilePath)) {
40784
41362
  existingFiles.push(".env.template");
40785
41363
  }
40786
41364
  }
40787
41365
  if (existsSync(githubDir)) {
40788
41366
  for (const template of githubTemplates) {
40789
- const targetFilePath = path21.join(githubDir, template.path);
41367
+ const targetFilePath = path24.join(githubDir, template.path);
40790
41368
  if (existsSync(targetFilePath)) {
40791
- existingFiles.push(path21.relative(targetPath, targetFilePath));
41369
+ existingFiles.push(path24.relative(targetPath, targetFilePath));
40792
41370
  }
40793
41371
  }
40794
41372
  }
40795
41373
  if (existsSync(agentvDir)) {
40796
41374
  for (const template of otherAgentvTemplates) {
40797
- const targetFilePath = path21.join(agentvDir, template.path);
41375
+ const targetFilePath = path24.join(agentvDir, template.path);
40798
41376
  if (existsSync(targetFilePath)) {
40799
- existingFiles.push(path21.relative(targetPath, targetFilePath));
41377
+ existingFiles.push(path24.relative(targetPath, targetFilePath));
40800
41378
  }
40801
41379
  }
40802
41380
  }
40803
41381
  if (existsSync(claudeDir)) {
40804
41382
  for (const template of claudeTemplates) {
40805
- const targetFilePath = path21.join(claudeDir, template.path);
41383
+ const targetFilePath = path24.join(claudeDir, template.path);
40806
41384
  if (existsSync(targetFilePath)) {
40807
- existingFiles.push(path21.relative(targetPath, targetFilePath));
41385
+ existingFiles.push(path24.relative(targetPath, targetFilePath));
40808
41386
  }
40809
41387
  }
40810
41388
  }
@@ -40831,36 +41409,36 @@ async function initCommand(options = {}) {
40831
41409
  mkdirSync(claudeDir, { recursive: true });
40832
41410
  }
40833
41411
  if (envTemplate) {
40834
- const envFilePath = path21.join(targetPath, ".env.template");
41412
+ const envFilePath = path24.join(targetPath, ".env.template");
40835
41413
  writeFileSync(envFilePath, envTemplate.content, "utf-8");
40836
41414
  console.log("Created .env.template");
40837
41415
  }
40838
41416
  for (const template of githubTemplates) {
40839
- const targetFilePath = path21.join(githubDir, template.path);
40840
- const targetDirPath = path21.dirname(targetFilePath);
41417
+ const targetFilePath = path24.join(githubDir, template.path);
41418
+ const targetDirPath = path24.dirname(targetFilePath);
40841
41419
  if (!existsSync(targetDirPath)) {
40842
41420
  mkdirSync(targetDirPath, { recursive: true });
40843
41421
  }
40844
41422
  writeFileSync(targetFilePath, template.content, "utf-8");
40845
- console.log(`Created ${path21.relative(targetPath, targetFilePath)}`);
41423
+ console.log(`Created ${path24.relative(targetPath, targetFilePath)}`);
40846
41424
  }
40847
41425
  for (const template of otherAgentvTemplates) {
40848
- const targetFilePath = path21.join(agentvDir, template.path);
40849
- const targetDirPath = path21.dirname(targetFilePath);
41426
+ const targetFilePath = path24.join(agentvDir, template.path);
41427
+ const targetDirPath = path24.dirname(targetFilePath);
40850
41428
  if (!existsSync(targetDirPath)) {
40851
41429
  mkdirSync(targetDirPath, { recursive: true });
40852
41430
  }
40853
41431
  writeFileSync(targetFilePath, template.content, "utf-8");
40854
- console.log(`Created ${path21.relative(targetPath, targetFilePath)}`);
41432
+ console.log(`Created ${path24.relative(targetPath, targetFilePath)}`);
40855
41433
  }
40856
41434
  for (const template of claudeTemplates) {
40857
- const targetFilePath = path21.join(claudeDir, template.path);
40858
- const targetDirPath = path21.dirname(targetFilePath);
41435
+ const targetFilePath = path24.join(claudeDir, template.path);
41436
+ const targetDirPath = path24.dirname(targetFilePath);
40859
41437
  if (!existsSync(targetDirPath)) {
40860
41438
  mkdirSync(targetDirPath, { recursive: true });
40861
41439
  }
40862
41440
  writeFileSync(targetFilePath, template.content, "utf-8");
40863
- console.log(`Created ${path21.relative(targetPath, targetFilePath)}`);
41441
+ console.log(`Created ${path24.relative(targetPath, targetFilePath)}`);
40864
41442
  }
40865
41443
  console.log("\nAgentV initialized successfully!");
40866
41444
  console.log("\nFiles installed to root:");
@@ -40868,17 +41446,17 @@ async function initCommand(options = {}) {
40868
41446
  console.log(" - .env.template");
40869
41447
  }
40870
41448
  console.log(`
40871
- Files installed to ${path21.relative(targetPath, githubDir)}:`);
41449
+ Files installed to ${path24.relative(targetPath, githubDir)}:`);
40872
41450
  for (const t of githubTemplates) {
40873
41451
  console.log(` - ${t.path}`);
40874
41452
  }
40875
41453
  console.log(`
40876
- Files installed to ${path21.relative(targetPath, agentvDir)}:`);
41454
+ Files installed to ${path24.relative(targetPath, agentvDir)}:`);
40877
41455
  for (const t of otherAgentvTemplates) {
40878
41456
  console.log(` - ${t.path}`);
40879
41457
  }
40880
41458
  console.log(`
40881
- Files installed to ${path21.relative(targetPath, claudeDir)}:`);
41459
+ Files installed to ${path24.relative(targetPath, claudeDir)}:`);
40882
41460
  for (const t of claudeTemplates) {
40883
41461
  console.log(` - ${t.path}`);
40884
41462
  }
@@ -40887,15 +41465,28 @@ Files installed to ${path21.relative(targetPath, claudeDir)}:`);
40887
41465
  console.log(" 2. Configure targets in .agentv/targets.yaml");
40888
41466
  console.log(" 3. Create eval files using the schema and prompt templates");
40889
41467
  }
41468
+ var initCmdTsCommand = command3({
41469
+ name: "init",
41470
+ description: "Initialize AgentV in your project (installs prompt templates and schema to .github)",
41471
+ args: {
41472
+ path: option3({
41473
+ type: optional4(string6),
41474
+ long: "path",
41475
+ description: "Target directory for initialization (default: current directory)"
41476
+ })
41477
+ },
41478
+ handler: async ({ path: targetPath }) => {
41479
+ try {
41480
+ await initCommand({ targetPath });
41481
+ } catch (error40) {
41482
+ console.error(`Error: ${error40.message}`);
41483
+ process.exit(1);
41484
+ }
41485
+ }
41486
+ });
40890
41487
 
40891
- // src/commands/status.ts
40892
- function registerStatusCommand(program) {
40893
- program.command("status").description("Show the latest AgentV kernel status").action(() => {
40894
- const kernel = createAgentKernel();
40895
- console.log(`Kernel status: ${kernel.status}`);
40896
- });
40897
- return program;
40898
- }
41488
+ // src/commands/validate/index.ts
41489
+ import { command as command4, restPositionals as restPositionals2, string as string7 } from "cmd-ts";
40899
41490
 
40900
41491
  // src/commands/validate/format-output.ts
40901
41492
  var ANSI_RED3 = "\x1B[31m";
@@ -40980,7 +41571,7 @@ function isTTY2() {
40980
41571
  // src/commands/validate/validate-files.ts
40981
41572
  import { constants as constants7 } from "node:fs";
40982
41573
  import { access as access7, readdir as readdir3, stat as stat5 } from "node:fs/promises";
40983
- import path24 from "node:path";
41574
+ import path25 from "node:path";
40984
41575
  async function validateFiles(paths) {
40985
41576
  const filePaths = await expandPaths(paths);
40986
41577
  const results = [];
@@ -40998,22 +41589,8 @@ async function validateFiles(paths) {
40998
41589
  };
40999
41590
  }
41000
41591
  async function validateSingleFile(filePath) {
41001
- const absolutePath = path24.resolve(filePath);
41592
+ const absolutePath = path25.resolve(filePath);
41002
41593
  const fileType = await detectFileType(absolutePath);
41003
- if (fileType === "unknown") {
41004
- return {
41005
- valid: false,
41006
- filePath: absolutePath,
41007
- fileType: "unknown",
41008
- errors: [
41009
- {
41010
- severity: "error",
41011
- filePath: absolutePath,
41012
- message: "Missing or invalid $schema field. File must declare schema: 'agentv-eval-v2', 'agentv-targets-v2', or 'agentv-config-v2'"
41013
- }
41014
- ]
41015
- };
41016
- }
41017
41594
  let result;
41018
41595
  if (fileType === "eval") {
41019
41596
  result = await validateEvalFile(absolutePath);
@@ -41037,7 +41614,7 @@ async function validateSingleFile(filePath) {
41037
41614
  async function expandPaths(paths) {
41038
41615
  const expanded = [];
41039
41616
  for (const inputPath of paths) {
41040
- const absolutePath = path24.resolve(inputPath);
41617
+ const absolutePath = path25.resolve(inputPath);
41041
41618
  try {
41042
41619
  await access7(absolutePath, constants7.F_OK);
41043
41620
  } catch {
@@ -41061,7 +41638,7 @@ async function findYamlFiles(dirPath) {
41061
41638
  try {
41062
41639
  const entries = await readdir3(dirPath, { withFileTypes: true });
41063
41640
  for (const entry of entries) {
41064
- const fullPath = path24.join(dirPath, entry.name);
41641
+ const fullPath = path25.join(dirPath, entry.name);
41065
41642
  if (entry.isDirectory()) {
41066
41643
  if (entry.name === "node_modules" || entry.name.startsWith(".")) {
41067
41644
  continue;
@@ -41078,12 +41655,12 @@ async function findYamlFiles(dirPath) {
41078
41655
  return results;
41079
41656
  }
41080
41657
  function isYamlFile(filePath) {
41081
- const ext = path24.extname(filePath).toLowerCase();
41658
+ const ext = path25.extname(filePath).toLowerCase();
41082
41659
  return ext === ".yaml" || ext === ".yml";
41083
41660
  }
41084
41661
 
41085
41662
  // src/commands/validate/index.ts
41086
- async function runValidateCommand(paths, _options) {
41663
+ async function runValidateCommand(paths) {
41087
41664
  if (paths.length === 0) {
41088
41665
  console.error("Error: No paths specified. Usage: agentv validate <paths...>");
41089
41666
  process.exit(1);
@@ -41095,46 +41672,45 @@ async function runValidateCommand(paths, _options) {
41095
41672
  process.exit(1);
41096
41673
  }
41097
41674
  }
41098
- function registerValidateCommand(program) {
41099
- program.command("validate").description("Validate AgentV eval and targets YAML files").argument("<paths...>", "Files or directories to validate").action(async (paths, _options) => {
41675
+ var validateCommand = command4({
41676
+ name: "validate",
41677
+ description: "Validate AgentV eval and targets YAML files",
41678
+ args: {
41679
+ paths: restPositionals2({
41680
+ type: string7,
41681
+ displayName: "paths",
41682
+ description: "Files or directories to validate"
41683
+ })
41684
+ },
41685
+ handler: async ({ paths }) => {
41100
41686
  try {
41101
- await runValidateCommand(paths, _options);
41687
+ await runValidateCommand(paths);
41102
41688
  } catch (error40) {
41103
41689
  console.error(`Error: ${error40.message}`);
41104
41690
  process.exit(1);
41105
41691
  }
41106
- });
41107
- return program;
41108
- }
41692
+ }
41693
+ });
41109
41694
 
41110
41695
  // src/index.ts
41111
41696
  var packageJson = JSON.parse(readFileSync2(new URL("../package.json", import.meta.url), "utf8"));
41112
- function createProgram() {
41113
- const program = new Command();
41114
- program.name("agentv").description("AgentV CLI scaffolding").version(packageJson.version);
41115
- registerStatusCommand(program);
41116
- registerEvalCommand(program);
41117
- registerValidateCommand(program);
41118
- program.command("init [path]").description(
41119
- "Initialize AgentV in your project (installs prompt templates and schema to .github)"
41120
- ).action(async (targetPath) => {
41121
- try {
41122
- await initCommand({ targetPath });
41123
- } catch (error40) {
41124
- console.error(`Error: ${error40.message}`);
41125
- process.exit(1);
41126
- }
41127
- });
41128
- return program;
41129
- }
41697
+ var app = subcommands2({
41698
+ name: "agentv",
41699
+ description: "AgentV CLI",
41700
+ version: packageJson.version,
41701
+ cmds: {
41702
+ eval: evalCommand,
41703
+ validate: validateCommand,
41704
+ generate: generateCommand,
41705
+ init: initCmdTsCommand
41706
+ }
41707
+ });
41130
41708
  async function runCli(argv = process.argv) {
41131
- const program = createProgram();
41132
- await program.parseAsync(argv);
41133
- return program;
41709
+ await run(binary(app), argv);
41134
41710
  }
41135
41711
 
41136
41712
  export {
41137
- createProgram,
41713
+ app,
41138
41714
  runCli
41139
41715
  };
41140
- //# sourceMappingURL=chunk-GDGNKNKP.js.map
41716
+ //# sourceMappingURL=chunk-WOCXZEH4.js.map