agentv 0.20.1 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -155,7 +155,7 @@ import { access as access6, mkdir as mkdir7 } from "node:fs/promises";
155
155
  import path18 from "node:path";
156
156
  import { pathToFileURL } from "node:url";
157
157
 
158
- // ../../packages/core/dist/chunk-SVY324GN.js
158
+ // ../../packages/core/dist/chunk-BO7KG7JX.js
159
159
  import { constants } from "node:fs";
160
160
  import { access, readFile } from "node:fs/promises";
161
161
  import path from "node:path";
@@ -638,8 +638,8 @@ function getErrorMap() {
638
638
 
639
639
  // ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/helpers/parseUtil.js
640
640
  var makeIssue = (params) => {
641
- const { data, path: path25, errorMaps, issueData } = params;
642
- const fullPath = [...path25, ...issueData.path || []];
641
+ const { data, path: path26, errorMaps, issueData } = params;
642
+ const fullPath = [...path26, ...issueData.path || []];
643
643
  const fullIssue = {
644
644
  ...issueData,
645
645
  path: fullPath
@@ -755,11 +755,11 @@ var errorUtil;
755
755
 
756
756
  // ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/types.js
757
757
  var ParseInputLazyPath = class {
758
- constructor(parent, value, path25, key2) {
758
+ constructor(parent, value, path26, key2) {
759
759
  this._cachedPath = [];
760
760
  this.parent = parent;
761
761
  this.data = value;
762
- this._path = path25;
762
+ this._path = path26;
763
763
  this._key = key2;
764
764
  }
765
765
  get path() {
@@ -4201,7 +4201,7 @@ var coerce = {
4201
4201
  };
4202
4202
  var NEVER = INVALID;
4203
4203
 
4204
- // ../../packages/core/dist/chunk-SVY324GN.js
4204
+ // ../../packages/core/dist/chunk-BO7KG7JX.js
4205
4205
  async function fileExists(filePath) {
4206
4206
  try {
4207
4207
  await access(filePath, constants.F_OK);
@@ -5976,10 +5976,10 @@ function assignProp(target, prop, value) {
5976
5976
  configurable: true
5977
5977
  });
5978
5978
  }
5979
- function getElementAtPath(obj, path25) {
5980
- if (!path25)
5979
+ function getElementAtPath(obj, path26) {
5980
+ if (!path26)
5981
5981
  return obj;
5982
- return path25.reduce((acc, key2) => acc?.[key2], obj);
5982
+ return path26.reduce((acc, key2) => acc?.[key2], obj);
5983
5983
  }
5984
5984
  function promiseAllObject(promisesObj) {
5985
5985
  const keys = Object.keys(promisesObj);
@@ -6299,11 +6299,11 @@ function aborted(x, startIndex = 0) {
6299
6299
  }
6300
6300
  return false;
6301
6301
  }
6302
- function prefixIssues(path25, issues) {
6302
+ function prefixIssues(path26, issues) {
6303
6303
  return issues.map((iss) => {
6304
6304
  var _a17;
6305
6305
  (_a17 = iss).path ?? (_a17.path = []);
6306
- iss.path.unshift(path25);
6306
+ iss.path.unshift(path26);
6307
6307
  return iss;
6308
6308
  });
6309
6309
  }
@@ -6440,7 +6440,7 @@ function treeifyError(error40, _mapper) {
6440
6440
  return issue2.message;
6441
6441
  };
6442
6442
  const result = { errors: [] };
6443
- const processError = (error41, path25 = []) => {
6443
+ const processError = (error41, path26 = []) => {
6444
6444
  var _a17, _b8;
6445
6445
  for (const issue2 of error41.issues) {
6446
6446
  if (issue2.code === "invalid_union" && issue2.errors.length) {
@@ -6450,7 +6450,7 @@ function treeifyError(error40, _mapper) {
6450
6450
  } else if (issue2.code === "invalid_element") {
6451
6451
  processError({ issues: issue2.issues }, issue2.path);
6452
6452
  } else {
6453
- const fullpath = [...path25, ...issue2.path];
6453
+ const fullpath = [...path26, ...issue2.path];
6454
6454
  if (fullpath.length === 0) {
6455
6455
  result.errors.push(mapper(issue2));
6456
6456
  continue;
@@ -6480,9 +6480,9 @@ function treeifyError(error40, _mapper) {
6480
6480
  processError(error40);
6481
6481
  return result;
6482
6482
  }
6483
- function toDotPath(path25) {
6483
+ function toDotPath(path26) {
6484
6484
  const segs = [];
6485
- for (const seg of path25) {
6485
+ for (const seg of path26) {
6486
6486
  if (typeof seg === "number")
6487
6487
  segs.push(`[${seg}]`);
6488
6488
  else if (typeof seg === "symbol")
@@ -26035,14 +26035,14 @@ function createAzure(options = {}) {
26035
26035
  description: "Azure OpenAI resource name"
26036
26036
  });
26037
26037
  const apiVersion = (_a17 = options.apiVersion) != null ? _a17 : "v1";
26038
- const url2 = ({ path: path25, modelId }) => {
26038
+ const url2 = ({ path: path26, modelId }) => {
26039
26039
  var _a24;
26040
26040
  const baseUrlPrefix = (_a24 = options.baseURL) != null ? _a24 : `https://${getResourceName()}.openai.azure.com/openai`;
26041
26041
  let fullUrl;
26042
26042
  if (options.useDeploymentBasedUrls) {
26043
- fullUrl = new URL(`${baseUrlPrefix}/deployments/${modelId}${path25}`);
26043
+ fullUrl = new URL(`${baseUrlPrefix}/deployments/${modelId}${path26}`);
26044
26044
  } else {
26045
- fullUrl = new URL(`${baseUrlPrefix}/v1${path25}`);
26045
+ fullUrl = new URL(`${baseUrlPrefix}/v1${path26}`);
26046
26046
  }
26047
26047
  fullUrl.searchParams.set("api-version", apiVersion);
26048
26048
  return fullUrl.toString();
@@ -34553,7 +34553,7 @@ function isTestMessage(value) {
34553
34553
  }
34554
34554
  return candidate.content.every(isJsonObject);
34555
34555
  }
34556
- var EVALUATOR_KIND_VALUES = ["code", "llm_judge"];
34556
+ var EVALUATOR_KIND_VALUES = ["code", "llm_judge", "rubric"];
34557
34557
  var EVALUATOR_KIND_SET = new Set(EVALUATOR_KIND_VALUES);
34558
34558
  function isEvaluatorKind(value) {
34559
34559
  return typeof value === "string" && EVALUATOR_KIND_SET.has(value);
@@ -34920,6 +34920,29 @@ async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId
34920
34920
  }
34921
34921
  }
34922
34922
  const _model = asString2(rawEvaluator.model);
34923
+ if (typeValue === "rubric") {
34924
+ const rubrics = rawEvaluator.rubrics;
34925
+ if (!Array.isArray(rubrics)) {
34926
+ logWarning2(`Skipping rubric evaluator '${name16}' in '${evalId}': missing rubrics array`);
34927
+ continue;
34928
+ }
34929
+ const parsedRubrics = rubrics.filter((r) => isJsonObject2(r)).map((rubric, index) => ({
34930
+ id: asString2(rubric.id) ?? `rubric-${index + 1}`,
34931
+ description: asString2(rubric.description) ?? "",
34932
+ weight: typeof rubric.weight === "number" ? rubric.weight : 1,
34933
+ required: typeof rubric.required === "boolean" ? rubric.required : true
34934
+ })).filter((r) => r.description.length > 0);
34935
+ if (parsedRubrics.length === 0) {
34936
+ logWarning2(`Skipping rubric evaluator '${name16}' in '${evalId}': no valid rubrics found`);
34937
+ continue;
34938
+ }
34939
+ evaluators.push({
34940
+ name: name16,
34941
+ type: "rubric",
34942
+ rubrics: parsedRubrics
34943
+ });
34944
+ continue;
34945
+ }
34923
34946
  evaluators.push({
34924
34947
  name: name16,
34925
34948
  type: "llm_judge",
@@ -35390,7 +35413,7 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
35390
35413
  continue;
35391
35414
  }
35392
35415
  const conversationId = asString5(evalcase.conversation_id);
35393
- const outcome = asString5(evalcase.outcome);
35416
+ const outcome = asString5(evalcase.expected_outcome) ?? asString5(evalcase.outcome);
35394
35417
  const inputMessagesValue = evalcase.input_messages;
35395
35418
  const expectedMessagesValue = evalcase.expected_messages;
35396
35419
  if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
@@ -35444,6 +35467,33 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
35444
35467
  logError(`Skipping eval case '${id}': ${message}`);
35445
35468
  continue;
35446
35469
  }
35470
+ const inlineRubrics = evalcase.rubrics;
35471
+ if (inlineRubrics !== void 0 && Array.isArray(inlineRubrics)) {
35472
+ const rubricItems = inlineRubrics.filter((r) => isJsonObject(r) || typeof r === "string").map((rubric, index) => {
35473
+ if (typeof rubric === "string") {
35474
+ return {
35475
+ id: `rubric-${index + 1}`,
35476
+ description: rubric,
35477
+ weight: 1,
35478
+ required: true
35479
+ };
35480
+ }
35481
+ return {
35482
+ id: asString5(rubric.id) ?? `rubric-${index + 1}`,
35483
+ description: asString5(rubric.description) ?? "",
35484
+ weight: typeof rubric.weight === "number" ? rubric.weight : 1,
35485
+ required: typeof rubric.required === "boolean" ? rubric.required : true
35486
+ };
35487
+ }).filter((r) => r.description.length > 0);
35488
+ if (rubricItems.length > 0) {
35489
+ const rubricEvaluator = {
35490
+ name: "rubric",
35491
+ type: "rubric",
35492
+ rubrics: rubricItems
35493
+ };
35494
+ evaluators = evaluators ? [rubricEvaluator, ...evaluators] : [rubricEvaluator];
35495
+ }
35496
+ }
35447
35497
  const userFilePaths = [];
35448
35498
  for (const segment of inputSegments) {
35449
35499
  if (segment.type === "file" && typeof segment.resolvedPath === "string") {
@@ -35536,6 +35586,9 @@ var AzureProvider = class {
35536
35586
  retryConfig: this.retryConfig
35537
35587
  });
35538
35588
  }
35589
+ asLanguageModel() {
35590
+ return this.model;
35591
+ }
35539
35592
  };
35540
35593
  var AnthropicProvider = class {
35541
35594
  constructor(targetName, config2) {
@@ -35569,6 +35622,9 @@ var AnthropicProvider = class {
35569
35622
  providerOptions
35570
35623
  });
35571
35624
  }
35625
+ asLanguageModel() {
35626
+ return this.model;
35627
+ }
35572
35628
  };
35573
35629
  var GeminiProvider = class {
35574
35630
  constructor(targetName, config2) {
@@ -35599,6 +35655,9 @@ var GeminiProvider = class {
35599
35655
  retryConfig: this.retryConfig
35600
35656
  });
35601
35657
  }
35658
+ asLanguageModel() {
35659
+ return this.model;
35660
+ }
35602
35661
  };
35603
35662
  function buildAzureOptions(config2) {
35604
35663
  const options = {
@@ -37262,6 +37321,144 @@ function createProvider(target) {
37262
37321
  }
37263
37322
  }
37264
37323
  }
37324
+ var rubricCheckResultSchema = external_exports.object({
37325
+ id: external_exports.string().describe("The ID of the rubric item being checked"),
37326
+ satisfied: external_exports.boolean().describe("Whether this rubric requirement is met"),
37327
+ reasoning: external_exports.string().describe("Brief explanation (1-2 sentences) for this check")
37328
+ });
37329
+ var rubricEvaluationSchema = external_exports.object({
37330
+ checks: external_exports.array(rubricCheckResultSchema).describe("Results for each rubric item"),
37331
+ overall_reasoning: external_exports.string().describe("Overall assessment summary (1-2 sentences)")
37332
+ });
37333
+ var RubricEvaluator = class {
37334
+ kind = "rubric";
37335
+ config;
37336
+ resolveJudgeProvider;
37337
+ constructor(options) {
37338
+ this.config = options.config;
37339
+ this.resolveJudgeProvider = options.resolveJudgeProvider;
37340
+ }
37341
+ async evaluate(context) {
37342
+ const judgeProvider = await this.resolveJudgeProvider(context);
37343
+ if (!judgeProvider) {
37344
+ throw new Error("No judge provider available for rubric evaluation");
37345
+ }
37346
+ if (!this.config.rubrics || this.config.rubrics.length === 0) {
37347
+ throw new Error(
37348
+ `No rubrics found for evaluator "${this.config.name}". Run "agentv generate rubrics" first.`
37349
+ );
37350
+ }
37351
+ const prompt = this.buildPrompt(context, this.config.rubrics);
37352
+ const model = judgeProvider.asLanguageModel?.();
37353
+ if (!model) {
37354
+ throw new Error("Judge provider does not support language model interface");
37355
+ }
37356
+ const system = `You are an expert evaluator. Evaluate the candidate answer against each rubric item.
37357
+ You must return a valid JSON object matching this schema:
37358
+ {
37359
+ "checks": [
37360
+ {
37361
+ "id": "string (rubric id)",
37362
+ "satisfied": boolean,
37363
+ "reasoning": "string (brief explanation)"
37364
+ }
37365
+ ],
37366
+ "overall_reasoning": "string (summary)"
37367
+ }`;
37368
+ let result;
37369
+ let lastError;
37370
+ for (let attempt = 1; attempt <= 3; attempt++) {
37371
+ try {
37372
+ const { text: text2 } = await generateText({
37373
+ model,
37374
+ system,
37375
+ prompt
37376
+ });
37377
+ const cleaned = text2.replace(/```json\n?|```/g, "").trim();
37378
+ result = rubricEvaluationSchema.parse(JSON.parse(cleaned));
37379
+ break;
37380
+ } catch (e) {
37381
+ lastError = e instanceof Error ? e : new Error(String(e));
37382
+ }
37383
+ }
37384
+ if (!result) {
37385
+ throw new Error(
37386
+ `Failed to parse rubric evaluation result after 3 attempts: ${lastError?.message}`
37387
+ );
37388
+ }
37389
+ const { score, verdict, hits, misses } = this.calculateScore(result, this.config.rubrics);
37390
+ return {
37391
+ score,
37392
+ verdict,
37393
+ hits,
37394
+ misses,
37395
+ expectedAspectCount: this.config.rubrics.length,
37396
+ reasoning: result.overall_reasoning,
37397
+ evaluatorRawRequest: {
37398
+ prompt
37399
+ }
37400
+ };
37401
+ }
37402
+ buildPrompt(context, rubrics) {
37403
+ const parts = [
37404
+ "You are an expert evaluator. Evaluate the candidate answer against each rubric item below.",
37405
+ "",
37406
+ "[[ ## question ## ]]",
37407
+ context.evalCase.question,
37408
+ "",
37409
+ "[[ ## expected_outcome ## ]]",
37410
+ context.evalCase.expected_outcome,
37411
+ ""
37412
+ ];
37413
+ if (context.evalCase.reference_answer && context.evalCase.reference_answer.trim().length > 0) {
37414
+ parts.push("[[ ## reference_answer ## ]]", context.evalCase.reference_answer, "");
37415
+ }
37416
+ parts.push("[[ ## candidate_answer ## ]]", context.candidate, "", "[[ ## rubrics ## ]]");
37417
+ for (const rubric of rubrics) {
37418
+ const requiredLabel = rubric.required ? " (REQUIRED)" : "";
37419
+ const weightLabel = rubric.weight !== 1 ? ` (weight: ${rubric.weight})` : "";
37420
+ parts.push(`- [${rubric.id}]${requiredLabel}${weightLabel}: ${rubric.description}`);
37421
+ }
37422
+ parts.push("", "For each rubric, determine if it is satisfied and provide brief reasoning.");
37423
+ return parts.join("\n");
37424
+ }
37425
+ calculateScore(result, rubrics) {
37426
+ const rubricMap = new Map(rubrics.map((r) => [r.id, r]));
37427
+ const hits = [];
37428
+ const misses = [];
37429
+ let totalWeight = 0;
37430
+ let earnedWeight = 0;
37431
+ let failedRequired = false;
37432
+ for (const check2 of result.checks) {
37433
+ const rubric = rubricMap.get(check2.id);
37434
+ if (!rubric) {
37435
+ continue;
37436
+ }
37437
+ totalWeight += rubric.weight;
37438
+ if (check2.satisfied) {
37439
+ earnedWeight += rubric.weight;
37440
+ hits.push(`[${rubric.id}] ${rubric.description}: ${check2.reasoning}`);
37441
+ } else {
37442
+ misses.push(`[${rubric.id}] ${rubric.description}: ${check2.reasoning}`);
37443
+ if (rubric.required) {
37444
+ failedRequired = true;
37445
+ }
37446
+ }
37447
+ }
37448
+ const score = totalWeight > 0 ? Math.min(1, Math.max(0, earnedWeight / totalWeight)) : 0;
37449
+ let verdict;
37450
+ if (failedRequired) {
37451
+ verdict = "fail";
37452
+ } else if (score >= 0.8) {
37453
+ verdict = "pass";
37454
+ } else if (score >= 0.6) {
37455
+ verdict = "borderline";
37456
+ } else {
37457
+ verdict = "fail";
37458
+ }
37459
+ return { score, verdict, hits, misses };
37460
+ }
37461
+ };
37265
37462
  var DEFAULT_EVALUATOR_TEMPLATE = `You are an expert evaluator. Your goal is to grade the candidate_answer based on how well it achieves the expected_outcome for the original task.
37266
37463
 
37267
37464
  Use the reference_answer as a gold standard for a high-quality response (if provided). The candidate_answer does not need to match it verbatim, but should capture the key points and follow the same spirit.
@@ -38214,6 +38411,7 @@ async function runEvaluatorList(options) {
38214
38411
  name: evaluator.name,
38215
38412
  type: evaluator.type,
38216
38413
  score: score2.score,
38414
+ verdict: score2.verdict,
38217
38415
  hits: score2.hits,
38218
38416
  misses: score2.misses,
38219
38417
  reasoning: score2.reasoning,
@@ -38241,6 +38439,40 @@ async function runEvaluatorList(options) {
38241
38439
  name: evaluator.name,
38242
38440
  type: evaluator.type,
38243
38441
  score: score2.score,
38442
+ verdict: score2.verdict,
38443
+ hits: score2.hits,
38444
+ misses: score2.misses,
38445
+ reasoning: score2.reasoning,
38446
+ evaluator_provider_request: score2.evaluatorRawRequest
38447
+ });
38448
+ continue;
38449
+ }
38450
+ if (evaluator.type === "rubric") {
38451
+ const rubricEvaluator = new RubricEvaluator({
38452
+ config: evaluator,
38453
+ resolveJudgeProvider: async (context) => {
38454
+ if (context.judgeProvider) {
38455
+ return context.judgeProvider;
38456
+ }
38457
+ return judgeProvider;
38458
+ }
38459
+ });
38460
+ const score2 = await rubricEvaluator.evaluate({
38461
+ evalCase,
38462
+ candidate,
38463
+ target,
38464
+ provider,
38465
+ attempt,
38466
+ promptInputs,
38467
+ now,
38468
+ judgeProvider
38469
+ });
38470
+ scored.push({ score: score2, name: evaluator.name, type: evaluator.type });
38471
+ evaluatorResults.push({
38472
+ name: evaluator.name,
38473
+ type: evaluator.type,
38474
+ score: score2.score,
38475
+ verdict: score2.verdict,
38244
38476
  hits: score2.hits,
38245
38477
  misses: score2.misses,
38246
38478
  reasoning: score2.reasoning,
@@ -38470,6 +38702,82 @@ function isTimeoutLike(error40) {
38470
38702
  const value = String(error40).toLowerCase();
38471
38703
  return value.includes("timeout");
38472
38704
  }
38705
+ var rubricItemSchema = external_exports.object({
38706
+ id: external_exports.string().describe("Short identifier for this rubric (e.g., clarity, completeness)"),
38707
+ description: external_exports.string().describe("What this rubric checks for"),
38708
+ weight: external_exports.number().default(1).describe("Relative importance (default 1.0)"),
38709
+ required: external_exports.boolean().default(true).describe("Whether this is a mandatory requirement")
38710
+ });
38711
+ var rubricGenerationSchema = external_exports.object({
38712
+ rubrics: external_exports.array(rubricItemSchema).describe("List of evaluation rubrics")
38713
+ });
38714
+ async function generateRubrics(options) {
38715
+ const { expectedOutcome, question, referenceAnswer, provider } = options;
38716
+ const prompt = buildPrompt(expectedOutcome, question, referenceAnswer);
38717
+ const model = provider.asLanguageModel?.();
38718
+ if (!model) {
38719
+ throw new Error("Provider does not support language model interface");
38720
+ }
38721
+ const system = `You are an expert at creating evaluation rubrics.
38722
+ You must return a valid JSON object matching this schema:
38723
+ {
38724
+ "rubrics": [
38725
+ {
38726
+ "id": "string (short identifier)",
38727
+ "description": "string (what to check)",
38728
+ "weight": number (default 1.0),
38729
+ "required": boolean (default true)
38730
+ }
38731
+ ]
38732
+ }`;
38733
+ let result;
38734
+ let lastError;
38735
+ for (let attempt = 1; attempt <= 3; attempt++) {
38736
+ try {
38737
+ const { text: text2 } = await generateText({
38738
+ model,
38739
+ system,
38740
+ prompt
38741
+ });
38742
+ const cleaned = text2.replace(/```json\n?|```/g, "").trim();
38743
+ result = rubricGenerationSchema.parse(JSON.parse(cleaned));
38744
+ break;
38745
+ } catch (e) {
38746
+ lastError = e instanceof Error ? e : new Error(String(e));
38747
+ }
38748
+ }
38749
+ if (!result) {
38750
+ throw new Error(`Failed to parse generated rubrics after 3 attempts: ${lastError?.message}`);
38751
+ }
38752
+ return result.rubrics;
38753
+ }
38754
+ function buildPrompt(expectedOutcome, question, referenceAnswer) {
38755
+ const parts = [
38756
+ "You are an expert at creating evaluation rubrics.",
38757
+ "Given the expected outcome (and optionally the question and reference answer),",
38758
+ "generate a list of specific, measurable rubric items to evaluate whether an answer meets the expected outcome.",
38759
+ "",
38760
+ "Each rubric should:",
38761
+ "- Be specific and testable",
38762
+ "- Have a short, descriptive ID",
38763
+ "- Include a clear description of what to check",
38764
+ "- Indicate if it is required (mandatory) or optional",
38765
+ "- Have an appropriate weight (default 1.0, use higher values for more important aspects)",
38766
+ "",
38767
+ "Generate 3-7 rubric items that comprehensively cover the expected outcome.",
38768
+ "",
38769
+ "[[ ## expected_outcome ## ]]",
38770
+ expectedOutcome,
38771
+ ""
38772
+ ];
38773
+ if (question && question.trim().length > 0) {
38774
+ parts.push("[[ ## question ## ]]", question, "");
38775
+ }
38776
+ if (referenceAnswer && referenceAnswer.trim().length > 0) {
38777
+ parts.push("[[ ## reference_answer ## ]]", referenceAnswer, "");
38778
+ }
38779
+ return parts.join("\n");
38780
+ }
38473
38781
  function createAgentKernel() {
38474
38782
  return { status: "stub" };
38475
38783
  }
@@ -38927,12 +39235,12 @@ var ProgressDisplay = class {
38927
39235
  }
38928
39236
  addLogPaths(paths) {
38929
39237
  const newPaths = [];
38930
- for (const path25 of paths) {
38931
- if (this.logPathSet.has(path25)) {
39238
+ for (const path26 of paths) {
39239
+ if (this.logPathSet.has(path26)) {
38932
39240
  continue;
38933
39241
  }
38934
- this.logPathSet.add(path25);
38935
- newPaths.push(path25);
39242
+ this.logPathSet.add(path26);
39243
+ newPaths.push(path26);
38936
39244
  }
38937
39245
  if (newPaths.length === 0) {
38938
39246
  return;
@@ -38948,8 +39256,8 @@ var ProgressDisplay = class {
38948
39256
  this.hasPrintedLogHeader = true;
38949
39257
  }
38950
39258
  const startIndex = this.logPaths.length - newPaths.length;
38951
- newPaths.forEach((path25, offset) => {
38952
- console.log(`${startIndex + offset + 1}. ${path25}`);
39259
+ newPaths.forEach((path26, offset) => {
39260
+ console.log(`${startIndex + offset + 1}. ${path26}`);
38953
39261
  });
38954
39262
  }
38955
39263
  scheduleRender() {
@@ -38997,8 +39305,8 @@ var ProgressDisplay = class {
38997
39305
  if (this.logPaths.length > 0) {
38998
39306
  lines.push("");
38999
39307
  lines.push("Codex CLI logs:");
39000
- this.logPaths.forEach((path25, index) => {
39001
- lines.push(`${index + 1}. ${path25}`);
39308
+ this.logPaths.forEach((path26, index) => {
39309
+ lines.push(`${index + 1}. ${path26}`);
39002
39310
  });
39003
39311
  }
39004
39312
  const rowCount = this.getRenderedRowCount(lines);
@@ -39203,11 +39511,6 @@ function formatEvaluationSummary(summary) {
39203
39511
  return lines.join("\n");
39204
39512
  }
39205
39513
 
39206
- // src/commands/eval/targets.ts
39207
- import { constants as constants5 } from "node:fs";
39208
- import { access as access5 } from "node:fs/promises";
39209
- import path17 from "node:path";
39210
-
39211
39514
  // ../../packages/core/dist/evaluation/validation/index.js
39212
39515
  import { readFile as readFile7 } from "node:fs/promises";
39213
39516
  import { parse as parse6 } from "yaml";
@@ -39323,13 +39626,13 @@ async function validateEvalFile(filePath) {
39323
39626
  message: "Missing or invalid 'id' field (must be a non-empty string)"
39324
39627
  });
39325
39628
  }
39326
- const outcome = evalCase.outcome;
39327
- if (typeof outcome !== "string" || outcome.trim().length === 0) {
39629
+ const expectedOutcome = evalCase.expected_outcome ?? evalCase.outcome;
39630
+ if (expectedOutcome !== void 0 && (typeof expectedOutcome !== "string" || expectedOutcome.trim().length === 0)) {
39328
39631
  errors.push({
39329
39632
  severity: "error",
39330
39633
  filePath: absolutePath,
39331
- location: `${location}.outcome`,
39332
- message: "Missing or invalid 'outcome' field (must be a non-empty string)"
39634
+ location: `${location}.expected_outcome`,
39635
+ message: "Invalid 'expected_outcome' or 'outcome' field (must be a non-empty string if provided)"
39333
39636
  });
39334
39637
  }
39335
39638
  const inputMessages = evalCase.input_messages;
@@ -40064,19 +40367,16 @@ async function validateMessagesFileRefs(messages, location, searchRoots, filePat
40064
40367
  }
40065
40368
  }
40066
40369
 
40067
- // src/commands/eval/targets.ts
40370
+ // src/utils/targets.ts
40371
+ import { constants as constants5 } from "node:fs";
40372
+ import { access as access5 } from "node:fs/promises";
40373
+ import path17 from "node:path";
40068
40374
  var TARGET_FILE_CANDIDATES = [
40069
40375
  "targets.yaml",
40070
40376
  "targets.yml",
40071
40377
  path17.join(".agentv", "targets.yaml"),
40072
40378
  path17.join(".agentv", "targets.yml")
40073
40379
  ];
40074
- var ANSI_YELLOW7 = "\x1B[33m";
40075
- var ANSI_RED2 = "\x1B[31m";
40076
- var ANSI_RESET7 = "\x1B[0m";
40077
- function isTTY() {
40078
- return process.stdout.isTTY ?? false;
40079
- }
40080
40380
  async function fileExists5(filePath) {
40081
40381
  try {
40082
40382
  await access5(filePath, constants5.F_OK);
@@ -40085,10 +40385,6 @@ async function fileExists5(filePath) {
40085
40385
  return false;
40086
40386
  }
40087
40387
  }
40088
- async function readTestSuiteTarget(testFilePath) {
40089
- const metadata = await readTestSuiteMetadata(testFilePath);
40090
- return metadata.target;
40091
- }
40092
40388
  async function discoverTargetsFile(options) {
40093
40389
  const { explicitPath, testFilePath, repoRoot, cwd } = options;
40094
40390
  if (explicitPath) {
@@ -40119,6 +40415,18 @@ async function discoverTargetsFile(options) {
40119
40415
  }
40120
40416
  throw new Error("Unable to locate targets.yaml. Use --targets to specify the file explicitly.");
40121
40417
  }
40418
+
40419
+ // src/commands/eval/targets.ts
40420
+ var ANSI_YELLOW7 = "\x1B[33m";
40421
+ var ANSI_RED2 = "\x1B[31m";
40422
+ var ANSI_RESET7 = "\x1B[0m";
40423
+ function isTTY() {
40424
+ return process.stdout.isTTY ?? false;
40425
+ }
40426
+ async function readTestSuiteTarget(testFilePath) {
40427
+ const metadata = await readTestSuiteMetadata(testFilePath);
40428
+ return metadata.target;
40429
+ }
40122
40430
  function pickTargetName(options) {
40123
40431
  const cliName = options.cliTargetName?.trim();
40124
40432
  if (cliName && cliName !== "default") {
@@ -40705,14 +41013,174 @@ async function resolveEvalPaths(evalPaths, cwd) {
40705
41013
  return sorted;
40706
41014
  }
40707
41015
 
41016
+ // src/commands/generate/rubrics.ts
41017
+ import { readFile as readFile8, writeFile as writeFile6 } from "node:fs/promises";
41018
+ import path20 from "node:path";
41019
+ import { pathToFileURL as pathToFileURL2 } from "node:url";
41020
+ import { isMap, isSeq, parseDocument } from "yaml";
41021
+ function isJsonObject3(value) {
41022
+ return typeof value === "object" && value !== null && !Array.isArray(value);
41023
+ }
41024
+ function asString6(value) {
41025
+ return typeof value === "string" ? value : void 0;
41026
+ }
41027
+ async function loadRubricGenerator() {
41028
+ const customGenerator = process.env.AGENTEVO_CLI_RUBRIC_GENERATOR;
41029
+ if (customGenerator) {
41030
+ const generatorPath = path20.resolve(customGenerator);
41031
+ const generatorUrl = pathToFileURL2(generatorPath).href;
41032
+ const module = await import(generatorUrl);
41033
+ return module.generateRubrics;
41034
+ }
41035
+ return generateRubrics;
41036
+ }
41037
+ async function generateRubricsCommand(options) {
41038
+ const { file: file2, target: targetOverride, verbose } = options;
41039
+ console.log(`Generating rubrics for: ${file2}`);
41040
+ const absolutePath = path20.resolve(file2);
41041
+ const content = await readFile8(absolutePath, "utf8");
41042
+ const doc = parseDocument(content);
41043
+ const parsed = doc.toJSON();
41044
+ if (!isJsonObject3(parsed)) {
41045
+ throw new Error(`Invalid YAML file format: ${file2}`);
41046
+ }
41047
+ const suite = parsed;
41048
+ const evalcases = suite.evalcases;
41049
+ if (!Array.isArray(evalcases)) {
41050
+ throw new Error(`No evalcases found in ${file2}`);
41051
+ }
41052
+ const targetSelection = await selectTarget({
41053
+ testFilePath: absolutePath,
41054
+ repoRoot: process.cwd(),
41055
+ cwd: process.cwd(),
41056
+ cliTargetName: targetOverride,
41057
+ dryRun: false,
41058
+ dryRunDelay: 0,
41059
+ dryRunDelayMin: 0,
41060
+ dryRunDelayMax: 0,
41061
+ env: process.env
41062
+ });
41063
+ if (verbose) {
41064
+ console.log(`Using target: ${targetSelection.targetName}`);
41065
+ }
41066
+ const provider = createProvider(targetSelection.resolvedTarget);
41067
+ const generateRubricsFunc = await loadRubricGenerator();
41068
+ let updatedCount = 0;
41069
+ let skippedCount = 0;
41070
+ const evalcasesNode = doc.getIn(["evalcases"]);
41071
+ if (!evalcasesNode || !isSeq(evalcasesNode)) {
41072
+ throw new Error("evalcases must be a sequence");
41073
+ }
41074
+ for (let i = 0; i < evalcases.length; i++) {
41075
+ const rawCase = evalcases[i];
41076
+ if (!isJsonObject3(rawCase)) {
41077
+ continue;
41078
+ }
41079
+ const evalCase = rawCase;
41080
+ const id = asString6(evalCase.id) ?? "unknown";
41081
+ const expectedOutcome = asString6(evalCase.expected_outcome) ?? asString6(evalCase.outcome);
41082
+ if (!expectedOutcome) {
41083
+ if (verbose) {
41084
+ console.log(` Skipping ${id}: no expected_outcome`);
41085
+ }
41086
+ skippedCount++;
41087
+ continue;
41088
+ }
41089
+ if (evalCase.rubrics !== void 0) {
41090
+ if (verbose) {
41091
+ console.log(` Skipping ${id}: rubrics already defined`);
41092
+ }
41093
+ skippedCount++;
41094
+ continue;
41095
+ }
41096
+ console.log(` Generating rubrics for: ${id}`);
41097
+ const question = extractQuestion(evalCase);
41098
+ const referenceAnswer = asString6(evalCase.reference_answer);
41099
+ const rubrics = await generateRubricsFunc({
41100
+ expectedOutcome,
41101
+ question,
41102
+ referenceAnswer,
41103
+ provider
41104
+ });
41105
+ const caseNode = evalcasesNode.items[i];
41106
+ if (caseNode && isMap(caseNode)) {
41107
+ caseNode.set(
41108
+ "rubrics",
41109
+ rubrics.map(
41110
+ (r) => ({
41111
+ id: r.id,
41112
+ description: r.description,
41113
+ weight: r.weight,
41114
+ required: r.required
41115
+ })
41116
+ )
41117
+ );
41118
+ }
41119
+ updatedCount++;
41120
+ if (verbose) {
41121
+ console.log(` Generated ${rubrics.length} rubric(s)`);
41122
+ }
41123
+ }
41124
+ if (updatedCount > 0) {
41125
+ const output = doc.toString();
41126
+ await writeFile6(absolutePath, output, "utf8");
41127
+ console.log(`
41128
+ Updated ${updatedCount} eval case(s) with generated rubrics`);
41129
+ if (skippedCount > 0) {
41130
+ console.log(`Skipped ${skippedCount} eval case(s)`);
41131
+ }
41132
+ } else {
41133
+ console.log("\nNo eval cases updated (all already have rubrics or missing expected_outcome)");
41134
+ }
41135
+ }
41136
+ function extractQuestion(evalCase) {
41137
+ const explicitQuestion = asString6(evalCase.question);
41138
+ if (explicitQuestion) {
41139
+ return explicitQuestion;
41140
+ }
41141
+ const inputMessages = evalCase.input_messages;
41142
+ if (!Array.isArray(inputMessages)) {
41143
+ return void 0;
41144
+ }
41145
+ for (const msg of inputMessages) {
41146
+ if (!isJsonObject3(msg)) {
41147
+ continue;
41148
+ }
41149
+ if (msg.role === "user" && typeof msg.content === "string") {
41150
+ return msg.content;
41151
+ }
41152
+ }
41153
+ return void 0;
41154
+ }
41155
+
41156
+ // src/commands/generate/index.ts
41157
+ function registerGenerateCommand(program) {
41158
+ const generate = program.command("generate").description("Generate evaluation artifacts");
41159
+ generate.command("rubrics <file>").description("Generate rubrics from expected_outcome in YAML eval file").option(
41160
+ "-t, --target <target>",
41161
+ "Override target for rubric generation (default: file target or openai:gpt-4o)"
41162
+ ).option("-v, --verbose", "Show detailed progress").action(async (file2, options) => {
41163
+ try {
41164
+ await generateRubricsCommand({
41165
+ file: file2,
41166
+ target: options.target,
41167
+ verbose: options.verbose
41168
+ });
41169
+ } catch (error40) {
41170
+ console.error(`Error: ${error40.message}`);
41171
+ process.exit(1);
41172
+ }
41173
+ });
41174
+ }
41175
+
40708
41176
  // src/commands/init/index.ts
40709
41177
  import { existsSync, mkdirSync, writeFileSync } from "node:fs";
40710
- import path21 from "node:path";
41178
+ import path24 from "node:path";
40711
41179
  import * as readline from "node:readline/promises";
40712
41180
 
40713
41181
  // src/templates/index.ts
40714
41182
  import { readFileSync, readdirSync, statSync } from "node:fs";
40715
- import path20 from "node:path";
41183
+ import path21 from "node:path";
40716
41184
  import { fileURLToPath } from "node:url";
40717
41185
  function getGithubTemplates() {
40718
41186
  return getTemplatesFromDir(".github");
@@ -40724,12 +41192,12 @@ function getClaudeTemplates() {
40724
41192
  return getTemplatesFromDir(".claude");
40725
41193
  }
40726
41194
  function getTemplatesFromDir(subdir) {
40727
- const currentDir = path20.dirname(fileURLToPath(import.meta.url));
41195
+ const currentDir = path21.dirname(fileURLToPath(import.meta.url));
40728
41196
  let templatesDir;
40729
- if (currentDir.includes(`${path20.sep}dist`)) {
40730
- templatesDir = path20.join(currentDir, "templates", subdir);
41197
+ if (currentDir.includes(`${path21.sep}dist`)) {
41198
+ templatesDir = path21.join(currentDir, "templates", subdir);
40731
41199
  } else {
40732
- templatesDir = path20.join(currentDir, subdir);
41200
+ templatesDir = path21.join(currentDir, subdir);
40733
41201
  }
40734
41202
  return readTemplatesRecursively(templatesDir, "");
40735
41203
  }
@@ -40737,15 +41205,15 @@ function readTemplatesRecursively(dir, relativePath) {
40737
41205
  const templates = [];
40738
41206
  const entries = readdirSync(dir);
40739
41207
  for (const entry of entries) {
40740
- const fullPath = path20.join(dir, entry);
41208
+ const fullPath = path21.join(dir, entry);
40741
41209
  const stat6 = statSync(fullPath);
40742
- const entryRelativePath = relativePath ? path20.join(relativePath, entry) : entry;
41210
+ const entryRelativePath = relativePath ? path21.join(relativePath, entry) : entry;
40743
41211
  if (stat6.isDirectory()) {
40744
41212
  templates.push(...readTemplatesRecursively(fullPath, entryRelativePath));
40745
41213
  } else {
40746
41214
  const content = readFileSync(fullPath, "utf-8");
40747
41215
  templates.push({
40748
- path: entryRelativePath.split(path20.sep).join("/"),
41216
+ path: entryRelativePath.split(path21.sep).join("/"),
40749
41217
  // Normalize to forward slashes
40750
41218
  content
40751
41219
  });
@@ -40768,10 +41236,10 @@ async function promptYesNo(message) {
40768
41236
  }
40769
41237
  }
40770
41238
  async function initCommand(options = {}) {
40771
- const targetPath = path21.resolve(options.targetPath ?? ".");
40772
- const githubDir = path21.join(targetPath, ".github");
40773
- const agentvDir = path21.join(targetPath, ".agentv");
40774
- const claudeDir = path21.join(targetPath, ".claude");
41239
+ const targetPath = path24.resolve(options.targetPath ?? ".");
41240
+ const githubDir = path24.join(targetPath, ".github");
41241
+ const agentvDir = path24.join(targetPath, ".agentv");
41242
+ const claudeDir = path24.join(targetPath, ".claude");
40775
41243
  const githubTemplates = getGithubTemplates();
40776
41244
  const agentvTemplates = getAgentvTemplates();
40777
41245
  const claudeTemplates = getClaudeTemplates();
@@ -40779,32 +41247,32 @@ async function initCommand(options = {}) {
40779
41247
  const otherAgentvTemplates = agentvTemplates.filter((t) => t.path !== ".env.template");
40780
41248
  const existingFiles = [];
40781
41249
  if (envTemplate) {
40782
- const envFilePath = path21.join(targetPath, ".env.template");
41250
+ const envFilePath = path24.join(targetPath, ".env.template");
40783
41251
  if (existsSync(envFilePath)) {
40784
41252
  existingFiles.push(".env.template");
40785
41253
  }
40786
41254
  }
40787
41255
  if (existsSync(githubDir)) {
40788
41256
  for (const template of githubTemplates) {
40789
- const targetFilePath = path21.join(githubDir, template.path);
41257
+ const targetFilePath = path24.join(githubDir, template.path);
40790
41258
  if (existsSync(targetFilePath)) {
40791
- existingFiles.push(path21.relative(targetPath, targetFilePath));
41259
+ existingFiles.push(path24.relative(targetPath, targetFilePath));
40792
41260
  }
40793
41261
  }
40794
41262
  }
40795
41263
  if (existsSync(agentvDir)) {
40796
41264
  for (const template of otherAgentvTemplates) {
40797
- const targetFilePath = path21.join(agentvDir, template.path);
41265
+ const targetFilePath = path24.join(agentvDir, template.path);
40798
41266
  if (existsSync(targetFilePath)) {
40799
- existingFiles.push(path21.relative(targetPath, targetFilePath));
41267
+ existingFiles.push(path24.relative(targetPath, targetFilePath));
40800
41268
  }
40801
41269
  }
40802
41270
  }
40803
41271
  if (existsSync(claudeDir)) {
40804
41272
  for (const template of claudeTemplates) {
40805
- const targetFilePath = path21.join(claudeDir, template.path);
41273
+ const targetFilePath = path24.join(claudeDir, template.path);
40806
41274
  if (existsSync(targetFilePath)) {
40807
- existingFiles.push(path21.relative(targetPath, targetFilePath));
41275
+ existingFiles.push(path24.relative(targetPath, targetFilePath));
40808
41276
  }
40809
41277
  }
40810
41278
  }
@@ -40831,36 +41299,36 @@ async function initCommand(options = {}) {
40831
41299
  mkdirSync(claudeDir, { recursive: true });
40832
41300
  }
40833
41301
  if (envTemplate) {
40834
- const envFilePath = path21.join(targetPath, ".env.template");
41302
+ const envFilePath = path24.join(targetPath, ".env.template");
40835
41303
  writeFileSync(envFilePath, envTemplate.content, "utf-8");
40836
41304
  console.log("Created .env.template");
40837
41305
  }
40838
41306
  for (const template of githubTemplates) {
40839
- const targetFilePath = path21.join(githubDir, template.path);
40840
- const targetDirPath = path21.dirname(targetFilePath);
41307
+ const targetFilePath = path24.join(githubDir, template.path);
41308
+ const targetDirPath = path24.dirname(targetFilePath);
40841
41309
  if (!existsSync(targetDirPath)) {
40842
41310
  mkdirSync(targetDirPath, { recursive: true });
40843
41311
  }
40844
41312
  writeFileSync(targetFilePath, template.content, "utf-8");
40845
- console.log(`Created ${path21.relative(targetPath, targetFilePath)}`);
41313
+ console.log(`Created ${path24.relative(targetPath, targetFilePath)}`);
40846
41314
  }
40847
41315
  for (const template of otherAgentvTemplates) {
40848
- const targetFilePath = path21.join(agentvDir, template.path);
40849
- const targetDirPath = path21.dirname(targetFilePath);
41316
+ const targetFilePath = path24.join(agentvDir, template.path);
41317
+ const targetDirPath = path24.dirname(targetFilePath);
40850
41318
  if (!existsSync(targetDirPath)) {
40851
41319
  mkdirSync(targetDirPath, { recursive: true });
40852
41320
  }
40853
41321
  writeFileSync(targetFilePath, template.content, "utf-8");
40854
- console.log(`Created ${path21.relative(targetPath, targetFilePath)}`);
41322
+ console.log(`Created ${path24.relative(targetPath, targetFilePath)}`);
40855
41323
  }
40856
41324
  for (const template of claudeTemplates) {
40857
- const targetFilePath = path21.join(claudeDir, template.path);
40858
- const targetDirPath = path21.dirname(targetFilePath);
41325
+ const targetFilePath = path24.join(claudeDir, template.path);
41326
+ const targetDirPath = path24.dirname(targetFilePath);
40859
41327
  if (!existsSync(targetDirPath)) {
40860
41328
  mkdirSync(targetDirPath, { recursive: true });
40861
41329
  }
40862
41330
  writeFileSync(targetFilePath, template.content, "utf-8");
40863
- console.log(`Created ${path21.relative(targetPath, targetFilePath)}`);
41331
+ console.log(`Created ${path24.relative(targetPath, targetFilePath)}`);
40864
41332
  }
40865
41333
  console.log("\nAgentV initialized successfully!");
40866
41334
  console.log("\nFiles installed to root:");
@@ -40868,17 +41336,17 @@ async function initCommand(options = {}) {
40868
41336
  console.log(" - .env.template");
40869
41337
  }
40870
41338
  console.log(`
40871
- Files installed to ${path21.relative(targetPath, githubDir)}:`);
41339
+ Files installed to ${path24.relative(targetPath, githubDir)}:`);
40872
41340
  for (const t of githubTemplates) {
40873
41341
  console.log(` - ${t.path}`);
40874
41342
  }
40875
41343
  console.log(`
40876
- Files installed to ${path21.relative(targetPath, agentvDir)}:`);
41344
+ Files installed to ${path24.relative(targetPath, agentvDir)}:`);
40877
41345
  for (const t of otherAgentvTemplates) {
40878
41346
  console.log(` - ${t.path}`);
40879
41347
  }
40880
41348
  console.log(`
40881
- Files installed to ${path21.relative(targetPath, claudeDir)}:`);
41349
+ Files installed to ${path24.relative(targetPath, claudeDir)}:`);
40882
41350
  for (const t of claudeTemplates) {
40883
41351
  console.log(` - ${t.path}`);
40884
41352
  }
@@ -40980,7 +41448,7 @@ function isTTY2() {
40980
41448
  // src/commands/validate/validate-files.ts
40981
41449
  import { constants as constants7 } from "node:fs";
40982
41450
  import { access as access7, readdir as readdir3, stat as stat5 } from "node:fs/promises";
40983
- import path24 from "node:path";
41451
+ import path25 from "node:path";
40984
41452
  async function validateFiles(paths) {
40985
41453
  const filePaths = await expandPaths(paths);
40986
41454
  const results = [];
@@ -40998,7 +41466,7 @@ async function validateFiles(paths) {
40998
41466
  };
40999
41467
  }
41000
41468
  async function validateSingleFile(filePath) {
41001
- const absolutePath = path24.resolve(filePath);
41469
+ const absolutePath = path25.resolve(filePath);
41002
41470
  const fileType = await detectFileType(absolutePath);
41003
41471
  if (fileType === "unknown") {
41004
41472
  return {
@@ -41037,7 +41505,7 @@ async function validateSingleFile(filePath) {
41037
41505
  async function expandPaths(paths) {
41038
41506
  const expanded = [];
41039
41507
  for (const inputPath of paths) {
41040
- const absolutePath = path24.resolve(inputPath);
41508
+ const absolutePath = path25.resolve(inputPath);
41041
41509
  try {
41042
41510
  await access7(absolutePath, constants7.F_OK);
41043
41511
  } catch {
@@ -41061,7 +41529,7 @@ async function findYamlFiles(dirPath) {
41061
41529
  try {
41062
41530
  const entries = await readdir3(dirPath, { withFileTypes: true });
41063
41531
  for (const entry of entries) {
41064
- const fullPath = path24.join(dirPath, entry.name);
41532
+ const fullPath = path25.join(dirPath, entry.name);
41065
41533
  if (entry.isDirectory()) {
41066
41534
  if (entry.name === "node_modules" || entry.name.startsWith(".")) {
41067
41535
  continue;
@@ -41078,7 +41546,7 @@ async function findYamlFiles(dirPath) {
41078
41546
  return results;
41079
41547
  }
41080
41548
  function isYamlFile(filePath) {
41081
- const ext = path24.extname(filePath).toLowerCase();
41549
+ const ext = path25.extname(filePath).toLowerCase();
41082
41550
  return ext === ".yaml" || ext === ".yml";
41083
41551
  }
41084
41552
 
@@ -41115,6 +41583,7 @@ function createProgram() {
41115
41583
  registerStatusCommand(program);
41116
41584
  registerEvalCommand(program);
41117
41585
  registerValidateCommand(program);
41586
+ registerGenerateCommand(program);
41118
41587
  program.command("init [path]").description(
41119
41588
  "Initialize AgentV in your project (installs prompt templates and schema to .github)"
41120
41589
  ).action(async (targetPath) => {
@@ -41137,4 +41606,4 @@ export {
41137
41606
  createProgram,
41138
41607
  runCli
41139
41608
  };
41140
- //# sourceMappingURL=chunk-GDGNKNKP.js.map
41609
+ //# sourceMappingURL=chunk-MA3MJNJH.js.map