agentv 0.20.0 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -155,7 +155,7 @@ import { access as access6, mkdir as mkdir7 } from "node:fs/promises";
155
155
  import path18 from "node:path";
156
156
  import { pathToFileURL } from "node:url";
157
157
 
158
- // ../../packages/core/dist/chunk-SVY324GN.js
158
+ // ../../packages/core/dist/chunk-BO7KG7JX.js
159
159
  import { constants } from "node:fs";
160
160
  import { access, readFile } from "node:fs/promises";
161
161
  import path from "node:path";
@@ -638,8 +638,8 @@ function getErrorMap() {
638
638
 
639
639
  // ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/helpers/parseUtil.js
640
640
  var makeIssue = (params) => {
641
- const { data, path: path25, errorMaps, issueData } = params;
642
- const fullPath = [...path25, ...issueData.path || []];
641
+ const { data, path: path26, errorMaps, issueData } = params;
642
+ const fullPath = [...path26, ...issueData.path || []];
643
643
  const fullIssue = {
644
644
  ...issueData,
645
645
  path: fullPath
@@ -755,11 +755,11 @@ var errorUtil;
755
755
 
756
756
  // ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/types.js
757
757
  var ParseInputLazyPath = class {
758
- constructor(parent, value, path25, key2) {
758
+ constructor(parent, value, path26, key2) {
759
759
  this._cachedPath = [];
760
760
  this.parent = parent;
761
761
  this.data = value;
762
- this._path = path25;
762
+ this._path = path26;
763
763
  this._key = key2;
764
764
  }
765
765
  get path() {
@@ -4201,7 +4201,7 @@ var coerce = {
4201
4201
  };
4202
4202
  var NEVER = INVALID;
4203
4203
 
4204
- // ../../packages/core/dist/chunk-SVY324GN.js
4204
+ // ../../packages/core/dist/chunk-BO7KG7JX.js
4205
4205
  async function fileExists(filePath) {
4206
4206
  try {
4207
4207
  await access(filePath, constants.F_OK);
@@ -4874,7 +4874,7 @@ function isAgentProvider(provider) {
4874
4874
  import { readFile as readFile5 } from "node:fs/promises";
4875
4875
  import path62 from "node:path";
4876
4876
  import { parse as parse22 } from "yaml";
4877
- import { readFile as readFile6 } from "node:fs/promises";
4877
+ import { readFile as readFile4 } from "node:fs/promises";
4878
4878
  import path22 from "node:path";
4879
4879
  import micromatch from "micromatch";
4880
4880
  import { parse as parse5 } from "yaml";
@@ -5976,10 +5976,10 @@ function assignProp(target, prop, value) {
5976
5976
  configurable: true
5977
5977
  });
5978
5978
  }
5979
- function getElementAtPath(obj, path25) {
5980
- if (!path25)
5979
+ function getElementAtPath(obj, path26) {
5980
+ if (!path26)
5981
5981
  return obj;
5982
- return path25.reduce((acc, key2) => acc?.[key2], obj);
5982
+ return path26.reduce((acc, key2) => acc?.[key2], obj);
5983
5983
  }
5984
5984
  function promiseAllObject(promisesObj) {
5985
5985
  const keys = Object.keys(promisesObj);
@@ -6299,11 +6299,11 @@ function aborted(x, startIndex = 0) {
6299
6299
  }
6300
6300
  return false;
6301
6301
  }
6302
- function prefixIssues(path25, issues) {
6302
+ function prefixIssues(path26, issues) {
6303
6303
  return issues.map((iss) => {
6304
6304
  var _a17;
6305
6305
  (_a17 = iss).path ?? (_a17.path = []);
6306
- iss.path.unshift(path25);
6306
+ iss.path.unshift(path26);
6307
6307
  return iss;
6308
6308
  });
6309
6309
  }
@@ -6440,7 +6440,7 @@ function treeifyError(error40, _mapper) {
6440
6440
  return issue2.message;
6441
6441
  };
6442
6442
  const result = { errors: [] };
6443
- const processError = (error41, path25 = []) => {
6443
+ const processError = (error41, path26 = []) => {
6444
6444
  var _a17, _b8;
6445
6445
  for (const issue2 of error41.issues) {
6446
6446
  if (issue2.code === "invalid_union" && issue2.errors.length) {
@@ -6450,7 +6450,7 @@ function treeifyError(error40, _mapper) {
6450
6450
  } else if (issue2.code === "invalid_element") {
6451
6451
  processError({ issues: issue2.issues }, issue2.path);
6452
6452
  } else {
6453
- const fullpath = [...path25, ...issue2.path];
6453
+ const fullpath = [...path26, ...issue2.path];
6454
6454
  if (fullpath.length === 0) {
6455
6455
  result.errors.push(mapper(issue2));
6456
6456
  continue;
@@ -6480,9 +6480,9 @@ function treeifyError(error40, _mapper) {
6480
6480
  processError(error40);
6481
6481
  return result;
6482
6482
  }
6483
- function toDotPath(path25) {
6483
+ function toDotPath(path26) {
6484
6484
  const segs = [];
6485
- for (const seg of path25) {
6485
+ for (const seg of path26) {
6486
6486
  if (typeof seg === "number")
6487
6487
  segs.push(`[${seg}]`);
6488
6488
  else if (typeof seg === "symbol")
@@ -26035,14 +26035,14 @@ function createAzure(options = {}) {
26035
26035
  description: "Azure OpenAI resource name"
26036
26036
  });
26037
26037
  const apiVersion = (_a17 = options.apiVersion) != null ? _a17 : "v1";
26038
- const url2 = ({ path: path25, modelId }) => {
26038
+ const url2 = ({ path: path26, modelId }) => {
26039
26039
  var _a24;
26040
26040
  const baseUrlPrefix = (_a24 = options.baseURL) != null ? _a24 : `https://${getResourceName()}.openai.azure.com/openai`;
26041
26041
  let fullUrl;
26042
26042
  if (options.useDeploymentBasedUrls) {
26043
- fullUrl = new URL(`${baseUrlPrefix}/deployments/${modelId}${path25}`);
26043
+ fullUrl = new URL(`${baseUrlPrefix}/deployments/${modelId}${path26}`);
26044
26044
  } else {
26045
- fullUrl = new URL(`${baseUrlPrefix}/v1${path25}`);
26045
+ fullUrl = new URL(`${baseUrlPrefix}/v1${path26}`);
26046
26046
  }
26047
26047
  fullUrl.searchParams.set("api-version", apiVersion);
26048
26048
  return fullUrl.toString();
@@ -32488,11 +32488,11 @@ import { promisify as promisify22 } from "node:util";
32488
32488
  import path82 from "node:path";
32489
32489
  import path102 from "node:path";
32490
32490
 
32491
- // ../../node_modules/.bun/subagent@0.5.5/node_modules/subagent/dist/vscode/agentDispatch.js
32491
+ // ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/vscode/agentDispatch.js
32492
32492
  import { stat as stat3, writeFile as writeFile3 } from "node:fs/promises";
32493
32493
  import path10 from "node:path";
32494
32494
 
32495
- // ../../node_modules/.bun/subagent@0.5.5/node_modules/subagent/dist/utils/fs.js
32495
+ // ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/utils/fs.js
32496
32496
  import { constants as constants2 } from "node:fs";
32497
32497
  import { access as access2, mkdir, readdir, rm, stat } from "node:fs/promises";
32498
32498
  import path2 from "node:path";
@@ -32525,7 +32525,7 @@ async function removeIfExists(target) {
32525
32525
  }
32526
32526
  }
32527
32527
 
32528
- // ../../node_modules/.bun/subagent@0.5.5/node_modules/subagent/dist/utils/path.js
32528
+ // ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/utils/path.js
32529
32529
  import path3 from "node:path";
32530
32530
  function pathToFileUri(filePath) {
32531
32531
  const absolutePath = path3.isAbsolute(filePath) ? filePath : path3.resolve(filePath);
@@ -32536,11 +32536,10 @@ function pathToFileUri(filePath) {
32536
32536
  return `file://${normalizedPath}`;
32537
32537
  }
32538
32538
 
32539
- // ../../node_modules/.bun/subagent@0.5.5/node_modules/subagent/dist/vscode/promptBuilder.js
32540
- import { readFile as readFile2 } from "node:fs/promises";
32539
+ // ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/vscode/promptBuilder.js
32541
32540
  import path4 from "node:path";
32542
32541
 
32543
- // ../../node_modules/.bun/subagent@0.5.5/node_modules/subagent/dist/utils/template.js
32542
+ // ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/utils/template.js
32544
32543
  function renderTemplate(content, variables) {
32545
32544
  if (!content) {
32546
32545
  return content;
@@ -32561,8 +32560,12 @@ function renderTemplate(content, variables) {
32561
32560
  return result;
32562
32561
  }
32563
32562
 
32564
- // ../../node_modules/.bun/subagent@0.5.5/node_modules/subagent/dist/vscode/templates.js
32565
- var DEFAULT_REQUEST_TEMPLATE = `[[ ## system_instructions ## ]]
32563
+ // ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/vscode/templates.js
32564
+ var DEFAULT_REQUEST_TEMPLATE = `[[ ## task ## ]]
32565
+
32566
+ {{userQuery}}
32567
+
32568
+ [[ ## system_instructions ## ]]
32566
32569
 
32567
32570
  **IMPORTANT**: Follow these exact steps:
32568
32571
  1. Create and write your complete response to: {{responseFileTmp}}
@@ -32573,21 +32576,17 @@ if (Test-Path subagent.lock) { del subagent.lock }
32573
32576
  \`\`\`
32574
32577
 
32575
32578
  Do not proceed to step 2 until your response is completely written to the temporary file.
32576
-
32577
- [[ ## task ## ]]
32579
+ `;
32580
+ var DEFAULT_BATCH_REQUEST_TEMPLATE = `[[ ## task ## ]]
32578
32581
 
32579
32582
  {{userQuery}}
32580
- `;
32581
- var DEFAULT_BATCH_REQUEST_TEMPLATE = `[[ ## system_instructions ## ]]
32583
+
32584
+ [[ ## system_instructions ## ]]
32582
32585
 
32583
32586
  **IMPORTANT**: Follow these exact steps:
32584
32587
  1. Create and write your complete response to: {{responseFileTmp}}
32585
32588
  2. When completely finished and the response is stable, rename it to: {{responseFileFinal}}
32586
- 5. Do not unlock the workspace from this request; batch orchestration will handle unlocking after all responses are ready.
32587
-
32588
- [[ ## task ## ]]
32589
-
32590
- {{userQuery}}
32589
+ 3. Do not unlock the workspace from this request; batch orchestration will handle unlocking after all responses are ready.
32591
32590
  `;
32592
32591
  var DEFAULT_BATCH_ORCHESTRATOR_TEMPLATE = `MANDATORY: Run #runSubagent tool in your Available Actions for each request file to process them in isolated contexts.
32593
32592
  DO NOT read the request files yourself - only pass the file paths to each subagent:
@@ -32603,14 +32602,7 @@ if ($missing.Count -eq 0) { del subagent.lock }
32603
32602
  \`\`\`
32604
32603
  `;
32605
32604
 
32606
- // ../../node_modules/.bun/subagent@0.5.5/node_modules/subagent/dist/vscode/promptBuilder.js
32607
- async function loadTemplateFile(filePath) {
32608
- try {
32609
- return await readFile2(filePath, "utf8");
32610
- } catch (error40) {
32611
- throw new Error(`Failed to load template file '${filePath}': ${error40.message}`);
32612
- }
32613
- }
32605
+ // ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/vscode/promptBuilder.js
32614
32606
  function loadDefaultRequestTemplate() {
32615
32607
  return DEFAULT_REQUEST_TEMPLATE;
32616
32608
  }
@@ -32643,18 +32635,18 @@ function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateCont
32643
32635
  });
32644
32636
  }
32645
32637
 
32646
- // ../../node_modules/.bun/subagent@0.5.5/node_modules/subagent/dist/vscode/responseWaiter.js
32647
- import { readFile as readFile3 } from "node:fs/promises";
32638
+ // ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/vscode/responseWaiter.js
32639
+ import { readFile as readFile2 } from "node:fs/promises";
32648
32640
  import path5 from "node:path";
32649
32641
 
32650
- // ../../node_modules/.bun/subagent@0.5.5/node_modules/subagent/dist/utils/time.js
32642
+ // ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/utils/time.js
32651
32643
  function sleep(ms) {
32652
32644
  return new Promise((resolve2) => {
32653
32645
  setTimeout(resolve2, ms);
32654
32646
  });
32655
32647
  }
32656
32648
 
32657
- // ../../node_modules/.bun/subagent@0.5.5/node_modules/subagent/dist/vscode/responseWaiter.js
32649
+ // ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/vscode/responseWaiter.js
32658
32650
  async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, silent = false) {
32659
32651
  if (!silent) {
32660
32652
  console.error(`waiting for agent to finish: ${responseFileFinal}`);
@@ -32673,7 +32665,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
32673
32665
  const maxAttempts = 10;
32674
32666
  while (attempts < maxAttempts) {
32675
32667
  try {
32676
- const content = await readFile3(responseFileFinal, { encoding: "utf8" });
32668
+ const content = await readFile2(responseFileFinal, { encoding: "utf8" });
32677
32669
  if (!silent) {
32678
32670
  process.stdout.write(`${content}
32679
32671
  `);
@@ -32720,7 +32712,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
32720
32712
  const maxAttempts = 10;
32721
32713
  while (attempts < maxAttempts) {
32722
32714
  try {
32723
- const content = await readFile3(file2, { encoding: "utf8" });
32715
+ const content = await readFile2(file2, { encoding: "utf8" });
32724
32716
  if (!silent) {
32725
32717
  process.stdout.write(`${content}
32726
32718
  `);
@@ -32741,13 +32733,13 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
32741
32733
  return true;
32742
32734
  }
32743
32735
 
32744
- // ../../node_modules/.bun/subagent@0.5.5/node_modules/subagent/dist/vscode/vscodeProcess.js
32736
+ // ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/vscode/vscodeProcess.js
32745
32737
  import { exec, spawn } from "node:child_process";
32746
32738
  import { mkdir as mkdir2, writeFile } from "node:fs/promises";
32747
32739
  import path7 from "node:path";
32748
32740
  import { promisify } from "node:util";
32749
32741
 
32750
- // ../../node_modules/.bun/subagent@0.5.5/node_modules/subagent/dist/vscode/constants.js
32742
+ // ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/vscode/constants.js
32751
32743
  import os from "node:os";
32752
32744
  import path6 from "node:path";
32753
32745
  var DEFAULT_LOCK_NAME = "subagent.lock";
@@ -32758,7 +32750,7 @@ function getDefaultSubagentRoot(vscodeCmd = "code") {
32758
32750
  }
32759
32751
  var DEFAULT_SUBAGENT_ROOT = getDefaultSubagentRoot();
32760
32752
 
32761
- // ../../node_modules/.bun/subagent@0.5.5/node_modules/subagent/dist/vscode/vscodeProcess.js
32753
+ // ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/vscode/vscodeProcess.js
32762
32754
  var execAsync = promisify(exec);
32763
32755
  var DEFAULT_WAKEUP_CONTENT = `---
32764
32756
  description: 'Wake-up Signal'
@@ -32857,11 +32849,11 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
32857
32849
  }
32858
32850
  }
32859
32851
 
32860
- // ../../node_modules/.bun/subagent@0.5.5/node_modules/subagent/dist/vscode/workspaceManager.js
32861
- import { copyFile, mkdir as mkdir3, readFile as readFile4, readdir as readdir2, stat as stat2, writeFile as writeFile2 } from "node:fs/promises";
32852
+ // ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/vscode/workspaceManager.js
32853
+ import { copyFile, mkdir as mkdir3, readFile as readFile3, readdir as readdir2, stat as stat2, writeFile as writeFile2 } from "node:fs/promises";
32862
32854
  import path9 from "node:path";
32863
32855
 
32864
- // ../../node_modules/.bun/subagent@0.5.5/node_modules/subagent/dist/utils/workspace.js
32856
+ // ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/utils/workspace.js
32865
32857
  import path8 from "node:path";
32866
32858
 
32867
32859
  // ../../node_modules/.bun/json5@2.2.3/node_modules/json5/dist/index.mjs
@@ -33949,7 +33941,7 @@ var JSON5 = {
33949
33941
  var lib = JSON5;
33950
33942
  var dist_default = lib;
33951
33943
 
33952
- // ../../node_modules/.bun/subagent@0.5.5/node_modules/subagent/dist/utils/workspace.js
33944
+ // ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/utils/workspace.js
33953
33945
  function transformWorkspacePaths(workspaceContent, templateDir) {
33954
33946
  let workspace;
33955
33947
  try {
@@ -34019,7 +34011,7 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
34019
34011
  return JSON.stringify(transformedWorkspace, null, 2);
34020
34012
  }
34021
34013
 
34022
- // ../../node_modules/.bun/subagent@0.5.5/node_modules/subagent/dist/vscode/workspaceManager.js
34014
+ // ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/vscode/workspaceManager.js
34023
34015
  var DEFAULT_WORKSPACE_TEMPLATE = {
34024
34016
  folders: [
34025
34017
  {
@@ -34058,7 +34050,7 @@ async function copyAgentConfig(subagentDir, workspaceTemplate) {
34058
34050
  if (!stats.isFile()) {
34059
34051
  throw new Error(`workspace template must be a file, not a directory: ${workspaceSrc}`);
34060
34052
  }
34061
- const templateText = await readFile4(workspaceSrc, "utf8");
34053
+ const templateText = await readFile3(workspaceSrc, "utf8");
34062
34054
  workspaceContent = JSON.parse(templateText);
34063
34055
  } else {
34064
34056
  workspaceContent = DEFAULT_WORKSPACE_TEMPLATE;
@@ -34126,7 +34118,7 @@ async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspa
34126
34118
  return 0;
34127
34119
  }
34128
34120
 
34129
- // ../../node_modules/.bun/subagent@0.5.5/node_modules/subagent/dist/vscode/agentDispatch.js
34121
+ // ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/vscode/agentDispatch.js
34130
34122
  function generateTimestamp() {
34131
34123
  return (/* @__PURE__ */ new Date()).toISOString().replace(/[-:TZ.]/g, "").slice(0, 14);
34132
34124
  }
@@ -34170,15 +34162,7 @@ async function dispatchAgentSession(options) {
34170
34162
  error: error40.message
34171
34163
  };
34172
34164
  }
34173
- let templateContent;
34174
- try {
34175
- templateContent = requestTemplate ? await loadTemplateFile(path10.resolve(requestTemplate)) : loadDefaultRequestTemplate();
34176
- } catch (error40) {
34177
- return {
34178
- exitCode: 1,
34179
- error: error40.message
34180
- };
34181
- }
34165
+ const templateContent = requestTemplate ?? loadDefaultRequestTemplate();
34182
34166
  const subagentRootPath = subagentRoot ?? getSubagentRoot(vscodeCmd);
34183
34167
  const subagentDir = await findUnlockedSubagent(subagentRootPath);
34184
34168
  if (!subagentDir) {
@@ -34288,17 +34272,7 @@ async function dispatchBatchAgent(options) {
34288
34272
  error: error40.message
34289
34273
  };
34290
34274
  }
34291
- let batchRequestTemplateContent;
34292
- try {
34293
- batchRequestTemplateContent = requestTemplate ? await loadTemplateFile(path10.resolve(requestTemplate)) : loadDefaultBatchRequestTemplate();
34294
- } catch (error40) {
34295
- return {
34296
- exitCode: 1,
34297
- requestFiles,
34298
- queryCount,
34299
- error: error40.message
34300
- };
34301
- }
34275
+ const batchRequestTemplateContent = requestTemplate ?? loadDefaultBatchRequestTemplate();
34302
34276
  const orchestratorTemplateContent = loadDefaultBatchOrchestratorTemplate();
34303
34277
  const subagentRootPath = subagentRoot ?? getSubagentRoot(vscodeCmd);
34304
34278
  const subagentDir = await findUnlockedSubagent(subagentRootPath);
@@ -34406,7 +34380,7 @@ async function dispatchBatchAgent(options) {
34406
34380
  }
34407
34381
  }
34408
34382
 
34409
- // ../../node_modules/.bun/subagent@0.5.5/node_modules/subagent/dist/vscode/provision.js
34383
+ // ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/vscode/provision.js
34410
34384
  import { writeFile as writeFile4 } from "node:fs/promises";
34411
34385
  import path11 from "node:path";
34412
34386
  var DEFAULT_WORKSPACE_TEMPLATE2 = {
@@ -34534,7 +34508,7 @@ async function provisionSubagents(options) {
34534
34508
 
34535
34509
  // ../../packages/core/dist/index.js
34536
34510
  import { constants as constants32 } from "node:fs";
34537
- import { access as access32, readFile as readFile62 } from "node:fs/promises";
34511
+ import { access as access32, readFile as readFile6 } from "node:fs/promises";
34538
34512
  import path112 from "node:path";
34539
34513
  import { parse as parse32 } from "yaml";
34540
34514
  import { createHash, randomUUID as randomUUID2 } from "node:crypto";
@@ -34579,7 +34553,7 @@ function isTestMessage(value) {
34579
34553
  }
34580
34554
  return candidate.content.every(isJsonObject);
34581
34555
  }
34582
- var EVALUATOR_KIND_VALUES = ["code", "llm_judge"];
34556
+ var EVALUATOR_KIND_VALUES = ["code", "llm_judge", "rubric"];
34583
34557
  var EVALUATOR_KIND_SET = new Set(EVALUATOR_KIND_VALUES);
34584
34558
  function isEvaluatorKind(value) {
34585
34559
  return typeof value === "string" && EVALUATOR_KIND_SET.has(value);
@@ -34767,7 +34741,7 @@ async function loadConfig(evalFilePath, repoRoot) {
34767
34741
  continue;
34768
34742
  }
34769
34743
  try {
34770
- const rawConfig = await readFile6(configPath, "utf8");
34744
+ const rawConfig = await readFile4(configPath, "utf8");
34771
34745
  const parsed = parse5(rawConfig);
34772
34746
  if (!isJsonObject(parsed)) {
34773
34747
  logWarning(`Invalid .agentv/config.yaml format at ${configPath}`);
@@ -34946,6 +34920,29 @@ async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId
34946
34920
  }
34947
34921
  }
34948
34922
  const _model = asString2(rawEvaluator.model);
34923
+ if (typeValue === "rubric") {
34924
+ const rubrics = rawEvaluator.rubrics;
34925
+ if (!Array.isArray(rubrics)) {
34926
+ logWarning2(`Skipping rubric evaluator '${name16}' in '${evalId}': missing rubrics array`);
34927
+ continue;
34928
+ }
34929
+ const parsedRubrics = rubrics.filter((r) => isJsonObject2(r)).map((rubric, index) => ({
34930
+ id: asString2(rubric.id) ?? `rubric-${index + 1}`,
34931
+ description: asString2(rubric.description) ?? "",
34932
+ weight: typeof rubric.weight === "number" ? rubric.weight : 1,
34933
+ required: typeof rubric.required === "boolean" ? rubric.required : true
34934
+ })).filter((r) => r.description.length > 0);
34935
+ if (parsedRubrics.length === 0) {
34936
+ logWarning2(`Skipping rubric evaluator '${name16}' in '${evalId}': no valid rubrics found`);
34937
+ continue;
34938
+ }
34939
+ evaluators.push({
34940
+ name: name16,
34941
+ type: "rubric",
34942
+ rubrics: parsedRubrics
34943
+ });
34944
+ continue;
34945
+ }
34949
34946
  evaluators.push({
34950
34947
  name: name16,
34951
34948
  type: "llm_judge",
@@ -35416,7 +35413,7 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
35416
35413
  continue;
35417
35414
  }
35418
35415
  const conversationId = asString5(evalcase.conversation_id);
35419
- const outcome = asString5(evalcase.outcome);
35416
+ const outcome = asString5(evalcase.expected_outcome) ?? asString5(evalcase.outcome);
35420
35417
  const inputMessagesValue = evalcase.input_messages;
35421
35418
  const expectedMessagesValue = evalcase.expected_messages;
35422
35419
  if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
@@ -35470,6 +35467,33 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
35470
35467
  logError(`Skipping eval case '${id}': ${message}`);
35471
35468
  continue;
35472
35469
  }
35470
+ const inlineRubrics = evalcase.rubrics;
35471
+ if (inlineRubrics !== void 0 && Array.isArray(inlineRubrics)) {
35472
+ const rubricItems = inlineRubrics.filter((r) => isJsonObject(r) || typeof r === "string").map((rubric, index) => {
35473
+ if (typeof rubric === "string") {
35474
+ return {
35475
+ id: `rubric-${index + 1}`,
35476
+ description: rubric,
35477
+ weight: 1,
35478
+ required: true
35479
+ };
35480
+ }
35481
+ return {
35482
+ id: asString5(rubric.id) ?? `rubric-${index + 1}`,
35483
+ description: asString5(rubric.description) ?? "",
35484
+ weight: typeof rubric.weight === "number" ? rubric.weight : 1,
35485
+ required: typeof rubric.required === "boolean" ? rubric.required : true
35486
+ };
35487
+ }).filter((r) => r.description.length > 0);
35488
+ if (rubricItems.length > 0) {
35489
+ const rubricEvaluator = {
35490
+ name: "rubric",
35491
+ type: "rubric",
35492
+ rubrics: rubricItems
35493
+ };
35494
+ evaluators = evaluators ? [rubricEvaluator, ...evaluators] : [rubricEvaluator];
35495
+ }
35496
+ }
35473
35497
  const userFilePaths = [];
35474
35498
  for (const segment of inputSegments) {
35475
35499
  if (segment.type === "file" && typeof segment.resolvedPath === "string") {
@@ -35562,6 +35586,9 @@ var AzureProvider = class {
35562
35586
  retryConfig: this.retryConfig
35563
35587
  });
35564
35588
  }
35589
+ asLanguageModel() {
35590
+ return this.model;
35591
+ }
35565
35592
  };
35566
35593
  var AnthropicProvider = class {
35567
35594
  constructor(targetName, config2) {
@@ -35595,6 +35622,9 @@ var AnthropicProvider = class {
35595
35622
  providerOptions
35596
35623
  });
35597
35624
  }
35625
+ asLanguageModel() {
35626
+ return this.model;
35627
+ }
35598
35628
  };
35599
35629
  var GeminiProvider = class {
35600
35630
  constructor(targetName, config2) {
@@ -35625,6 +35655,9 @@ var GeminiProvider = class {
35625
35655
  retryConfig: this.retryConfig
35626
35656
  });
35627
35657
  }
35658
+ asLanguageModel() {
35659
+ return this.model;
35660
+ }
35628
35661
  };
35629
35662
  function buildAzureOptions(config2) {
35630
35663
  const options = {
@@ -36932,38 +36965,38 @@ var MockProvider = class {
36932
36965
  return this.delayMs;
36933
36966
  }
36934
36967
  };
36935
- var AGENTV_REQUEST_TEMPLATE = `[[ ## system_instructions ## ]]
36968
+ var AGENTV_REQUEST_TEMPLATE = `[[ ## task ## ]]
36969
+
36970
+ {{userQuery}}
36936
36971
 
36937
- **IMPORTANT**: You are processing an evaluation request from AgentV. Follow these exact steps:
36972
+ [[ ## system_instructions ## ]]
36938
36973
 
36939
- 1. Read and understand all guideline files and attachments provided above
36940
- 2. Process the user query below according to the guidelines
36941
- 3. Write your complete response to: {{responseFileTmp}}
36942
- 4. When completely finished, run these PowerShell commands to signal completion:
36943
- \`\`\`powershell
36974
+ **IMPORTANT**: Follow these exact steps:
36975
+ 1. Create and write your complete response to: {{responseFileTmp}}
36976
+ - Do NOT create any additional output files in the workspace.
36977
+ - All intended file outputs/changes MUST be written in your response file.
36978
+ - For each intended file, include the repo name, relative path and unified git diff following the convention \`diff --git ...\`.
36979
+ 2. When completely finished, run these PowerShell commands to signal completion:
36980
+ \`\`\`
36944
36981
  Move-Item -LiteralPath '{{responseFileTmp}}' -Destination '{{responseFileFinal}}'
36945
- if (Test-Path subagent.lock) { Remove-Item subagent.lock }
36982
+ if (Test-Path subagent.lock) { del subagent.lock }
36946
36983
  \`\`\`
36947
36984
 
36948
- Do not proceed to step 4 until your response is completely written to the temporary file.
36949
-
36950
- [[ ## task ## ]]
36951
-
36952
- {{userQuery}}
36985
+ Do not proceed to step 2 until your response is completely written to the temporary file.
36953
36986
  `;
36954
- var AGENTV_BATCH_REQUEST_TEMPLATE = `[[ ## system_instructions ## ]]
36955
-
36956
- **IMPORTANT**: You are processing a batch evaluation request from AgentV. Follow these exact steps:
36987
+ var AGENTV_BATCH_REQUEST_TEMPLATE = `[[ ## task ## ]]
36957
36988
 
36958
- 1. Read and understand all guideline files and attachments provided above
36959
- 2. Process the user query below according to the guidelines
36960
- 3. Write your complete response to: {{responseFileTmp}}
36961
- 4. When completely finished and the response is stable, rename it to: {{responseFileFinal}}
36962
- 5. Do not unlock the workspace from this request; batch orchestration will handle unlocking after all responses are ready.
36989
+ {{userQuery}}
36963
36990
 
36964
- [[ ## task ## ]]
36991
+ [[ ## system_instructions ## ]]
36965
36992
 
36966
- {{userQuery}}
36993
+ **IMPORTANT**: Follow these exact steps:
36994
+ 1. Create and write your complete response to: {{responseFileTmp}}
36995
+ - Do NOT create any additional output files in the workspace.
36996
+ - All intended file outputs/changes MUST be written in your response file.
36997
+ - For each intended file, include the repo name, relative path and unified git diff following the convention \`diff --git ...\`.
36998
+ 2. When completely finished and the response is stable, rename it to: {{responseFileFinal}}
36999
+ 3. Do not unlock the workspace from this request; batch orchestration will handle unlocking after all responses are ready.
36967
37000
  `;
36968
37001
  var VSCodeProvider = class {
36969
37002
  id;
@@ -37251,7 +37284,7 @@ async function readTargetDefinitions(filePath) {
37251
37284
  if (!await fileExists3(absolutePath)) {
37252
37285
  throw new Error(`targets.yaml not found at ${absolutePath}`);
37253
37286
  }
37254
- const raw = await readFile62(absolutePath, "utf8");
37287
+ const raw = await readFile6(absolutePath, "utf8");
37255
37288
  const parsed = parse32(raw);
37256
37289
  if (!isRecord(parsed)) {
37257
37290
  throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
@@ -37288,6 +37321,144 @@ function createProvider(target) {
37288
37321
  }
37289
37322
  }
37290
37323
  }
37324
+ var rubricCheckResultSchema = external_exports.object({
37325
+ id: external_exports.string().describe("The ID of the rubric item being checked"),
37326
+ satisfied: external_exports.boolean().describe("Whether this rubric requirement is met"),
37327
+ reasoning: external_exports.string().describe("Brief explanation (1-2 sentences) for this check")
37328
+ });
37329
+ var rubricEvaluationSchema = external_exports.object({
37330
+ checks: external_exports.array(rubricCheckResultSchema).describe("Results for each rubric item"),
37331
+ overall_reasoning: external_exports.string().describe("Overall assessment summary (1-2 sentences)")
37332
+ });
37333
+ var RubricEvaluator = class {
37334
+ kind = "rubric";
37335
+ config;
37336
+ resolveJudgeProvider;
37337
+ constructor(options) {
37338
+ this.config = options.config;
37339
+ this.resolveJudgeProvider = options.resolveJudgeProvider;
37340
+ }
37341
+ async evaluate(context) {
37342
+ const judgeProvider = await this.resolveJudgeProvider(context);
37343
+ if (!judgeProvider) {
37344
+ throw new Error("No judge provider available for rubric evaluation");
37345
+ }
37346
+ if (!this.config.rubrics || this.config.rubrics.length === 0) {
37347
+ throw new Error(
37348
+ `No rubrics found for evaluator "${this.config.name}". Run "agentv generate rubrics" first.`
37349
+ );
37350
+ }
37351
+ const prompt = this.buildPrompt(context, this.config.rubrics);
37352
+ const model = judgeProvider.asLanguageModel?.();
37353
+ if (!model) {
37354
+ throw new Error("Judge provider does not support language model interface");
37355
+ }
37356
+ const system = `You are an expert evaluator. Evaluate the candidate answer against each rubric item.
37357
+ You must return a valid JSON object matching this schema:
37358
+ {
37359
+ "checks": [
37360
+ {
37361
+ "id": "string (rubric id)",
37362
+ "satisfied": boolean,
37363
+ "reasoning": "string (brief explanation)"
37364
+ }
37365
+ ],
37366
+ "overall_reasoning": "string (summary)"
37367
+ }`;
37368
+ let result;
37369
+ let lastError;
37370
+ for (let attempt = 1; attempt <= 3; attempt++) {
37371
+ try {
37372
+ const { text: text2 } = await generateText({
37373
+ model,
37374
+ system,
37375
+ prompt
37376
+ });
37377
+ const cleaned = text2.replace(/```json\n?|```/g, "").trim();
37378
+ result = rubricEvaluationSchema.parse(JSON.parse(cleaned));
37379
+ break;
37380
+ } catch (e) {
37381
+ lastError = e instanceof Error ? e : new Error(String(e));
37382
+ }
37383
+ }
37384
+ if (!result) {
37385
+ throw new Error(
37386
+ `Failed to parse rubric evaluation result after 3 attempts: ${lastError?.message}`
37387
+ );
37388
+ }
37389
+ const { score, verdict, hits, misses } = this.calculateScore(result, this.config.rubrics);
37390
+ return {
37391
+ score,
37392
+ verdict,
37393
+ hits,
37394
+ misses,
37395
+ expectedAspectCount: this.config.rubrics.length,
37396
+ reasoning: result.overall_reasoning,
37397
+ evaluatorRawRequest: {
37398
+ prompt
37399
+ }
37400
+ };
37401
+ }
37402
+ buildPrompt(context, rubrics) {
37403
+ const parts = [
37404
+ "You are an expert evaluator. Evaluate the candidate answer against each rubric item below.",
37405
+ "",
37406
+ "[[ ## question ## ]]",
37407
+ context.evalCase.question,
37408
+ "",
37409
+ "[[ ## expected_outcome ## ]]",
37410
+ context.evalCase.expected_outcome,
37411
+ ""
37412
+ ];
37413
+ if (context.evalCase.reference_answer && context.evalCase.reference_answer.trim().length > 0) {
37414
+ parts.push("[[ ## reference_answer ## ]]", context.evalCase.reference_answer, "");
37415
+ }
37416
+ parts.push("[[ ## candidate_answer ## ]]", context.candidate, "", "[[ ## rubrics ## ]]");
37417
+ for (const rubric of rubrics) {
37418
+ const requiredLabel = rubric.required ? " (REQUIRED)" : "";
37419
+ const weightLabel = rubric.weight !== 1 ? ` (weight: ${rubric.weight})` : "";
37420
+ parts.push(`- [${rubric.id}]${requiredLabel}${weightLabel}: ${rubric.description}`);
37421
+ }
37422
+ parts.push("", "For each rubric, determine if it is satisfied and provide brief reasoning.");
37423
+ return parts.join("\n");
37424
+ }
37425
+ calculateScore(result, rubrics) {
37426
+ const rubricMap = new Map(rubrics.map((r) => [r.id, r]));
37427
+ const hits = [];
37428
+ const misses = [];
37429
+ let totalWeight = 0;
37430
+ let earnedWeight = 0;
37431
+ let failedRequired = false;
37432
+ for (const check2 of result.checks) {
37433
+ const rubric = rubricMap.get(check2.id);
37434
+ if (!rubric) {
37435
+ continue;
37436
+ }
37437
+ totalWeight += rubric.weight;
37438
+ if (check2.satisfied) {
37439
+ earnedWeight += rubric.weight;
37440
+ hits.push(`[${rubric.id}] ${rubric.description}: ${check2.reasoning}`);
37441
+ } else {
37442
+ misses.push(`[${rubric.id}] ${rubric.description}: ${check2.reasoning}`);
37443
+ if (rubric.required) {
37444
+ failedRequired = true;
37445
+ }
37446
+ }
37447
+ }
37448
+ const score = totalWeight > 0 ? Math.min(1, Math.max(0, earnedWeight / totalWeight)) : 0;
37449
+ let verdict;
37450
+ if (failedRequired) {
37451
+ verdict = "fail";
37452
+ } else if (score >= 0.8) {
37453
+ verdict = "pass";
37454
+ } else if (score >= 0.6) {
37455
+ verdict = "borderline";
37456
+ } else {
37457
+ verdict = "fail";
37458
+ }
37459
+ return { score, verdict, hits, misses };
37460
+ }
37461
+ };
37291
37462
  var DEFAULT_EVALUATOR_TEMPLATE = `You are an expert evaluator. Your goal is to grade the candidate_answer based on how well it achieves the expected_outcome for the original task.
37292
37463
 
37293
37464
  Use the reference_answer as a gold standard for a high-quality response (if provided). The candidate_answer does not need to match it verbatim, but should capture the key points and follow the same spirit.
@@ -38240,6 +38411,7 @@ async function runEvaluatorList(options) {
38240
38411
  name: evaluator.name,
38241
38412
  type: evaluator.type,
38242
38413
  score: score2.score,
38414
+ verdict: score2.verdict,
38243
38415
  hits: score2.hits,
38244
38416
  misses: score2.misses,
38245
38417
  reasoning: score2.reasoning,
@@ -38267,6 +38439,40 @@ async function runEvaluatorList(options) {
38267
38439
  name: evaluator.name,
38268
38440
  type: evaluator.type,
38269
38441
  score: score2.score,
38442
+ verdict: score2.verdict,
38443
+ hits: score2.hits,
38444
+ misses: score2.misses,
38445
+ reasoning: score2.reasoning,
38446
+ evaluator_provider_request: score2.evaluatorRawRequest
38447
+ });
38448
+ continue;
38449
+ }
38450
+ if (evaluator.type === "rubric") {
38451
+ const rubricEvaluator = new RubricEvaluator({
38452
+ config: evaluator,
38453
+ resolveJudgeProvider: async (context) => {
38454
+ if (context.judgeProvider) {
38455
+ return context.judgeProvider;
38456
+ }
38457
+ return judgeProvider;
38458
+ }
38459
+ });
38460
+ const score2 = await rubricEvaluator.evaluate({
38461
+ evalCase,
38462
+ candidate,
38463
+ target,
38464
+ provider,
38465
+ attempt,
38466
+ promptInputs,
38467
+ now,
38468
+ judgeProvider
38469
+ });
38470
+ scored.push({ score: score2, name: evaluator.name, type: evaluator.type });
38471
+ evaluatorResults.push({
38472
+ name: evaluator.name,
38473
+ type: evaluator.type,
38474
+ score: score2.score,
38475
+ verdict: score2.verdict,
38270
38476
  hits: score2.hits,
38271
38477
  misses: score2.misses,
38272
38478
  reasoning: score2.reasoning,
@@ -38496,6 +38702,82 @@ function isTimeoutLike(error40) {
38496
38702
  const value = String(error40).toLowerCase();
38497
38703
  return value.includes("timeout");
38498
38704
  }
38705
+ var rubricItemSchema = external_exports.object({
38706
+ id: external_exports.string().describe("Short identifier for this rubric (e.g., clarity, completeness)"),
38707
+ description: external_exports.string().describe("What this rubric checks for"),
38708
+ weight: external_exports.number().default(1).describe("Relative importance (default 1.0)"),
38709
+ required: external_exports.boolean().default(true).describe("Whether this is a mandatory requirement")
38710
+ });
38711
+ var rubricGenerationSchema = external_exports.object({
38712
+ rubrics: external_exports.array(rubricItemSchema).describe("List of evaluation rubrics")
38713
+ });
38714
+ async function generateRubrics(options) {
38715
+ const { expectedOutcome, question, referenceAnswer, provider } = options;
38716
+ const prompt = buildPrompt(expectedOutcome, question, referenceAnswer);
38717
+ const model = provider.asLanguageModel?.();
38718
+ if (!model) {
38719
+ throw new Error("Provider does not support language model interface");
38720
+ }
38721
+ const system = `You are an expert at creating evaluation rubrics.
38722
+ You must return a valid JSON object matching this schema:
38723
+ {
38724
+ "rubrics": [
38725
+ {
38726
+ "id": "string (short identifier)",
38727
+ "description": "string (what to check)",
38728
+ "weight": number (default 1.0),
38729
+ "required": boolean (default true)
38730
+ }
38731
+ ]
38732
+ }`;
38733
+ let result;
38734
+ let lastError;
38735
+ for (let attempt = 1; attempt <= 3; attempt++) {
38736
+ try {
38737
+ const { text: text2 } = await generateText({
38738
+ model,
38739
+ system,
38740
+ prompt
38741
+ });
38742
+ const cleaned = text2.replace(/```json\n?|```/g, "").trim();
38743
+ result = rubricGenerationSchema.parse(JSON.parse(cleaned));
38744
+ break;
38745
+ } catch (e) {
38746
+ lastError = e instanceof Error ? e : new Error(String(e));
38747
+ }
38748
+ }
38749
+ if (!result) {
38750
+ throw new Error(`Failed to parse generated rubrics after 3 attempts: ${lastError?.message}`);
38751
+ }
38752
+ return result.rubrics;
38753
+ }
38754
+ function buildPrompt(expectedOutcome, question, referenceAnswer) {
38755
+ const parts = [
38756
+ "You are an expert at creating evaluation rubrics.",
38757
+ "Given the expected outcome (and optionally the question and reference answer),",
38758
+ "generate a list of specific, measurable rubric items to evaluate whether an answer meets the expected outcome.",
38759
+ "",
38760
+ "Each rubric should:",
38761
+ "- Be specific and testable",
38762
+ "- Have a short, descriptive ID",
38763
+ "- Include a clear description of what to check",
38764
+ "- Indicate if it is required (mandatory) or optional",
38765
+ "- Have an appropriate weight (default 1.0, use higher values for more important aspects)",
38766
+ "",
38767
+ "Generate 3-7 rubric items that comprehensively cover the expected outcome.",
38768
+ "",
38769
+ "[[ ## expected_outcome ## ]]",
38770
+ expectedOutcome,
38771
+ ""
38772
+ ];
38773
+ if (question && question.trim().length > 0) {
38774
+ parts.push("[[ ## question ## ]]", question, "");
38775
+ }
38776
+ if (referenceAnswer && referenceAnswer.trim().length > 0) {
38777
+ parts.push("[[ ## reference_answer ## ]]", referenceAnswer, "");
38778
+ }
38779
+ return parts.join("\n");
38780
+ }
38499
38781
  function createAgentKernel() {
38500
38782
  return { status: "stub" };
38501
38783
  }
@@ -38953,12 +39235,12 @@ var ProgressDisplay = class {
38953
39235
  }
38954
39236
  addLogPaths(paths) {
38955
39237
  const newPaths = [];
38956
- for (const path25 of paths) {
38957
- if (this.logPathSet.has(path25)) {
39238
+ for (const path26 of paths) {
39239
+ if (this.logPathSet.has(path26)) {
38958
39240
  continue;
38959
39241
  }
38960
- this.logPathSet.add(path25);
38961
- newPaths.push(path25);
39242
+ this.logPathSet.add(path26);
39243
+ newPaths.push(path26);
38962
39244
  }
38963
39245
  if (newPaths.length === 0) {
38964
39246
  return;
@@ -38974,8 +39256,8 @@ var ProgressDisplay = class {
38974
39256
  this.hasPrintedLogHeader = true;
38975
39257
  }
38976
39258
  const startIndex = this.logPaths.length - newPaths.length;
38977
- newPaths.forEach((path25, offset) => {
38978
- console.log(`${startIndex + offset + 1}. ${path25}`);
39259
+ newPaths.forEach((path26, offset) => {
39260
+ console.log(`${startIndex + offset + 1}. ${path26}`);
38979
39261
  });
38980
39262
  }
38981
39263
  scheduleRender() {
@@ -39023,8 +39305,8 @@ var ProgressDisplay = class {
39023
39305
  if (this.logPaths.length > 0) {
39024
39306
  lines.push("");
39025
39307
  lines.push("Codex CLI logs:");
39026
- this.logPaths.forEach((path25, index) => {
39027
- lines.push(`${index + 1}. ${path25}`);
39308
+ this.logPaths.forEach((path26, index) => {
39309
+ lines.push(`${index + 1}. ${path26}`);
39028
39310
  });
39029
39311
  }
39030
39312
  const rowCount = this.getRenderedRowCount(lines);
@@ -39229,11 +39511,6 @@ function formatEvaluationSummary(summary) {
39229
39511
  return lines.join("\n");
39230
39512
  }
39231
39513
 
39232
- // src/commands/eval/targets.ts
39233
- import { constants as constants5 } from "node:fs";
39234
- import { access as access5 } from "node:fs/promises";
39235
- import path17 from "node:path";
39236
-
39237
39514
  // ../../packages/core/dist/evaluation/validation/index.js
39238
39515
  import { readFile as readFile7 } from "node:fs/promises";
39239
39516
  import { parse as parse6 } from "yaml";
@@ -39349,13 +39626,13 @@ async function validateEvalFile(filePath) {
39349
39626
  message: "Missing or invalid 'id' field (must be a non-empty string)"
39350
39627
  });
39351
39628
  }
39352
- const outcome = evalCase.outcome;
39353
- if (typeof outcome !== "string" || outcome.trim().length === 0) {
39629
+ const expectedOutcome = evalCase.expected_outcome ?? evalCase.outcome;
39630
+ if (expectedOutcome !== void 0 && (typeof expectedOutcome !== "string" || expectedOutcome.trim().length === 0)) {
39354
39631
  errors.push({
39355
39632
  severity: "error",
39356
39633
  filePath: absolutePath,
39357
- location: `${location}.outcome`,
39358
- message: "Missing or invalid 'outcome' field (must be a non-empty string)"
39634
+ location: `${location}.expected_outcome`,
39635
+ message: "Invalid 'expected_outcome' or 'outcome' field (must be a non-empty string if provided)"
39359
39636
  });
39360
39637
  }
39361
39638
  const inputMessages = evalCase.input_messages;
@@ -40090,19 +40367,16 @@ async function validateMessagesFileRefs(messages, location, searchRoots, filePat
40090
40367
  }
40091
40368
  }
40092
40369
 
40093
- // src/commands/eval/targets.ts
40370
+ // src/utils/targets.ts
40371
+ import { constants as constants5 } from "node:fs";
40372
+ import { access as access5 } from "node:fs/promises";
40373
+ import path17 from "node:path";
40094
40374
  var TARGET_FILE_CANDIDATES = [
40095
40375
  "targets.yaml",
40096
40376
  "targets.yml",
40097
40377
  path17.join(".agentv", "targets.yaml"),
40098
40378
  path17.join(".agentv", "targets.yml")
40099
40379
  ];
40100
- var ANSI_YELLOW7 = "\x1B[33m";
40101
- var ANSI_RED2 = "\x1B[31m";
40102
- var ANSI_RESET7 = "\x1B[0m";
40103
- function isTTY() {
40104
- return process.stdout.isTTY ?? false;
40105
- }
40106
40380
  async function fileExists5(filePath) {
40107
40381
  try {
40108
40382
  await access5(filePath, constants5.F_OK);
@@ -40111,10 +40385,6 @@ async function fileExists5(filePath) {
40111
40385
  return false;
40112
40386
  }
40113
40387
  }
40114
- async function readTestSuiteTarget(testFilePath) {
40115
- const metadata = await readTestSuiteMetadata(testFilePath);
40116
- return metadata.target;
40117
- }
40118
40388
  async function discoverTargetsFile(options) {
40119
40389
  const { explicitPath, testFilePath, repoRoot, cwd } = options;
40120
40390
  if (explicitPath) {
@@ -40145,6 +40415,18 @@ async function discoverTargetsFile(options) {
40145
40415
  }
40146
40416
  throw new Error("Unable to locate targets.yaml. Use --targets to specify the file explicitly.");
40147
40417
  }
40418
+
40419
+ // src/commands/eval/targets.ts
40420
+ var ANSI_YELLOW7 = "\x1B[33m";
40421
+ var ANSI_RED2 = "\x1B[31m";
40422
+ var ANSI_RESET7 = "\x1B[0m";
40423
+ function isTTY() {
40424
+ return process.stdout.isTTY ?? false;
40425
+ }
40426
+ async function readTestSuiteTarget(testFilePath) {
40427
+ const metadata = await readTestSuiteMetadata(testFilePath);
40428
+ return metadata.target;
40429
+ }
40148
40430
  function pickTargetName(options) {
40149
40431
  const cliName = options.cliTargetName?.trim();
40150
40432
  if (cliName && cliName !== "default") {
@@ -40731,14 +41013,174 @@ async function resolveEvalPaths(evalPaths, cwd) {
40731
41013
  return sorted;
40732
41014
  }
40733
41015
 
41016
+ // src/commands/generate/rubrics.ts
41017
+ import { readFile as readFile8, writeFile as writeFile6 } from "node:fs/promises";
41018
+ import path20 from "node:path";
41019
+ import { pathToFileURL as pathToFileURL2 } from "node:url";
41020
+ import { isMap, isSeq, parseDocument } from "yaml";
41021
+ function isJsonObject3(value) {
41022
+ return typeof value === "object" && value !== null && !Array.isArray(value);
41023
+ }
41024
+ function asString6(value) {
41025
+ return typeof value === "string" ? value : void 0;
41026
+ }
41027
+ async function loadRubricGenerator() {
41028
+ const customGenerator = process.env.AGENTEVO_CLI_RUBRIC_GENERATOR;
41029
+ if (customGenerator) {
41030
+ const generatorPath = path20.resolve(customGenerator);
41031
+ const generatorUrl = pathToFileURL2(generatorPath).href;
41032
+ const module = await import(generatorUrl);
41033
+ return module.generateRubrics;
41034
+ }
41035
+ return generateRubrics;
41036
+ }
41037
+ async function generateRubricsCommand(options) {
41038
+ const { file: file2, target: targetOverride, verbose } = options;
41039
+ console.log(`Generating rubrics for: ${file2}`);
41040
+ const absolutePath = path20.resolve(file2);
41041
+ const content = await readFile8(absolutePath, "utf8");
41042
+ const doc = parseDocument(content);
41043
+ const parsed = doc.toJSON();
41044
+ if (!isJsonObject3(parsed)) {
41045
+ throw new Error(`Invalid YAML file format: ${file2}`);
41046
+ }
41047
+ const suite = parsed;
41048
+ const evalcases = suite.evalcases;
41049
+ if (!Array.isArray(evalcases)) {
41050
+ throw new Error(`No evalcases found in ${file2}`);
41051
+ }
41052
+ const targetSelection = await selectTarget({
41053
+ testFilePath: absolutePath,
41054
+ repoRoot: process.cwd(),
41055
+ cwd: process.cwd(),
41056
+ cliTargetName: targetOverride,
41057
+ dryRun: false,
41058
+ dryRunDelay: 0,
41059
+ dryRunDelayMin: 0,
41060
+ dryRunDelayMax: 0,
41061
+ env: process.env
41062
+ });
41063
+ if (verbose) {
41064
+ console.log(`Using target: ${targetSelection.targetName}`);
41065
+ }
41066
+ const provider = createProvider(targetSelection.resolvedTarget);
41067
+ const generateRubricsFunc = await loadRubricGenerator();
41068
+ let updatedCount = 0;
41069
+ let skippedCount = 0;
41070
+ const evalcasesNode = doc.getIn(["evalcases"]);
41071
+ if (!evalcasesNode || !isSeq(evalcasesNode)) {
41072
+ throw new Error("evalcases must be a sequence");
41073
+ }
41074
+ for (let i = 0; i < evalcases.length; i++) {
41075
+ const rawCase = evalcases[i];
41076
+ if (!isJsonObject3(rawCase)) {
41077
+ continue;
41078
+ }
41079
+ const evalCase = rawCase;
41080
+ const id = asString6(evalCase.id) ?? "unknown";
41081
+ const expectedOutcome = asString6(evalCase.expected_outcome) ?? asString6(evalCase.outcome);
41082
+ if (!expectedOutcome) {
41083
+ if (verbose) {
41084
+ console.log(` Skipping ${id}: no expected_outcome`);
41085
+ }
41086
+ skippedCount++;
41087
+ continue;
41088
+ }
41089
+ if (evalCase.rubrics !== void 0) {
41090
+ if (verbose) {
41091
+ console.log(` Skipping ${id}: rubrics already defined`);
41092
+ }
41093
+ skippedCount++;
41094
+ continue;
41095
+ }
41096
+ console.log(` Generating rubrics for: ${id}`);
41097
+ const question = extractQuestion(evalCase);
41098
+ const referenceAnswer = asString6(evalCase.reference_answer);
41099
+ const rubrics = await generateRubricsFunc({
41100
+ expectedOutcome,
41101
+ question,
41102
+ referenceAnswer,
41103
+ provider
41104
+ });
41105
+ const caseNode = evalcasesNode.items[i];
41106
+ if (caseNode && isMap(caseNode)) {
41107
+ caseNode.set(
41108
+ "rubrics",
41109
+ rubrics.map(
41110
+ (r) => ({
41111
+ id: r.id,
41112
+ description: r.description,
41113
+ weight: r.weight,
41114
+ required: r.required
41115
+ })
41116
+ )
41117
+ );
41118
+ }
41119
+ updatedCount++;
41120
+ if (verbose) {
41121
+ console.log(` Generated ${rubrics.length} rubric(s)`);
41122
+ }
41123
+ }
41124
+ if (updatedCount > 0) {
41125
+ const output = doc.toString();
41126
+ await writeFile6(absolutePath, output, "utf8");
41127
+ console.log(`
41128
+ Updated ${updatedCount} eval case(s) with generated rubrics`);
41129
+ if (skippedCount > 0) {
41130
+ console.log(`Skipped ${skippedCount} eval case(s)`);
41131
+ }
41132
+ } else {
41133
+ console.log("\nNo eval cases updated (all already have rubrics or missing expected_outcome)");
41134
+ }
41135
+ }
41136
+ function extractQuestion(evalCase) {
41137
+ const explicitQuestion = asString6(evalCase.question);
41138
+ if (explicitQuestion) {
41139
+ return explicitQuestion;
41140
+ }
41141
+ const inputMessages = evalCase.input_messages;
41142
+ if (!Array.isArray(inputMessages)) {
41143
+ return void 0;
41144
+ }
41145
+ for (const msg of inputMessages) {
41146
+ if (!isJsonObject3(msg)) {
41147
+ continue;
41148
+ }
41149
+ if (msg.role === "user" && typeof msg.content === "string") {
41150
+ return msg.content;
41151
+ }
41152
+ }
41153
+ return void 0;
41154
+ }
41155
+
41156
+ // src/commands/generate/index.ts
41157
+ function registerGenerateCommand(program) {
41158
+ const generate = program.command("generate").description("Generate evaluation artifacts");
41159
+ generate.command("rubrics <file>").description("Generate rubrics from expected_outcome in YAML eval file").option(
41160
+ "-t, --target <target>",
41161
+ "Override target for rubric generation (default: file target or openai:gpt-4o)"
41162
+ ).option("-v, --verbose", "Show detailed progress").action(async (file2, options) => {
41163
+ try {
41164
+ await generateRubricsCommand({
41165
+ file: file2,
41166
+ target: options.target,
41167
+ verbose: options.verbose
41168
+ });
41169
+ } catch (error40) {
41170
+ console.error(`Error: ${error40.message}`);
41171
+ process.exit(1);
41172
+ }
41173
+ });
41174
+ }
41175
+
40734
41176
  // src/commands/init/index.ts
40735
41177
  import { existsSync, mkdirSync, writeFileSync } from "node:fs";
40736
- import path21 from "node:path";
41178
+ import path24 from "node:path";
40737
41179
  import * as readline from "node:readline/promises";
40738
41180
 
40739
41181
  // src/templates/index.ts
40740
41182
  import { readFileSync, readdirSync, statSync } from "node:fs";
40741
- import path20 from "node:path";
41183
+ import path21 from "node:path";
40742
41184
  import { fileURLToPath } from "node:url";
40743
41185
  function getGithubTemplates() {
40744
41186
  return getTemplatesFromDir(".github");
@@ -40750,12 +41192,12 @@ function getClaudeTemplates() {
40750
41192
  return getTemplatesFromDir(".claude");
40751
41193
  }
40752
41194
  function getTemplatesFromDir(subdir) {
40753
- const currentDir = path20.dirname(fileURLToPath(import.meta.url));
41195
+ const currentDir = path21.dirname(fileURLToPath(import.meta.url));
40754
41196
  let templatesDir;
40755
- if (currentDir.includes(`${path20.sep}dist`)) {
40756
- templatesDir = path20.join(currentDir, "templates", subdir);
41197
+ if (currentDir.includes(`${path21.sep}dist`)) {
41198
+ templatesDir = path21.join(currentDir, "templates", subdir);
40757
41199
  } else {
40758
- templatesDir = path20.join(currentDir, subdir);
41200
+ templatesDir = path21.join(currentDir, subdir);
40759
41201
  }
40760
41202
  return readTemplatesRecursively(templatesDir, "");
40761
41203
  }
@@ -40763,15 +41205,15 @@ function readTemplatesRecursively(dir, relativePath) {
40763
41205
  const templates = [];
40764
41206
  const entries = readdirSync(dir);
40765
41207
  for (const entry of entries) {
40766
- const fullPath = path20.join(dir, entry);
41208
+ const fullPath = path21.join(dir, entry);
40767
41209
  const stat6 = statSync(fullPath);
40768
- const entryRelativePath = relativePath ? path20.join(relativePath, entry) : entry;
41210
+ const entryRelativePath = relativePath ? path21.join(relativePath, entry) : entry;
40769
41211
  if (stat6.isDirectory()) {
40770
41212
  templates.push(...readTemplatesRecursively(fullPath, entryRelativePath));
40771
41213
  } else {
40772
41214
  const content = readFileSync(fullPath, "utf-8");
40773
41215
  templates.push({
40774
- path: entryRelativePath.split(path20.sep).join("/"),
41216
+ path: entryRelativePath.split(path21.sep).join("/"),
40775
41217
  // Normalize to forward slashes
40776
41218
  content
40777
41219
  });
@@ -40794,10 +41236,10 @@ async function promptYesNo(message) {
40794
41236
  }
40795
41237
  }
40796
41238
  async function initCommand(options = {}) {
40797
- const targetPath = path21.resolve(options.targetPath ?? ".");
40798
- const githubDir = path21.join(targetPath, ".github");
40799
- const agentvDir = path21.join(targetPath, ".agentv");
40800
- const claudeDir = path21.join(targetPath, ".claude");
41239
+ const targetPath = path24.resolve(options.targetPath ?? ".");
41240
+ const githubDir = path24.join(targetPath, ".github");
41241
+ const agentvDir = path24.join(targetPath, ".agentv");
41242
+ const claudeDir = path24.join(targetPath, ".claude");
40801
41243
  const githubTemplates = getGithubTemplates();
40802
41244
  const agentvTemplates = getAgentvTemplates();
40803
41245
  const claudeTemplates = getClaudeTemplates();
@@ -40805,32 +41247,32 @@ async function initCommand(options = {}) {
40805
41247
  const otherAgentvTemplates = agentvTemplates.filter((t) => t.path !== ".env.template");
40806
41248
  const existingFiles = [];
40807
41249
  if (envTemplate) {
40808
- const envFilePath = path21.join(targetPath, ".env.template");
41250
+ const envFilePath = path24.join(targetPath, ".env.template");
40809
41251
  if (existsSync(envFilePath)) {
40810
41252
  existingFiles.push(".env.template");
40811
41253
  }
40812
41254
  }
40813
41255
  if (existsSync(githubDir)) {
40814
41256
  for (const template of githubTemplates) {
40815
- const targetFilePath = path21.join(githubDir, template.path);
41257
+ const targetFilePath = path24.join(githubDir, template.path);
40816
41258
  if (existsSync(targetFilePath)) {
40817
- existingFiles.push(path21.relative(targetPath, targetFilePath));
41259
+ existingFiles.push(path24.relative(targetPath, targetFilePath));
40818
41260
  }
40819
41261
  }
40820
41262
  }
40821
41263
  if (existsSync(agentvDir)) {
40822
41264
  for (const template of otherAgentvTemplates) {
40823
- const targetFilePath = path21.join(agentvDir, template.path);
41265
+ const targetFilePath = path24.join(agentvDir, template.path);
40824
41266
  if (existsSync(targetFilePath)) {
40825
- existingFiles.push(path21.relative(targetPath, targetFilePath));
41267
+ existingFiles.push(path24.relative(targetPath, targetFilePath));
40826
41268
  }
40827
41269
  }
40828
41270
  }
40829
41271
  if (existsSync(claudeDir)) {
40830
41272
  for (const template of claudeTemplates) {
40831
- const targetFilePath = path21.join(claudeDir, template.path);
41273
+ const targetFilePath = path24.join(claudeDir, template.path);
40832
41274
  if (existsSync(targetFilePath)) {
40833
- existingFiles.push(path21.relative(targetPath, targetFilePath));
41275
+ existingFiles.push(path24.relative(targetPath, targetFilePath));
40834
41276
  }
40835
41277
  }
40836
41278
  }
@@ -40857,36 +41299,36 @@ async function initCommand(options = {}) {
40857
41299
  mkdirSync(claudeDir, { recursive: true });
40858
41300
  }
40859
41301
  if (envTemplate) {
40860
- const envFilePath = path21.join(targetPath, ".env.template");
41302
+ const envFilePath = path24.join(targetPath, ".env.template");
40861
41303
  writeFileSync(envFilePath, envTemplate.content, "utf-8");
40862
41304
  console.log("Created .env.template");
40863
41305
  }
40864
41306
  for (const template of githubTemplates) {
40865
- const targetFilePath = path21.join(githubDir, template.path);
40866
- const targetDirPath = path21.dirname(targetFilePath);
41307
+ const targetFilePath = path24.join(githubDir, template.path);
41308
+ const targetDirPath = path24.dirname(targetFilePath);
40867
41309
  if (!existsSync(targetDirPath)) {
40868
41310
  mkdirSync(targetDirPath, { recursive: true });
40869
41311
  }
40870
41312
  writeFileSync(targetFilePath, template.content, "utf-8");
40871
- console.log(`Created ${path21.relative(targetPath, targetFilePath)}`);
41313
+ console.log(`Created ${path24.relative(targetPath, targetFilePath)}`);
40872
41314
  }
40873
41315
  for (const template of otherAgentvTemplates) {
40874
- const targetFilePath = path21.join(agentvDir, template.path);
40875
- const targetDirPath = path21.dirname(targetFilePath);
41316
+ const targetFilePath = path24.join(agentvDir, template.path);
41317
+ const targetDirPath = path24.dirname(targetFilePath);
40876
41318
  if (!existsSync(targetDirPath)) {
40877
41319
  mkdirSync(targetDirPath, { recursive: true });
40878
41320
  }
40879
41321
  writeFileSync(targetFilePath, template.content, "utf-8");
40880
- console.log(`Created ${path21.relative(targetPath, targetFilePath)}`);
41322
+ console.log(`Created ${path24.relative(targetPath, targetFilePath)}`);
40881
41323
  }
40882
41324
  for (const template of claudeTemplates) {
40883
- const targetFilePath = path21.join(claudeDir, template.path);
40884
- const targetDirPath = path21.dirname(targetFilePath);
41325
+ const targetFilePath = path24.join(claudeDir, template.path);
41326
+ const targetDirPath = path24.dirname(targetFilePath);
40885
41327
  if (!existsSync(targetDirPath)) {
40886
41328
  mkdirSync(targetDirPath, { recursive: true });
40887
41329
  }
40888
41330
  writeFileSync(targetFilePath, template.content, "utf-8");
40889
- console.log(`Created ${path21.relative(targetPath, targetFilePath)}`);
41331
+ console.log(`Created ${path24.relative(targetPath, targetFilePath)}`);
40890
41332
  }
40891
41333
  console.log("\nAgentV initialized successfully!");
40892
41334
  console.log("\nFiles installed to root:");
@@ -40894,17 +41336,17 @@ async function initCommand(options = {}) {
40894
41336
  console.log(" - .env.template");
40895
41337
  }
40896
41338
  console.log(`
40897
- Files installed to ${path21.relative(targetPath, githubDir)}:`);
41339
+ Files installed to ${path24.relative(targetPath, githubDir)}:`);
40898
41340
  for (const t of githubTemplates) {
40899
41341
  console.log(` - ${t.path}`);
40900
41342
  }
40901
41343
  console.log(`
40902
- Files installed to ${path21.relative(targetPath, agentvDir)}:`);
41344
+ Files installed to ${path24.relative(targetPath, agentvDir)}:`);
40903
41345
  for (const t of otherAgentvTemplates) {
40904
41346
  console.log(` - ${t.path}`);
40905
41347
  }
40906
41348
  console.log(`
40907
- Files installed to ${path21.relative(targetPath, claudeDir)}:`);
41349
+ Files installed to ${path24.relative(targetPath, claudeDir)}:`);
40908
41350
  for (const t of claudeTemplates) {
40909
41351
  console.log(` - ${t.path}`);
40910
41352
  }
@@ -41006,7 +41448,7 @@ function isTTY2() {
41006
41448
  // src/commands/validate/validate-files.ts
41007
41449
  import { constants as constants7 } from "node:fs";
41008
41450
  import { access as access7, readdir as readdir3, stat as stat5 } from "node:fs/promises";
41009
- import path24 from "node:path";
41451
+ import path25 from "node:path";
41010
41452
  async function validateFiles(paths) {
41011
41453
  const filePaths = await expandPaths(paths);
41012
41454
  const results = [];
@@ -41024,7 +41466,7 @@ async function validateFiles(paths) {
41024
41466
  };
41025
41467
  }
41026
41468
  async function validateSingleFile(filePath) {
41027
- const absolutePath = path24.resolve(filePath);
41469
+ const absolutePath = path25.resolve(filePath);
41028
41470
  const fileType = await detectFileType(absolutePath);
41029
41471
  if (fileType === "unknown") {
41030
41472
  return {
@@ -41063,7 +41505,7 @@ async function validateSingleFile(filePath) {
41063
41505
  async function expandPaths(paths) {
41064
41506
  const expanded = [];
41065
41507
  for (const inputPath of paths) {
41066
- const absolutePath = path24.resolve(inputPath);
41508
+ const absolutePath = path25.resolve(inputPath);
41067
41509
  try {
41068
41510
  await access7(absolutePath, constants7.F_OK);
41069
41511
  } catch {
@@ -41087,7 +41529,7 @@ async function findYamlFiles(dirPath) {
41087
41529
  try {
41088
41530
  const entries = await readdir3(dirPath, { withFileTypes: true });
41089
41531
  for (const entry of entries) {
41090
- const fullPath = path24.join(dirPath, entry.name);
41532
+ const fullPath = path25.join(dirPath, entry.name);
41091
41533
  if (entry.isDirectory()) {
41092
41534
  if (entry.name === "node_modules" || entry.name.startsWith(".")) {
41093
41535
  continue;
@@ -41104,7 +41546,7 @@ async function findYamlFiles(dirPath) {
41104
41546
  return results;
41105
41547
  }
41106
41548
  function isYamlFile(filePath) {
41107
- const ext = path24.extname(filePath).toLowerCase();
41549
+ const ext = path25.extname(filePath).toLowerCase();
41108
41550
  return ext === ".yaml" || ext === ".yml";
41109
41551
  }
41110
41552
 
@@ -41141,6 +41583,7 @@ function createProgram() {
41141
41583
  registerStatusCommand(program);
41142
41584
  registerEvalCommand(program);
41143
41585
  registerValidateCommand(program);
41586
+ registerGenerateCommand(program);
41144
41587
  program.command("init [path]").description(
41145
41588
  "Initialize AgentV in your project (installs prompt templates and schema to .github)"
41146
41589
  ).action(async (targetPath) => {
@@ -41163,4 +41606,4 @@ export {
41163
41606
  createProgram,
41164
41607
  runCli
41165
41608
  };
41166
- //# sourceMappingURL=chunk-N7M3URIJ.js.map
41609
+ //# sourceMappingURL=chunk-MA3MJNJH.js.map