agentv 2.1.1 → 2.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -373,9 +373,9 @@ var compareCommand = command({
373
373
 
374
374
  // src/commands/convert/index.ts
375
375
  import { readFileSync as readFileSync2, writeFileSync } from "node:fs";
376
- import path15 from "node:path";
376
+ import path16 from "node:path";
377
377
 
378
- // ../../packages/core/dist/chunk-KDEP4I7G.js
378
+ // ../../packages/core/dist/chunk-RP3M7COZ.js
379
379
  import { constants } from "node:fs";
380
380
  import { access, readFile } from "node:fs/promises";
381
381
  import path from "node:path";
@@ -859,8 +859,8 @@ function getErrorMap() {
859
859
 
860
860
  // ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/helpers/parseUtil.js
861
861
  var makeIssue = (params) => {
862
- const { data, path: path29, errorMaps, issueData } = params;
863
- const fullPath = [...path29, ...issueData.path || []];
862
+ const { data, path: path30, errorMaps, issueData } = params;
863
+ const fullPath = [...path30, ...issueData.path || []];
864
864
  const fullIssue = {
865
865
  ...issueData,
866
866
  path: fullPath
@@ -976,11 +976,11 @@ var errorUtil;
976
976
 
977
977
  // ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/types.js
978
978
  var ParseInputLazyPath = class {
979
- constructor(parent, value, path29, key2) {
979
+ constructor(parent, value, path30, key2) {
980
980
  this._cachedPath = [];
981
981
  this.parent = parent;
982
982
  this.data = value;
983
- this._path = path29;
983
+ this._path = path30;
984
984
  this._key = key2;
985
985
  }
986
986
  get path() {
@@ -4422,7 +4422,7 @@ var coerce = {
4422
4422
  };
4423
4423
  var NEVER = INVALID;
4424
4424
 
4425
- // ../../packages/core/dist/chunk-KDEP4I7G.js
4425
+ // ../../packages/core/dist/chunk-RP3M7COZ.js
4426
4426
  async function fileExists(filePath) {
4427
4427
  try {
4428
4428
  await access(filePath, constants.F_OK);
@@ -5418,8 +5418,9 @@ function isAgentProvider(provider) {
5418
5418
  }
5419
5419
 
5420
5420
  // ../../packages/core/dist/index.js
5421
- import { readFile as readFile5 } from "node:fs/promises";
5422
- import path62 from "node:path";
5421
+ import { readFile as readFile6 } from "node:fs/promises";
5422
+ import path72 from "node:path";
5423
+ import micromatch3 from "micromatch";
5423
5424
  import { parse as parse22 } from "yaml";
5424
5425
  import { readFile as readFile4 } from "node:fs/promises";
5425
5426
  import path22 from "node:path";
@@ -5430,10 +5431,14 @@ import { access as access3 } from "node:fs/promises";
5430
5431
  import path13 from "node:path";
5431
5432
  import path32 from "node:path";
5432
5433
  import { readFile as readFile22 } from "node:fs/promises";
5433
- import { readFile as readFile32 } from "node:fs/promises";
5434
- import path42 from "node:path";
5435
5434
  import { readFile as readFile42 } from "node:fs/promises";
5436
5435
  import path52 from "node:path";
5436
+ import micromatch2 from "micromatch";
5437
+ import { parse as parseYaml } from "yaml";
5438
+ import { readFile as readFile32 } from "node:fs/promises";
5439
+ import path42 from "node:path";
5440
+ import { readFile as readFile5 } from "node:fs/promises";
5441
+ import path62 from "node:path";
5437
5442
 
5438
5443
  // ../../node_modules/.bun/@ai-sdk+provider@2.0.0/node_modules/@ai-sdk/provider/dist/index.mjs
5439
5444
  var marker = "vercel.ai.error";
@@ -6523,10 +6528,10 @@ function assignProp(target, prop, value) {
6523
6528
  configurable: true
6524
6529
  });
6525
6530
  }
6526
- function getElementAtPath(obj, path29) {
6527
- if (!path29)
6531
+ function getElementAtPath(obj, path30) {
6532
+ if (!path30)
6528
6533
  return obj;
6529
- return path29.reduce((acc, key2) => acc?.[key2], obj);
6534
+ return path30.reduce((acc, key2) => acc?.[key2], obj);
6530
6535
  }
6531
6536
  function promiseAllObject(promisesObj) {
6532
6537
  const keys = Object.keys(promisesObj);
@@ -6846,11 +6851,11 @@ function aborted(x, startIndex = 0) {
6846
6851
  }
6847
6852
  return false;
6848
6853
  }
6849
- function prefixIssues(path29, issues) {
6854
+ function prefixIssues(path30, issues) {
6850
6855
  return issues.map((iss) => {
6851
6856
  var _a17;
6852
6857
  (_a17 = iss).path ?? (_a17.path = []);
6853
- iss.path.unshift(path29);
6858
+ iss.path.unshift(path30);
6854
6859
  return iss;
6855
6860
  });
6856
6861
  }
@@ -6987,7 +6992,7 @@ function treeifyError(error40, _mapper) {
6987
6992
  return issue2.message;
6988
6993
  };
6989
6994
  const result = { errors: [] };
6990
- const processError = (error41, path29 = []) => {
6995
+ const processError = (error41, path30 = []) => {
6991
6996
  var _a17, _b8;
6992
6997
  for (const issue2 of error41.issues) {
6993
6998
  if (issue2.code === "invalid_union" && issue2.errors.length) {
@@ -6997,7 +7002,7 @@ function treeifyError(error40, _mapper) {
6997
7002
  } else if (issue2.code === "invalid_element") {
6998
7003
  processError({ issues: issue2.issues }, issue2.path);
6999
7004
  } else {
7000
- const fullpath = [...path29, ...issue2.path];
7005
+ const fullpath = [...path30, ...issue2.path];
7001
7006
  if (fullpath.length === 0) {
7002
7007
  result.errors.push(mapper(issue2));
7003
7008
  continue;
@@ -7027,9 +7032,9 @@ function treeifyError(error40, _mapper) {
7027
7032
  processError(error40);
7028
7033
  return result;
7029
7034
  }
7030
- function toDotPath(path29) {
7035
+ function toDotPath(path30) {
7031
7036
  const segs = [];
7032
- for (const seg of path29) {
7037
+ for (const seg of path30) {
7033
7038
  if (typeof seg === "number")
7034
7039
  segs.push(`[${seg}]`);
7035
7040
  else if (typeof seg === "symbol")
@@ -26582,14 +26587,14 @@ function createAzure(options = {}) {
26582
26587
  description: "Azure OpenAI resource name"
26583
26588
  });
26584
26589
  const apiVersion = (_a17 = options.apiVersion) != null ? _a17 : "v1";
26585
- const url2 = ({ path: path29, modelId }) => {
26590
+ const url2 = ({ path: path30, modelId }) => {
26586
26591
  var _a24;
26587
26592
  const baseUrlPrefix = (_a24 = options.baseURL) != null ? _a24 : `https://${getResourceName()}.openai.azure.com/openai`;
26588
26593
  let fullUrl;
26589
26594
  if (options.useDeploymentBasedUrls) {
26590
- fullUrl = new URL(`${baseUrlPrefix}/deployments/${modelId}${path29}`);
26595
+ fullUrl = new URL(`${baseUrlPrefix}/deployments/${modelId}${path30}`);
26591
26596
  } else {
26592
- fullUrl = new URL(`${baseUrlPrefix}/v1${path29}`);
26597
+ fullUrl = new URL(`${baseUrlPrefix}/v1${path30}`);
26593
26598
  }
26594
26599
  fullUrl.searchParams.set("api-version", apiVersion);
26595
26600
  return fullUrl.toString();
@@ -33025,27 +33030,27 @@ import { randomUUID } from "node:crypto";
33025
33030
  import { createWriteStream } from "node:fs";
33026
33031
  import { mkdir as mkdir4, mkdtemp, rm as rm2, writeFile as writeFile5 } from "node:fs/promises";
33027
33032
  import { tmpdir } from "node:os";
33033
+ import path92 from "node:path";
33028
33034
  import path82 from "node:path";
33029
- import path72 from "node:path";
33030
33035
  import { exec as execWithCallback } from "node:child_process";
33031
33036
  import fs from "node:fs/promises";
33032
33037
  import os2 from "node:os";
33033
- import path92 from "node:path";
33038
+ import path102 from "node:path";
33034
33039
  import { promisify as promisify2 } from "node:util";
33035
33040
  import { exec as execCallback, spawn as spawn22 } from "node:child_process";
33036
33041
  import { randomUUID as randomUUID2 } from "node:crypto";
33037
33042
  import { constants as constants22, createWriteStream as createWriteStream2 } from "node:fs";
33038
33043
  import { access as access22, mkdir as mkdir22, mkdtemp as mkdtemp2, rm as rm22, writeFile as writeFile22 } from "node:fs/promises";
33039
33044
  import { tmpdir as tmpdir2 } from "node:os";
33040
- import path102 from "node:path";
33045
+ import path112 from "node:path";
33041
33046
  import { promisify as promisify22 } from "node:util";
33042
33047
  import { spawn as spawn3 } from "node:child_process";
33043
33048
  import { randomUUID as randomUUID3 } from "node:crypto";
33044
33049
  import { createWriteStream as createWriteStream3 } from "node:fs";
33045
33050
  import { mkdir as mkdir32, mkdtemp as mkdtemp3, rm as rm3, writeFile as writeFile32 } from "node:fs/promises";
33046
33051
  import { tmpdir as tmpdir3 } from "node:os";
33047
- import path112 from "node:path";
33048
33052
  import path122 from "node:path";
33053
+ import path132 from "node:path";
33049
33054
 
33050
33055
  // ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/vscode/agentDispatch.js
33051
33056
  import { stat as stat3, writeFile as writeFile3 } from "node:fs/promises";
@@ -35067,13 +35072,14 @@ async function provisionSubagents(options) {
35067
35072
 
35068
35073
  // ../../packages/core/dist/index.js
35069
35074
  import { constants as constants32 } from "node:fs";
35070
- import { access as access32, readFile as readFile6 } from "node:fs/promises";
35071
- import path132 from "node:path";
35075
+ import { access as access32, readFile as readFile7 } from "node:fs/promises";
35076
+ import path14 from "node:path";
35072
35077
  import { parse as parse32 } from "yaml";
35073
35078
  import { randomBytes } from "node:crypto";
35074
35079
  import { createServer } from "node:http";
35075
35080
  import { createHash } from "node:crypto";
35076
- import path14 from "node:path";
35081
+ import path15 from "node:path";
35082
+ import micromatch4 from "micromatch";
35077
35083
  var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
35078
35084
  var TEST_MESSAGE_ROLE_SET = new Set(TEST_MESSAGE_ROLE_VALUES);
35079
35085
  function isTestMessageRole(value) {
@@ -35449,11 +35455,11 @@ async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId
35449
35455
  );
35450
35456
  }
35451
35457
  }
35452
- const knownProps = /* @__PURE__ */ new Set(["name", "type", "script", "cwd", "weight", "target"]);
35453
- const config2 = {};
35458
+ const knownProps2 = /* @__PURE__ */ new Set(["name", "type", "script", "cwd", "weight", "target"]);
35459
+ const config22 = {};
35454
35460
  for (const [key2, value] of Object.entries(rawEvaluator)) {
35455
- if (!knownProps.has(key2) && value !== void 0) {
35456
- config2[key2] = value;
35461
+ if (!knownProps2.has(key2) && value !== void 0) {
35462
+ config22[key2] = value;
35457
35463
  }
35458
35464
  }
35459
35465
  evaluators.push({
@@ -35463,7 +35469,7 @@ async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId
35463
35469
  cwd,
35464
35470
  resolvedCwd,
35465
35471
  ...weight2 !== void 0 ? { weight: weight2 } : {},
35466
- ...Object.keys(config2).length > 0 ? { config: config2 } : {},
35472
+ ...Object.keys(config22).length > 0 ? { config: config22 } : {},
35467
35473
  ...targetConfig !== void 0 ? { target: targetConfig } : {}
35468
35474
  });
35469
35475
  continue;
@@ -35628,7 +35634,7 @@ async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId
35628
35634
  continue;
35629
35635
  }
35630
35636
  const weight2 = validateWeight(rawEvaluator.weight, name16, evalId);
35631
- const config2 = {
35637
+ const config22 = {
35632
35638
  name: name16,
35633
35639
  type: "tool_trajectory",
35634
35640
  mode,
@@ -35636,7 +35642,7 @@ async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId
35636
35642
  ...expected ? { expected } : {},
35637
35643
  ...weight2 !== void 0 ? { weight: weight2 } : {}
35638
35644
  };
35639
- evaluators.push(config2);
35645
+ evaluators.push(config22);
35640
35646
  continue;
35641
35647
  }
35642
35648
  if (typeValue === "field_accuracy") {
@@ -35773,9 +35779,33 @@ async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId
35773
35779
  });
35774
35780
  continue;
35775
35781
  }
35776
- const prompt = asString(rawEvaluator.prompt);
35782
+ const rawPrompt = rawEvaluator.prompt;
35783
+ let prompt;
35777
35784
  let promptPath;
35778
- if (prompt) {
35785
+ let resolvedPromptScript;
35786
+ let promptScriptConfig;
35787
+ if (isJsonObject2(rawPrompt)) {
35788
+ const scriptArray = asStringArray(
35789
+ rawPrompt.script,
35790
+ `prompt.script for evaluator '${name16}' in '${evalId}'`
35791
+ );
35792
+ if (!scriptArray) {
35793
+ throw new Error(`Evaluator '${name16}' in '${evalId}': prompt object requires script array`);
35794
+ }
35795
+ const scriptPath = scriptArray[scriptArray.length - 1];
35796
+ const resolved = await resolveFileReference2(scriptPath, searchRoots);
35797
+ if (resolved.resolvedPath) {
35798
+ resolvedPromptScript = [...scriptArray.slice(0, -1), path32.resolve(resolved.resolvedPath)];
35799
+ } else {
35800
+ throw new Error(
35801
+ `Evaluator '${name16}' in '${evalId}': prompt script file not found: ${resolved.displayPath}`
35802
+ );
35803
+ }
35804
+ if (isJsonObject2(rawPrompt.config)) {
35805
+ promptScriptConfig = rawPrompt.config;
35806
+ }
35807
+ } else if (typeof rawPrompt === "string") {
35808
+ prompt = rawPrompt;
35779
35809
  const resolved = await resolveFileReference2(prompt, searchRoots);
35780
35810
  if (resolved.resolvedPath) {
35781
35811
  promptPath = path32.resolve(resolved.resolvedPath);
@@ -35794,12 +35824,7 @@ async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId
35794
35824
  }
35795
35825
  const _model = asString(rawEvaluator.model);
35796
35826
  const rawRubrics = rawEvaluator.rubrics;
35797
- const parsedRubrics = Array.isArray(rawRubrics) ? rawRubrics.filter((r) => isJsonObject2(r)).map((rubric, index) => ({
35798
- id: asString(rubric.id) ?? `rubric-${index + 1}`,
35799
- description: asString(rubric.description) ?? "",
35800
- weight: typeof rubric.weight === "number" ? rubric.weight : 1,
35801
- required: typeof rubric.required === "boolean" ? rubric.required : true
35802
- })).filter((r) => r.description.length > 0) : void 0;
35827
+ const parsedRubrics = Array.isArray(rawRubrics) ? parseRubricItems(rawRubrics, name16, evalId) : void 0;
35803
35828
  if (typeValue === "rubric") {
35804
35829
  if (!parsedRubrics) {
35805
35830
  logWarning2(`Skipping rubric evaluator '${name16}' in '${evalId}': missing rubrics array`);
@@ -35819,13 +35844,26 @@ async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId
35819
35844
  continue;
35820
35845
  }
35821
35846
  const weight = validateWeight(rawEvaluator.weight, name16, evalId);
35847
+ const knownProps = /* @__PURE__ */ new Set(["name", "type", "prompt", "model", "rubrics", "weight", "config"]);
35848
+ const config2 = {};
35849
+ for (const [key2, value] of Object.entries(rawEvaluator)) {
35850
+ if (!knownProps.has(key2) && value !== void 0) {
35851
+ config2[key2] = value;
35852
+ }
35853
+ }
35854
+ const topLevelConfig = isJsonObject2(rawEvaluator.config) ? rawEvaluator.config : {};
35855
+ const mergedConfig = { ...config2, ...topLevelConfig };
35856
+ const finalConfig = promptScriptConfig ?? (Object.keys(mergedConfig).length > 0 ? mergedConfig : void 0);
35822
35857
  evaluators.push({
35823
35858
  name: name16,
35824
35859
  type: "llm_judge",
35825
35860
  prompt,
35826
35861
  promptPath,
35862
+ ...promptPath ? { resolvedPromptPath: promptPath } : {},
35863
+ ...resolvedPromptScript ? { resolvedPromptScript } : {},
35827
35864
  ...parsedRubrics && parsedRubrics.length > 0 ? { rubrics: parsedRubrics } : {},
35828
- ...weight !== void 0 ? { weight } : {}
35865
+ ...weight !== void 0 ? { weight } : {},
35866
+ ...finalConfig ? { config: finalConfig } : {}
35829
35867
  });
35830
35868
  }
35831
35869
  return evaluators.length > 0 ? evaluators : void 0;
@@ -35912,6 +35950,185 @@ var VALID_FIELD_AGGREGATION_TYPES = /* @__PURE__ */ new Set(["weighted_average",
35912
35950
  function isValidFieldAggregationType(value) {
35913
35951
  return typeof value === "string" && VALID_FIELD_AGGREGATION_TYPES.has(value);
35914
35952
  }
35953
+ function parseRubricItems(rawRubrics, evaluatorName, evalId) {
35954
+ const items = [];
35955
+ for (const [index, rawRubric] of rawRubrics.entries()) {
35956
+ if (!isJsonObject2(rawRubric)) {
35957
+ logWarning2(
35958
+ `Skipping invalid rubric entry at index ${index} in evaluator '${evaluatorName}' (expected object)`
35959
+ );
35960
+ continue;
35961
+ }
35962
+ const id = asString(rawRubric.id) ?? `rubric-${index + 1}`;
35963
+ const expectedOutcome = asString(rawRubric.expected_outcome) ?? asString(rawRubric.description) ?? "";
35964
+ const weight = typeof rawRubric.weight === "number" ? rawRubric.weight : 1;
35965
+ let requiredMinScore;
35966
+ let required2;
35967
+ if (typeof rawRubric.required_min_score === "number") {
35968
+ const minScore = rawRubric.required_min_score;
35969
+ if (!Number.isInteger(minScore) || minScore < 0 || minScore > 10) {
35970
+ throw new Error(
35971
+ `Invalid required_min_score for rubric '${id}' in evaluator '${evaluatorName}' in '${evalId}': must be an integer 0-10 (got ${minScore})`
35972
+ );
35973
+ }
35974
+ requiredMinScore = minScore;
35975
+ }
35976
+ if (typeof rawRubric.required === "boolean") {
35977
+ required2 = rawRubric.required;
35978
+ }
35979
+ let scoreRanges;
35980
+ const rawScoreRanges = rawRubric.score_ranges;
35981
+ if (rawScoreRanges !== void 0) {
35982
+ if (!Array.isArray(rawScoreRanges)) {
35983
+ throw new Error(
35984
+ `Invalid score_ranges for rubric '${id}' in evaluator '${evaluatorName}' in '${evalId}': must be an array`
35985
+ );
35986
+ }
35987
+ scoreRanges = parseScoreRanges(rawScoreRanges, id, evaluatorName, evalId);
35988
+ items.push({
35989
+ id,
35990
+ weight,
35991
+ ...expectedOutcome.length > 0 ? { expected_outcome: expectedOutcome } : {},
35992
+ ...required2 !== void 0 ? { required: required2 } : {},
35993
+ ...requiredMinScore !== void 0 ? { required_min_score: requiredMinScore } : {},
35994
+ score_ranges: scoreRanges
35995
+ });
35996
+ } else {
35997
+ if (expectedOutcome.length === 0) {
35998
+ logWarning2(
35999
+ `Skipping rubric '${id}' in evaluator '${evaluatorName}' in '${evalId}': missing expected_outcome`
36000
+ );
36001
+ continue;
36002
+ }
36003
+ items.push({
36004
+ id,
36005
+ expected_outcome: expectedOutcome,
36006
+ weight,
36007
+ // Default to required: true if not specified (backward compatibility)
36008
+ required: required2 ?? true,
36009
+ ...requiredMinScore !== void 0 ? { required_min_score: requiredMinScore } : {}
36010
+ });
36011
+ }
36012
+ }
36013
+ return items.length > 0 ? items : void 0;
36014
+ }
36015
+ function parseScoreRanges(rawRanges, rubricId, evaluatorName, evalId) {
36016
+ const ranges = [];
36017
+ for (const [index, rawRange] of rawRanges.entries()) {
36018
+ if (!isJsonObject2(rawRange)) {
36019
+ throw new Error(
36020
+ `Invalid score_range entry at index ${index} for rubric '${rubricId}' in evaluator '${evaluatorName}' in '${evalId}': expected object`
36021
+ );
36022
+ }
36023
+ const scoreRangeValue = rawRange.score_range;
36024
+ if (!Array.isArray(scoreRangeValue) || scoreRangeValue.length !== 2 || typeof scoreRangeValue[0] !== "number" || typeof scoreRangeValue[1] !== "number") {
36025
+ throw new Error(
36026
+ `Invalid score_range at index ${index} for rubric '${rubricId}' in evaluator '${evaluatorName}' in '${evalId}': must be [min, max] array of two numbers`
36027
+ );
36028
+ }
36029
+ const [min, max] = scoreRangeValue;
36030
+ if (!Number.isInteger(min) || !Number.isInteger(max)) {
36031
+ throw new Error(
36032
+ `Invalid score_range at index ${index} for rubric '${rubricId}' in evaluator '${evaluatorName}' in '${evalId}': values must be integers (got [${min}, ${max}])`
36033
+ );
36034
+ }
36035
+ if (min < 0 || min > 10 || max < 0 || max > 10) {
36036
+ throw new Error(
36037
+ `Invalid score_range at index ${index} for rubric '${rubricId}' in evaluator '${evaluatorName}' in '${evalId}': values must be 0-10 (got [${min}, ${max}])`
36038
+ );
36039
+ }
36040
+ if (min > max) {
36041
+ throw new Error(
36042
+ `Invalid score_range at index ${index} for rubric '${rubricId}' in evaluator '${evaluatorName}' in '${evalId}': min must be <= max (got [${min}, ${max}])`
36043
+ );
36044
+ }
36045
+ const expectedOutcome = asString(rawRange.expected_outcome) ?? asString(rawRange.description) ?? "";
36046
+ if (expectedOutcome.length === 0) {
36047
+ throw new Error(
36048
+ `Missing expected_outcome for score_range [${min}, ${max}] in rubric '${rubricId}' in evaluator '${evaluatorName}' in '${evalId}'`
36049
+ );
36050
+ }
36051
+ ranges.push({
36052
+ score_range: [min, max],
36053
+ expected_outcome: expectedOutcome
36054
+ });
36055
+ }
36056
+ const sortedRanges = [...ranges].sort((a, b) => a.score_range[0] - b.score_range[0]);
36057
+ for (let i = 1; i < sortedRanges.length; i++) {
36058
+ const prev = sortedRanges[i - 1];
36059
+ const curr = sortedRanges[i];
36060
+ if (curr.score_range[0] <= prev.score_range[1]) {
36061
+ throw new Error(
36062
+ `Overlapping score_ranges in rubric '${rubricId}' in evaluator '${evaluatorName}' in '${evalId}': [${prev.score_range[0]}, ${prev.score_range[1]}] overlaps with [${curr.score_range[0]}, ${curr.score_range[1]}]`
36063
+ );
36064
+ }
36065
+ }
36066
+ const covered = /* @__PURE__ */ new Set();
36067
+ for (const range of ranges) {
36068
+ for (let i = range.score_range[0]; i <= range.score_range[1]; i++) {
36069
+ covered.add(i);
36070
+ }
36071
+ }
36072
+ const missing = [];
36073
+ for (let i = 0; i <= 10; i++) {
36074
+ if (!covered.has(i)) {
36075
+ missing.push(i);
36076
+ }
36077
+ }
36078
+ if (missing.length > 0) {
36079
+ throw new Error(
36080
+ `Incomplete score_ranges coverage in rubric '${rubricId}' in evaluator '${evaluatorName}' in '${evalId}': missing coverage for scores: ${missing.join(", ")}. Ranges must cover all integers 0-10.`
36081
+ );
36082
+ }
36083
+ return ranges;
36084
+ }
36085
+ function parseInlineRubrics(rawRubrics) {
36086
+ const rubricItems = rawRubrics.filter((r) => isJsonObject2(r) || typeof r === "string").map((rubric, index) => {
36087
+ if (typeof rubric === "string") {
36088
+ return {
36089
+ id: `rubric-${index + 1}`,
36090
+ expected_outcome: rubric,
36091
+ weight: 1,
36092
+ required: true
36093
+ };
36094
+ }
36095
+ const expectedOutcome = asString(rubric.expected_outcome) ?? asString(rubric.description) ?? "";
36096
+ const rawScoreRanges = rubric.score_ranges;
36097
+ const scoreRanges = Array.isArray(rawScoreRanges) && rawScoreRanges.length > 0 ? rawScoreRanges.filter((r) => isJsonObject2(r)).map((range) => ({
36098
+ score_range: Array.isArray(range.score_range) ? range.score_range : [0, 10],
36099
+ expected_outcome: asString(range.expected_outcome) ?? asString(range.description) ?? ""
36100
+ })).filter((r) => r.expected_outcome.length > 0) : void 0;
36101
+ const baseRubric = {
36102
+ id: asString(rubric.id) ?? `rubric-${index + 1}`,
36103
+ weight: typeof rubric.weight === "number" ? rubric.weight : 1
36104
+ };
36105
+ if (scoreRanges && scoreRanges.length > 0) {
36106
+ return {
36107
+ ...baseRubric,
36108
+ ...expectedOutcome.length > 0 ? { expected_outcome: expectedOutcome } : {},
36109
+ ...typeof rubric.required === "boolean" ? { required: rubric.required } : {},
36110
+ ...typeof rubric.required_min_score === "number" ? { required_min_score: rubric.required_min_score } : {},
36111
+ score_ranges: scoreRanges
36112
+ };
36113
+ }
36114
+ return {
36115
+ ...baseRubric,
36116
+ expected_outcome: expectedOutcome,
36117
+ required: typeof rubric.required === "boolean" ? rubric.required : true,
36118
+ ...typeof rubric.required_min_score === "number" ? { required_min_score: rubric.required_min_score } : {}
36119
+ };
36120
+ }).filter(
36121
+ (r) => r.expected_outcome && r.expected_outcome.length > 0 || "score_ranges" in r && r.score_ranges
36122
+ );
36123
+ if (rubricItems.length === 0) {
36124
+ return void 0;
36125
+ }
36126
+ return {
36127
+ name: "rubric",
36128
+ type: "llm_judge",
36129
+ rubrics: rubricItems
36130
+ };
36131
+ }
35915
36132
  function formatFileContents(parts) {
35916
36133
  const fileCount = parts.filter((p) => p.isFile).length;
35917
36134
  if (fileCount > 0) {
@@ -36164,25 +36381,295 @@ async function processExpectedMessages(options) {
36164
36381
  }
36165
36382
  return segments;
36166
36383
  }
36384
+ function expandInputShorthand(value) {
36385
+ if (value === void 0 || value === null) {
36386
+ return void 0;
36387
+ }
36388
+ if (typeof value === "string") {
36389
+ return [{ role: "user", content: value }];
36390
+ }
36391
+ if (Array.isArray(value)) {
36392
+ const messages = value.filter((msg) => isTestMessage(msg));
36393
+ return messages.length > 0 ? messages : void 0;
36394
+ }
36395
+ return void 0;
36396
+ }
36397
+ function expandExpectedOutputShorthand(value) {
36398
+ if (value === void 0 || value === null) {
36399
+ return void 0;
36400
+ }
36401
+ if (typeof value === "string") {
36402
+ return [{ role: "assistant", content: value }];
36403
+ }
36404
+ if (Array.isArray(value)) {
36405
+ if (value.length > 0 && isJsonObject(value[0]) && "role" in value[0]) {
36406
+ const messages = value.filter((msg) => isTestMessage(msg));
36407
+ return messages.length > 0 ? messages : void 0;
36408
+ }
36409
+ return [{ role: "assistant", content: value }];
36410
+ }
36411
+ if (isJsonObject(value)) {
36412
+ if ("role" in value) {
36413
+ return isTestMessage(value) ? [value] : void 0;
36414
+ }
36415
+ return [{ role: "assistant", content: value }];
36416
+ }
36417
+ return void 0;
36418
+ }
36419
+ function resolveInputMessages(raw) {
36420
+ if (raw.input_messages !== void 0) {
36421
+ if (Array.isArray(raw.input_messages)) {
36422
+ const messages = raw.input_messages.filter((msg) => isTestMessage(msg));
36423
+ return messages.length > 0 ? messages : void 0;
36424
+ }
36425
+ return void 0;
36426
+ }
36427
+ return expandInputShorthand(raw.input);
36428
+ }
36429
+ function resolveExpectedMessages(raw) {
36430
+ if (raw.expected_messages !== void 0) {
36431
+ if (Array.isArray(raw.expected_messages)) {
36432
+ const messages = raw.expected_messages.filter(
36433
+ (msg) => isTestMessage(msg)
36434
+ );
36435
+ return messages.length > 0 ? messages : void 0;
36436
+ }
36437
+ return void 0;
36438
+ }
36439
+ return expandExpectedOutputShorthand(raw.expected_output);
36440
+ }
36167
36441
  var ANSI_YELLOW5 = "\x1B[33m";
36442
+ var ANSI_RED = "\x1B[31m";
36168
36443
  var ANSI_RESET5 = "\x1B[0m";
36444
+ function detectFormat(filePath) {
36445
+ const ext = path52.extname(filePath).toLowerCase();
36446
+ if (ext === ".jsonl") return "jsonl";
36447
+ if (ext === ".yaml" || ext === ".yml") return "yaml";
36448
+ throw new Error(`Unsupported file format: '${ext}'. Supported formats: .yaml, .yml, .jsonl`);
36449
+ }
36450
+ async function loadSidecarMetadata(jsonlPath, verbose) {
36451
+ const dir = path52.dirname(jsonlPath);
36452
+ const base = path52.basename(jsonlPath, ".jsonl");
36453
+ const sidecarPath = path52.join(dir, `${base}.yaml`);
36454
+ if (!await fileExists2(sidecarPath)) {
36455
+ if (verbose) {
36456
+ logWarning4(`Sidecar metadata file not found: ${sidecarPath} (using defaults)`);
36457
+ }
36458
+ return {};
36459
+ }
36460
+ try {
36461
+ const content = await readFile42(sidecarPath, "utf8");
36462
+ const parsed = parseYaml(content);
36463
+ if (!isJsonObject(parsed)) {
36464
+ logWarning4(`Invalid sidecar metadata format in ${sidecarPath}`);
36465
+ return {};
36466
+ }
36467
+ return {
36468
+ description: asString4(parsed.description),
36469
+ dataset: asString4(parsed.dataset),
36470
+ execution: isJsonObject(parsed.execution) ? parsed.execution : void 0,
36471
+ evaluator: parsed.evaluator
36472
+ };
36473
+ } catch (error40) {
36474
+ logWarning4(`Could not read sidecar metadata from ${sidecarPath}: ${error40.message}`);
36475
+ return {};
36476
+ }
36477
+ }
36478
+ function parseJsonlContent(content, filePath) {
36479
+ const lines = content.split("\n");
36480
+ const cases = [];
36481
+ for (let i = 0; i < lines.length; i++) {
36482
+ const line2 = lines[i].trim();
36483
+ if (line2 === "") continue;
36484
+ try {
36485
+ const parsed = JSON.parse(line2);
36486
+ if (!isJsonObject(parsed)) {
36487
+ throw new Error("Expected JSON object");
36488
+ }
36489
+ cases.push(parsed);
36490
+ } catch (error40) {
36491
+ const message = error40 instanceof Error ? error40.message : String(error40);
36492
+ throw new Error(`Line ${i + 1}: Invalid JSON - ${message}
36493
+ File: ${filePath}`);
36494
+ }
36495
+ }
36496
+ return cases;
36497
+ }
36498
+ async function loadEvalCasesFromJsonl(evalFilePath, repoRoot, options) {
36499
+ const verbose = options?.verbose ?? false;
36500
+ const filterPattern = options?.filter;
36501
+ const absoluteTestPath = path52.resolve(evalFilePath);
36502
+ const repoRootPath = resolveToAbsolutePath(repoRoot);
36503
+ const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
36504
+ const config2 = await loadConfig(absoluteTestPath, repoRootPath);
36505
+ const guidelinePatterns = config2?.guideline_patterns;
36506
+ const sidecar = await loadSidecarMetadata(absoluteTestPath, verbose);
36507
+ const rawFile = await readFile42(absoluteTestPath, "utf8");
36508
+ const rawCases = parseJsonlContent(rawFile, evalFilePath);
36509
+ const fallbackDataset = path52.basename(absoluteTestPath, ".jsonl") || "eval";
36510
+ const datasetName = sidecar.dataset && sidecar.dataset.trim().length > 0 ? sidecar.dataset : fallbackDataset;
36511
+ const globalEvaluator = coerceEvaluator(sidecar.evaluator, "sidecar") ?? "llm_judge";
36512
+ const globalExecution = sidecar.execution;
36513
+ if (verbose) {
36514
+ console.log(`
36515
+ [JSONL Dataset: ${evalFilePath}]`);
36516
+ console.log(` Cases: ${rawCases.length}`);
36517
+ console.log(` Dataset name: ${datasetName}`);
36518
+ if (sidecar.description) {
36519
+ console.log(` Description: ${sidecar.description}`);
36520
+ }
36521
+ }
36522
+ const results = [];
36523
+ for (let lineIndex = 0; lineIndex < rawCases.length; lineIndex++) {
36524
+ const evalcase = rawCases[lineIndex];
36525
+ const lineNumber = lineIndex + 1;
36526
+ const id = asString4(evalcase.id);
36527
+ if (filterPattern && (!id || !micromatch2.isMatch(id, filterPattern))) {
36528
+ continue;
36529
+ }
36530
+ const conversationId = asString4(evalcase.conversation_id);
36531
+ const outcome = asString4(evalcase.expected_outcome) ?? asString4(evalcase.outcome);
36532
+ const inputMessages = resolveInputMessages(evalcase);
36533
+ const expectedMessages = resolveExpectedMessages(evalcase) ?? [];
36534
+ if (!id || !outcome || !inputMessages || inputMessages.length === 0) {
36535
+ logError(
36536
+ `Skipping incomplete eval case at line ${lineNumber}: ${id ?? "unknown"}. Missing required fields: id, expected_outcome, and/or input_messages (or input)`
36537
+ );
36538
+ continue;
36539
+ }
36540
+ const hasExpectedMessages = expectedMessages.length > 0;
36541
+ const guidelinePaths = [];
36542
+ const inputTextParts = [];
36543
+ const inputSegments = await processMessages({
36544
+ messages: inputMessages,
36545
+ searchRoots,
36546
+ repoRootPath,
36547
+ guidelinePatterns,
36548
+ guidelinePaths,
36549
+ textParts: inputTextParts,
36550
+ messageType: "input",
36551
+ verbose
36552
+ });
36553
+ const outputSegments = hasExpectedMessages ? await processExpectedMessages({
36554
+ messages: expectedMessages,
36555
+ searchRoots,
36556
+ repoRootPath,
36557
+ verbose
36558
+ }) : [];
36559
+ let referenceAnswer = "";
36560
+ if (outputSegments.length > 0) {
36561
+ const lastMessage = outputSegments[outputSegments.length - 1];
36562
+ const content = lastMessage.content;
36563
+ const toolCalls = lastMessage.tool_calls;
36564
+ if (typeof content === "string") {
36565
+ referenceAnswer = content;
36566
+ } else if (content !== void 0 && content !== null) {
36567
+ referenceAnswer = JSON.stringify(content, null, 2);
36568
+ } else if (toolCalls !== void 0 && toolCalls !== null) {
36569
+ referenceAnswer = JSON.stringify(toolCalls, null, 2);
36570
+ }
36571
+ }
36572
+ const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
36573
+ const caseExecution = isJsonObject(evalcase.execution) ? evalcase.execution : void 0;
36574
+ const mergedExecution = caseExecution ?? globalExecution;
36575
+ const evalCaseEvaluatorKind = coerceEvaluator(evalcase.evaluator, id) ?? globalEvaluator;
36576
+ let evaluators;
36577
+ try {
36578
+ evaluators = await parseEvaluators(evalcase, mergedExecution, searchRoots, id ?? "unknown");
36579
+ } catch (error40) {
36580
+ const message = error40 instanceof Error ? error40.message : String(error40);
36581
+ logError(`Skipping eval case '${id}' at line ${lineNumber}: ${message}`);
36582
+ continue;
36583
+ }
36584
+ const inlineRubrics = evalcase.rubrics;
36585
+ if (inlineRubrics !== void 0 && Array.isArray(inlineRubrics)) {
36586
+ const rubricEvaluator = parseInlineRubrics(inlineRubrics);
36587
+ if (rubricEvaluator) {
36588
+ evaluators = evaluators ? [rubricEvaluator, ...evaluators] : [rubricEvaluator];
36589
+ }
36590
+ }
36591
+ const userFilePaths = [];
36592
+ for (const segment of inputSegments) {
36593
+ if (segment.type === "file" && typeof segment.resolvedPath === "string") {
36594
+ userFilePaths.push(segment.resolvedPath);
36595
+ }
36596
+ }
36597
+ const allFilePaths = [
36598
+ ...guidelinePaths.map((guidelinePath) => path52.resolve(guidelinePath)),
36599
+ ...userFilePaths
36600
+ ];
36601
+ const testCase = {
36602
+ id,
36603
+ dataset: datasetName,
36604
+ conversation_id: conversationId,
36605
+ question,
36606
+ input_messages: inputMessages,
36607
+ input_segments: inputSegments,
36608
+ expected_messages: outputSegments,
36609
+ reference_answer: referenceAnswer,
36610
+ guideline_paths: guidelinePaths.map((guidelinePath) => path52.resolve(guidelinePath)),
36611
+ guideline_patterns: guidelinePatterns,
36612
+ file_paths: allFilePaths,
36613
+ expected_outcome: outcome,
36614
+ evaluator: evalCaseEvaluatorKind,
36615
+ evaluators
36616
+ };
36617
+ if (verbose) {
36618
+ console.log(`
36619
+ [Eval Case: ${id}]`);
36620
+ if (testCase.guideline_paths.length > 0) {
36621
+ console.log(` Guidelines used: ${testCase.guideline_paths.length}`);
36622
+ for (const guidelinePath of testCase.guideline_paths) {
36623
+ console.log(` - ${guidelinePath}`);
36624
+ }
36625
+ } else {
36626
+ console.log(" No guidelines found");
36627
+ }
36628
+ }
36629
+ results.push(testCase);
36630
+ }
36631
+ return results;
36632
+ }
36633
+ function asString4(value) {
36634
+ return typeof value === "string" ? value : void 0;
36635
+ }
36636
+ function logWarning4(message, details) {
36637
+ if (details && details.length > 0) {
36638
+ const detailBlock = details.join("\n");
36639
+ console.warn(`${ANSI_YELLOW5}Warning: ${message}
36640
+ ${detailBlock}${ANSI_RESET5}`);
36641
+ } else {
36642
+ console.warn(`${ANSI_YELLOW5}Warning: ${message}${ANSI_RESET5}`);
36643
+ }
36644
+ }
36645
+ function logError(message, details) {
36646
+ if (details && details.length > 0) {
36647
+ const detailBlock = details.join("\n");
36648
+ console.error(`${ANSI_RED}Error: ${message}
36649
+ ${detailBlock}${ANSI_RESET5}`);
36650
+ } else {
36651
+ console.error(`${ANSI_RED}Error: ${message}${ANSI_RESET5}`);
36652
+ }
36653
+ }
36654
+ var ANSI_YELLOW6 = "\x1B[33m";
36655
+ var ANSI_RESET6 = "\x1B[0m";
36169
36656
  async function buildPromptInputs(testCase, mode = "lm") {
36170
36657
  const guidelineParts = [];
36171
36658
  for (const rawPath of testCase.guideline_paths) {
36172
- const absolutePath = path52.resolve(rawPath);
36659
+ const absolutePath = path62.resolve(rawPath);
36173
36660
  if (!await fileExists2(absolutePath)) {
36174
- logWarning4(`Could not read guideline file ${absolutePath}: file does not exist`);
36661
+ logWarning5(`Could not read guideline file ${absolutePath}: file does not exist`);
36175
36662
  continue;
36176
36663
  }
36177
36664
  try {
36178
- const content = (await readFile42(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
36665
+ const content = (await readFile5(absolutePath, "utf8")).replace(/\r\n/g, "\n").trim();
36179
36666
  guidelineParts.push({
36180
36667
  content,
36181
36668
  isFile: true,
36182
- displayPath: path52.basename(absolutePath)
36669
+ displayPath: path62.basename(absolutePath)
36183
36670
  });
36184
36671
  } catch (error40) {
36185
- logWarning4(`Could not read guideline file ${absolutePath}: ${error40.message}`);
36672
+ logWarning5(`Could not read guideline file ${absolutePath}: ${error40.message}`);
36186
36673
  }
36187
36674
  }
36188
36675
  const guidelines = formatFileContents(guidelineParts);
@@ -36206,9 +36693,9 @@ async function buildPromptInputs(testCase, mode = "lm") {
36206
36693
  messageSegments.push({ type: "text", value: segment });
36207
36694
  }
36208
36695
  } else if (isJsonObject(segment)) {
36209
- const type = asString4(segment.type);
36696
+ const type = asString5(segment.type);
36210
36697
  if (type === "file") {
36211
- const value = asString4(segment.value);
36698
+ const value = asString5(segment.value);
36212
36699
  if (!value) continue;
36213
36700
  if (testCase.guideline_patterns && isGuidelineFile(value, testCase.guideline_patterns)) {
36214
36701
  messageSegments.push({ type: "guideline_ref", path: value });
@@ -36219,7 +36706,7 @@ async function buildPromptInputs(testCase, mode = "lm") {
36219
36706
  messageSegments.push({ type: "file", text: fileText, path: value });
36220
36707
  }
36221
36708
  } else if (type === "text") {
36222
- const textValue = asString4(segment.value);
36709
+ const textValue = asString5(segment.value);
36223
36710
  if (textValue && textValue.trim().length > 0) {
36224
36711
  messageSegments.push({ type: "text", value: textValue });
36225
36712
  }
@@ -36373,19 +36860,19 @@ ${guidelineContent.trim()}`);
36373
36860
  }
36374
36861
  return chatPrompt.length > 0 ? chatPrompt : void 0;
36375
36862
  }
36376
- function asString4(value) {
36863
+ function asString5(value) {
36377
36864
  return typeof value === "string" ? value : void 0;
36378
36865
  }
36379
- function logWarning4(message) {
36380
- console.warn(`${ANSI_YELLOW5}Warning: ${message}${ANSI_RESET5}`);
36866
+ function logWarning5(message) {
36867
+ console.warn(`${ANSI_YELLOW6}Warning: ${message}${ANSI_RESET6}`);
36381
36868
  }
36382
- var ANSI_YELLOW6 = "\x1B[33m";
36383
- var ANSI_RED = "\x1B[31m";
36384
- var ANSI_RESET6 = "\x1B[0m";
36869
+ var ANSI_YELLOW7 = "\x1B[33m";
36870
+ var ANSI_RED2 = "\x1B[31m";
36871
+ var ANSI_RESET7 = "\x1B[0m";
36385
36872
  async function readTestSuiteMetadata(testFilePath) {
36386
36873
  try {
36387
- const absolutePath = path62.resolve(testFilePath);
36388
- const content = await readFile5(absolutePath, "utf8");
36874
+ const absolutePath = path72.resolve(testFilePath);
36875
+ const content = await readFile6(absolutePath, "utf8");
36389
36876
  const parsed = parse22(content);
36390
36877
  if (!isJsonObject(parsed)) {
36391
36878
  return {};
@@ -36396,21 +36883,25 @@ async function readTestSuiteMetadata(testFilePath) {
36396
36883
  }
36397
36884
  }
36398
36885
  async function loadEvalCases(evalFilePath, repoRoot, options) {
36886
+ const format = detectFormat(evalFilePath);
36887
+ if (format === "jsonl") {
36888
+ return loadEvalCasesFromJsonl(evalFilePath, repoRoot, options);
36889
+ }
36399
36890
  const verbose = options?.verbose ?? false;
36400
- const evalIdFilter = options?.evalId;
36401
- const absoluteTestPath = path62.resolve(evalFilePath);
36891
+ const filterPattern = options?.filter;
36892
+ const absoluteTestPath = path72.resolve(evalFilePath);
36402
36893
  const repoRootPath = resolveToAbsolutePath(repoRoot);
36403
36894
  const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
36404
36895
  const config2 = await loadConfig(absoluteTestPath, repoRootPath);
36405
36896
  const guidelinePatterns = config2?.guideline_patterns;
36406
- const rawFile = await readFile5(absoluteTestPath, "utf8");
36897
+ const rawFile = await readFile6(absoluteTestPath, "utf8");
36407
36898
  const parsed = parse22(rawFile);
36408
36899
  if (!isJsonObject(parsed)) {
36409
36900
  throw new Error(`Invalid test file format: ${evalFilePath}`);
36410
36901
  }
36411
36902
  const suite = parsed;
36412
- const datasetNameFromSuite = asString5(suite.dataset)?.trim();
36413
- const fallbackDataset = path62.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
36903
+ const datasetNameFromSuite = asString6(suite.dataset)?.trim();
36904
+ const fallbackDataset = path72.basename(absoluteTestPath).replace(/\.ya?ml$/i, "") || "eval";
36414
36905
  const datasetName = datasetNameFromSuite && datasetNameFromSuite.length > 0 ? datasetNameFromSuite : fallbackDataset;
36415
36906
  const rawTestcases = suite.evalcases;
36416
36907
  if (!Array.isArray(rawTestcases)) {
@@ -36418,37 +36909,29 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
36418
36909
  }
36419
36910
  const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm_judge";
36420
36911
  const globalExecution = isJsonObject(suite.execution) ? suite.execution : void 0;
36421
- const _globalTarget = asString5(globalExecution?.target) ?? asString5(suite.target);
36912
+ const _globalTarget = asString6(globalExecution?.target) ?? asString6(suite.target);
36422
36913
  const results = [];
36423
36914
  for (const rawEvalcase of rawTestcases) {
36424
36915
  if (!isJsonObject(rawEvalcase)) {
36425
- logWarning5("Skipping invalid eval case entry (expected object)");
36916
+ logWarning6("Skipping invalid eval case entry (expected object)");
36426
36917
  continue;
36427
36918
  }
36428
36919
  const evalcase = rawEvalcase;
36429
- const id = asString5(evalcase.id);
36430
- if (evalIdFilter && id !== evalIdFilter) {
36920
+ const id = asString6(evalcase.id);
36921
+ if (filterPattern && (!id || !micromatch3.isMatch(id, filterPattern))) {
36431
36922
  continue;
36432
36923
  }
36433
- const conversationId = asString5(evalcase.conversation_id);
36434
- const outcome = asString5(evalcase.expected_outcome) ?? asString5(evalcase.outcome);
36435
- const inputMessagesValue = evalcase.input_messages;
36436
- const expectedMessagesValue = evalcase.expected_messages;
36437
- if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
36438
- logError(
36439
- `Skipping incomplete eval case: ${id ?? "unknown"}. Missing required fields: id, outcome, and/or input_messages`
36924
+ const conversationId = asString6(evalcase.conversation_id);
36925
+ const outcome = asString6(evalcase.expected_outcome) ?? asString6(evalcase.outcome);
36926
+ const inputMessages = resolveInputMessages(evalcase);
36927
+ const expectedMessages = resolveExpectedMessages(evalcase) ?? [];
36928
+ if (!id || !outcome || !inputMessages || inputMessages.length === 0) {
36929
+ logError2(
36930
+ `Skipping incomplete eval case: ${id ?? "unknown"}. Missing required fields: id, outcome, and/or input_messages (or input)`
36440
36931
  );
36441
36932
  continue;
36442
36933
  }
36443
- const hasExpectedMessages = Array.isArray(expectedMessagesValue) && expectedMessagesValue.length > 0;
36444
- const inputMessages = inputMessagesValue.filter(
36445
- (msg) => isTestMessage(msg)
36446
- );
36447
- const expectedMessages = hasExpectedMessages ? expectedMessagesValue.filter((msg) => isTestMessage(msg)) : [];
36448
- if (hasExpectedMessages && expectedMessages.length === 0) {
36449
- logError(`No valid expected message found for eval case: ${id}`);
36450
- continue;
36451
- }
36934
+ const hasExpectedMessages = expectedMessages.length > 0;
36452
36935
  const guidelinePaths = [];
36453
36936
  const inputTextParts = [];
36454
36937
  const inputSegments = await processMessages({
@@ -36487,33 +36970,13 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
36487
36970
  evaluators = await parseEvaluators(evalcase, globalExecution, searchRoots, id ?? "unknown");
36488
36971
  } catch (error40) {
36489
36972
  const message = error40 instanceof Error ? error40.message : String(error40);
36490
- logError(`Skipping eval case '${id}': ${message}`);
36973
+ logError2(`Skipping eval case '${id}': ${message}`);
36491
36974
  continue;
36492
36975
  }
36493
36976
  const inlineRubrics = evalcase.rubrics;
36494
36977
  if (inlineRubrics !== void 0 && Array.isArray(inlineRubrics)) {
36495
- const rubricItems = inlineRubrics.filter((r) => isJsonObject(r) || typeof r === "string").map((rubric, index) => {
36496
- if (typeof rubric === "string") {
36497
- return {
36498
- id: `rubric-${index + 1}`,
36499
- description: rubric,
36500
- weight: 1,
36501
- required: true
36502
- };
36503
- }
36504
- return {
36505
- id: asString5(rubric.id) ?? `rubric-${index + 1}`,
36506
- description: asString5(rubric.description) ?? "",
36507
- weight: typeof rubric.weight === "number" ? rubric.weight : 1,
36508
- required: typeof rubric.required === "boolean" ? rubric.required : true
36509
- };
36510
- }).filter((r) => r.description.length > 0);
36511
- if (rubricItems.length > 0) {
36512
- const rubricEvaluator = {
36513
- name: "rubric",
36514
- type: "llm_judge",
36515
- rubrics: rubricItems
36516
- };
36978
+ const rubricEvaluator = parseInlineRubrics(inlineRubrics);
36979
+ if (rubricEvaluator) {
36517
36980
  evaluators = evaluators ? [rubricEvaluator, ...evaluators] : [rubricEvaluator];
36518
36981
  }
36519
36982
  }
@@ -36524,7 +36987,7 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
36524
36987
  }
36525
36988
  }
36526
36989
  const allFilePaths = [
36527
- ...guidelinePaths.map((guidelinePath) => path62.resolve(guidelinePath)),
36990
+ ...guidelinePaths.map((guidelinePath) => path72.resolve(guidelinePath)),
36528
36991
  ...userFilePaths
36529
36992
  ];
36530
36993
  const testCase = {
@@ -36536,7 +36999,7 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
36536
36999
  input_segments: inputSegments,
36537
37000
  expected_messages: outputSegments,
36538
37001
  reference_answer: referenceAnswer,
36539
- guideline_paths: guidelinePaths.map((guidelinePath) => path62.resolve(guidelinePath)),
37002
+ guideline_paths: guidelinePaths.map((guidelinePath) => path72.resolve(guidelinePath)),
36540
37003
  guideline_patterns: guidelinePatterns,
36541
37004
  file_paths: allFilePaths,
36542
37005
  expected_outcome: outcome,
@@ -36559,25 +37022,25 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
36559
37022
  }
36560
37023
  return results;
36561
37024
  }
36562
- function asString5(value) {
37025
+ function asString6(value) {
36563
37026
  return typeof value === "string" ? value : void 0;
36564
37027
  }
36565
- function logWarning5(message, details) {
37028
+ function logWarning6(message, details) {
36566
37029
  if (details && details.length > 0) {
36567
37030
  const detailBlock = details.join("\n");
36568
- console.warn(`${ANSI_YELLOW6}Warning: ${message}
36569
- ${detailBlock}${ANSI_RESET6}`);
37031
+ console.warn(`${ANSI_YELLOW7}Warning: ${message}
37032
+ ${detailBlock}${ANSI_RESET7}`);
36570
37033
  } else {
36571
- console.warn(`${ANSI_YELLOW6}Warning: ${message}${ANSI_RESET6}`);
37034
+ console.warn(`${ANSI_YELLOW7}Warning: ${message}${ANSI_RESET7}`);
36572
37035
  }
36573
37036
  }
36574
- function logError(message, details) {
37037
+ function logError2(message, details) {
36575
37038
  if (details && details.length > 0) {
36576
37039
  const detailBlock = details.join("\n");
36577
- console.error(`${ANSI_RED}Error: ${message}
36578
- ${detailBlock}${ANSI_RESET6}`);
37040
+ console.error(`${ANSI_RED2}Error: ${message}
37041
+ ${detailBlock}${ANSI_RESET7}`);
36579
37042
  } else {
36580
- console.error(`${ANSI_RED}Error: ${message}${ANSI_RESET6}`);
37043
+ console.error(`${ANSI_RED2}Error: ${message}${ANSI_RESET7}`);
36581
37044
  }
36582
37045
  }
36583
37046
  var DEFAULT_SYSTEM_PROMPT = "You are a careful assistant. Follow all provided instructions and do not fabricate results.";
@@ -36966,7 +37429,7 @@ function normalizeInputFiles(inputFiles) {
36966
37429
  }
36967
37430
  const deduped = /* @__PURE__ */ new Map();
36968
37431
  for (const inputFile of inputFiles) {
36969
- const absolutePath = path72.resolve(inputFile);
37432
+ const absolutePath = path82.resolve(inputFile);
36970
37433
  if (!deduped.has(absolutePath)) {
36971
37434
  deduped.set(absolutePath, absolutePath);
36972
37435
  }
@@ -36979,14 +37442,14 @@ function collectGuidelineFiles(inputFiles, guidelinePatterns, overrides) {
36979
37442
  }
36980
37443
  const unique = /* @__PURE__ */ new Map();
36981
37444
  for (const inputFile of inputFiles) {
36982
- const absolutePath = path72.resolve(inputFile);
37445
+ const absolutePath = path82.resolve(inputFile);
36983
37446
  if (overrides?.has(absolutePath)) {
36984
37447
  if (!unique.has(absolutePath)) {
36985
37448
  unique.set(absolutePath, absolutePath);
36986
37449
  }
36987
37450
  continue;
36988
37451
  }
36989
- const normalized = absolutePath.split(path72.sep).join("/");
37452
+ const normalized = absolutePath.split(path82.sep).join("/");
36990
37453
  if (isGuidelineFile(normalized, guidelinePatterns)) {
36991
37454
  if (!unique.has(absolutePath)) {
36992
37455
  unique.set(absolutePath, absolutePath);
@@ -37001,7 +37464,7 @@ function collectInputFiles(inputFiles) {
37001
37464
  }
37002
37465
  const unique = /* @__PURE__ */ new Map();
37003
37466
  for (const inputFile of inputFiles) {
37004
- const absolutePath = path72.resolve(inputFile);
37467
+ const absolutePath = path82.resolve(inputFile);
37005
37468
  if (!unique.has(absolutePath)) {
37006
37469
  unique.set(absolutePath, absolutePath);
37007
37470
  }
@@ -37013,7 +37476,7 @@ function buildMandatoryPrereadBlock(guidelineFiles, inputFiles) {
37013
37476
  return "";
37014
37477
  }
37015
37478
  const buildList = (files) => files.map((absolutePath) => {
37016
- const fileName = path72.basename(absolutePath);
37479
+ const fileName = path82.basename(absolutePath);
37017
37480
  const fileUri = pathToFileUri2(absolutePath);
37018
37481
  return `* [${fileName}](${fileUri})`;
37019
37482
  });
@@ -37033,7 +37496,7 @@ ${buildList(inputFiles).join("\n")}.`);
37033
37496
  return sections.join("\n");
37034
37497
  }
37035
37498
  function pathToFileUri2(filePath) {
37036
- const absolutePath = path72.isAbsolute(filePath) ? filePath : path72.resolve(filePath);
37499
+ const absolutePath = path82.isAbsolute(filePath) ? filePath : path82.resolve(filePath);
37037
37500
  const normalizedPath = absolutePath.replace(/\\/g, "/");
37038
37501
  if (/^[a-zA-Z]:\//.test(normalizedPath)) {
37039
37502
  return `file:///${normalizedPath}`;
@@ -37068,7 +37531,7 @@ var ClaudeCodeProvider = class {
37068
37531
  const workspaceRoot = await this.createWorkspace();
37069
37532
  const logger = await this.createStreamLogger(request).catch(() => void 0);
37070
37533
  try {
37071
- const promptFile = path82.join(workspaceRoot, PROMPT_FILENAME);
37534
+ const promptFile = path92.join(workspaceRoot, PROMPT_FILENAME);
37072
37535
  await writeFile5(promptFile, request.question, "utf8");
37073
37536
  const args = this.buildClaudeCodeArgs(request.question, inputFiles);
37074
37537
  const cwd = this.resolveCwd();
@@ -37116,7 +37579,7 @@ var ClaudeCodeProvider = class {
37116
37579
  if (!this.config.cwd) {
37117
37580
  return process.cwd();
37118
37581
  }
37119
- return path82.resolve(this.config.cwd);
37582
+ return path92.resolve(this.config.cwd);
37120
37583
  }
37121
37584
  buildClaudeCodeArgs(prompt, inputFiles) {
37122
37585
  const args = [];
@@ -37173,7 +37636,7 @@ ${filesContext}`;
37173
37636
  }
37174
37637
  }
37175
37638
  async createWorkspace() {
37176
- return await mkdtemp(path82.join(tmpdir(), WORKSPACE_PREFIX));
37639
+ return await mkdtemp(path92.join(tmpdir(), WORKSPACE_PREFIX));
37177
37640
  }
37178
37641
  async cleanupWorkspace(workspaceRoot) {
37179
37642
  try {
@@ -37187,9 +37650,9 @@ ${filesContext}`;
37187
37650
  return void 0;
37188
37651
  }
37189
37652
  if (this.config.logDir) {
37190
- return path82.resolve(this.config.logDir);
37653
+ return path92.resolve(this.config.logDir);
37191
37654
  }
37192
- return path82.join(process.cwd(), ".agentv", "logs", "claude-code");
37655
+ return path92.join(process.cwd(), ".agentv", "logs", "claude-code");
37193
37656
  }
37194
37657
  async createStreamLogger(request) {
37195
37658
  const logDir = this.resolveLogDirectory();
@@ -37203,7 +37666,7 @@ ${filesContext}`;
37203
37666
  console.warn(`Skipping Claude Code stream logging (could not create ${logDir}): ${message}`);
37204
37667
  return void 0;
37205
37668
  }
37206
- const filePath = path82.join(logDir, buildLogFilename(request, this.targetName));
37669
+ const filePath = path92.join(logDir, buildLogFilename(request, this.targetName));
37207
37670
  try {
37208
37671
  const logger = await ClaudeCodeStreamLogger.create({
37209
37672
  filePath,
@@ -37608,10 +38071,10 @@ function escapeShellArg(arg) {
37608
38071
  }
37609
38072
  async function defaultClaudeCodeRunner(options) {
37610
38073
  const tempId = randomUUID();
37611
- const stdoutFile = path82.join(tmpdir(), `agentv-cc-${tempId}-stdout`);
37612
- const stderrFile = path82.join(tmpdir(), `agentv-cc-${tempId}-stderr`);
37613
- const exitFile = path82.join(tmpdir(), `agentv-cc-${tempId}-exit`);
37614
- const pidFile = path82.join(tmpdir(), `agentv-cc-${tempId}-pid`);
38074
+ const stdoutFile = path92.join(tmpdir(), `agentv-cc-${tempId}-stdout`);
38075
+ const stderrFile = path92.join(tmpdir(), `agentv-cc-${tempId}-stderr`);
38076
+ const exitFile = path92.join(tmpdir(), `agentv-cc-${tempId}-exit`);
38077
+ const pidFile = path92.join(tmpdir(), `agentv-cc-${tempId}-pid`);
37615
38078
  try {
37616
38079
  return await runClaudeCodeWithTempFiles(options, stdoutFile, stderrFile, exitFile, pidFile);
37617
38080
  } finally {
@@ -37651,8 +38114,8 @@ async function runClaudeCodeWithTempFiles(options, stdoutFile, stderrFile, exitF
37651
38114
  let lastStdoutSize = 0;
37652
38115
  const readFileIfExists = async (filePath) => {
37653
38116
  try {
37654
- const { readFile: readFile72 } = await import("node:fs/promises");
37655
- return await readFile72(filePath, "utf8");
38117
+ const { readFile: readFile82 } = await import("node:fs/promises");
38118
+ return await readFile82(filePath, "utf8");
37656
38119
  } catch {
37657
38120
  return "";
37658
38121
  }
@@ -37727,7 +38190,8 @@ var ToolCallSchema = external_exports.object({
37727
38190
  input: external_exports.unknown().optional(),
37728
38191
  output: external_exports.unknown().optional(),
37729
38192
  id: external_exports.string().optional(),
37730
- timestamp: external_exports.string().optional()
38193
+ timestamp: external_exports.string().optional(),
38194
+ duration_ms: external_exports.number().optional()
37731
38195
  });
37732
38196
  var OutputMessageInputSchema = external_exports.object({
37733
38197
  role: external_exports.string(),
@@ -37735,6 +38199,7 @@ var OutputMessageInputSchema = external_exports.object({
37735
38199
  content: external_exports.unknown().optional(),
37736
38200
  tool_calls: external_exports.array(ToolCallSchema).optional(),
37737
38201
  timestamp: external_exports.string().optional(),
38202
+ duration_ms: external_exports.number().optional(),
37738
38203
  metadata: external_exports.record(external_exports.unknown()).optional()
37739
38204
  });
37740
38205
  var TokenUsageSchema = external_exports.object({
@@ -37773,8 +38238,16 @@ function convertOutputMessages(messages) {
37773
38238
  role: msg.role,
37774
38239
  name: msg.name,
37775
38240
  content: msg.content,
37776
- toolCalls: msg.tool_calls,
38241
+ toolCalls: msg.tool_calls?.map((tc) => ({
38242
+ tool: tc.tool,
38243
+ input: tc.input,
38244
+ output: tc.output,
38245
+ id: tc.id,
38246
+ timestamp: tc.timestamp,
38247
+ durationMs: tc.duration_ms
38248
+ })),
37777
38249
  timestamp: msg.timestamp,
38250
+ durationMs: msg.duration_ms,
37778
38251
  metadata: msg.metadata
37779
38252
  }));
37780
38253
  }
@@ -38176,7 +38649,7 @@ function normalizeInputFiles2(inputFiles) {
38176
38649
  }
38177
38650
  const unique = /* @__PURE__ */ new Map();
38178
38651
  for (const inputFile of inputFiles) {
38179
- const absolutePath = path92.resolve(inputFile);
38652
+ const absolutePath = path102.resolve(inputFile);
38180
38653
  if (!unique.has(absolutePath)) {
38181
38654
  unique.set(absolutePath, absolutePath);
38182
38655
  }
@@ -38190,7 +38663,7 @@ function formatFileList(files, template) {
38190
38663
  const formatter = template ?? "{path}";
38191
38664
  return files.map((filePath) => {
38192
38665
  const escapedPath = shellEscape(filePath);
38193
- const escapedName = shellEscape(path92.basename(filePath));
38666
+ const escapedName = shellEscape(path102.basename(filePath));
38194
38667
  return formatter.replaceAll("{path}", escapedPath).replaceAll("{basename}", escapedName);
38195
38668
  }).join(" ");
38196
38669
  }
@@ -38214,7 +38687,7 @@ function generateOutputFilePath(evalCaseId, extension = ".json") {
38214
38687
  const safeEvalId = evalCaseId || "unknown";
38215
38688
  const timestamp = Date.now();
38216
38689
  const random = Math.random().toString(36).substring(2, 9);
38217
- return path92.join(os2.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}${extension}`);
38690
+ return path102.join(os2.tmpdir(), `agentv-${safeEvalId}-${timestamp}-${random}${extension}`);
38218
38691
  }
38219
38692
  function formatTimeoutSuffix2(timeoutMs) {
38220
38693
  if (!timeoutMs || timeoutMs <= 0) {
@@ -38305,7 +38778,7 @@ var CodexProvider = class {
38305
38778
  const promptContent = `${systemPrompt}
38306
38779
 
38307
38780
  ${basePrompt}`;
38308
- const promptFile = path102.join(workspaceRoot, PROMPT_FILENAME2);
38781
+ const promptFile = path112.join(workspaceRoot, PROMPT_FILENAME2);
38309
38782
  await writeFile22(promptFile, promptContent, "utf8");
38310
38783
  const args = this.buildCodexArgs();
38311
38784
  const cwd = this.resolveCwd(workspaceRoot);
@@ -38355,7 +38828,7 @@ ${basePrompt}`;
38355
38828
  if (!this.config.cwd) {
38356
38829
  return workspaceRoot;
38357
38830
  }
38358
- return path102.resolve(this.config.cwd);
38831
+ return path112.resolve(this.config.cwd);
38359
38832
  }
38360
38833
  buildCodexArgs() {
38361
38834
  const args = [
@@ -38397,7 +38870,7 @@ ${basePrompt}`;
38397
38870
  }
38398
38871
  }
38399
38872
  async createWorkspace() {
38400
- return await mkdtemp2(path102.join(tmpdir2(), WORKSPACE_PREFIX2));
38873
+ return await mkdtemp2(path112.join(tmpdir2(), WORKSPACE_PREFIX2));
38401
38874
  }
38402
38875
  async cleanupWorkspace(workspaceRoot) {
38403
38876
  try {
@@ -38411,9 +38884,9 @@ ${basePrompt}`;
38411
38884
  return void 0;
38412
38885
  }
38413
38886
  if (this.config.logDir) {
38414
- return path102.resolve(this.config.logDir);
38887
+ return path112.resolve(this.config.logDir);
38415
38888
  }
38416
- return path102.join(process.cwd(), ".agentv", "logs", "codex");
38889
+ return path112.join(process.cwd(), ".agentv", "logs", "codex");
38417
38890
  }
38418
38891
  async createStreamLogger(request) {
38419
38892
  const logDir = this.resolveLogDirectory();
@@ -38427,7 +38900,7 @@ ${basePrompt}`;
38427
38900
  console.warn(`Skipping Codex stream logging (could not create ${logDir}): ${message}`);
38428
38901
  return void 0;
38429
38902
  }
38430
- const filePath = path102.join(logDir, buildLogFilename2(request, this.targetName));
38903
+ const filePath = path112.join(logDir, buildLogFilename2(request, this.targetName));
38431
38904
  try {
38432
38905
  const logger = await CodexStreamLogger.create({
38433
38906
  filePath,
@@ -38642,7 +39115,7 @@ function tryParseJsonValue2(rawLine) {
38642
39115
  async function locateExecutable(candidate) {
38643
39116
  const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
38644
39117
  if (includesPathSeparator) {
38645
- const resolved = path102.isAbsolute(candidate) ? candidate : path102.resolve(candidate);
39118
+ const resolved = path112.isAbsolute(candidate) ? candidate : path112.resolve(candidate);
38646
39119
  const executablePath = await ensureWindowsExecutableVariant(resolved);
38647
39120
  await access22(executablePath, constants22.F_OK);
38648
39121
  return executablePath;
@@ -39216,7 +39689,7 @@ var PiCodingAgentProvider = class {
39216
39689
  const workspaceRoot = await this.createWorkspace();
39217
39690
  const logger = await this.createStreamLogger(request).catch(() => void 0);
39218
39691
  try {
39219
- const promptFile = path112.join(workspaceRoot, PROMPT_FILENAME3);
39692
+ const promptFile = path122.join(workspaceRoot, PROMPT_FILENAME3);
39220
39693
  await writeFile32(promptFile, request.question, "utf8");
39221
39694
  const args = this.buildPiArgs(request.question, inputFiles);
39222
39695
  const cwd = this.resolveCwd(workspaceRoot);
@@ -39258,7 +39731,7 @@ var PiCodingAgentProvider = class {
39258
39731
  if (!this.config.cwd) {
39259
39732
  return workspaceRoot;
39260
39733
  }
39261
- return path112.resolve(this.config.cwd);
39734
+ return path122.resolve(this.config.cwd);
39262
39735
  }
39263
39736
  buildPiArgs(prompt, inputFiles) {
39264
39737
  const args = [];
@@ -39347,7 +39820,7 @@ ${prompt}`;
39347
39820
  return env;
39348
39821
  }
39349
39822
  async createWorkspace() {
39350
- return await mkdtemp3(path112.join(tmpdir3(), WORKSPACE_PREFIX3));
39823
+ return await mkdtemp3(path122.join(tmpdir3(), WORKSPACE_PREFIX3));
39351
39824
  }
39352
39825
  async cleanupWorkspace(workspaceRoot) {
39353
39826
  try {
@@ -39357,9 +39830,9 @@ ${prompt}`;
39357
39830
  }
39358
39831
  resolveLogDirectory() {
39359
39832
  if (this.config.logDir) {
39360
- return path112.resolve(this.config.logDir);
39833
+ return path122.resolve(this.config.logDir);
39361
39834
  }
39362
- return path112.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
39835
+ return path122.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
39363
39836
  }
39364
39837
  async createStreamLogger(request) {
39365
39838
  const logDir = this.resolveLogDirectory();
@@ -39373,7 +39846,7 @@ ${prompt}`;
39373
39846
  console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
39374
39847
  return void 0;
39375
39848
  }
39376
- const filePath = path112.join(logDir, buildLogFilename3(request, this.targetName));
39849
+ const filePath = path122.join(logDir, buildLogFilename3(request, this.targetName));
39377
39850
  try {
39378
39851
  const logger = await PiStreamLogger.create({
39379
39852
  filePath,
@@ -39968,7 +40441,7 @@ function buildMandatoryPrereadBlock2(guidelineFiles, attachmentFiles) {
39968
40441
  return "";
39969
40442
  }
39970
40443
  const buildList = (files) => files.map((absolutePath) => {
39971
- const fileName = path122.basename(absolutePath);
40444
+ const fileName = path132.basename(absolutePath);
39972
40445
  const fileUri = pathToFileUri22(absolutePath);
39973
40446
  return `* [${fileName}](${fileUri})`;
39974
40447
  });
@@ -39993,8 +40466,8 @@ function collectGuidelineFiles2(attachments, guidelinePatterns) {
39993
40466
  }
39994
40467
  const unique = /* @__PURE__ */ new Map();
39995
40468
  for (const attachment of attachments) {
39996
- const absolutePath = path122.resolve(attachment);
39997
- const normalized = absolutePath.split(path122.sep).join("/");
40469
+ const absolutePath = path132.resolve(attachment);
40470
+ const normalized = absolutePath.split(path132.sep).join("/");
39998
40471
  if (isGuidelineFile(normalized, guidelinePatterns)) {
39999
40472
  if (!unique.has(absolutePath)) {
40000
40473
  unique.set(absolutePath, absolutePath);
@@ -40009,7 +40482,7 @@ function collectAttachmentFiles(attachments) {
40009
40482
  }
40010
40483
  const unique = /* @__PURE__ */ new Map();
40011
40484
  for (const attachment of attachments) {
40012
- const absolutePath = path122.resolve(attachment);
40485
+ const absolutePath = path132.resolve(attachment);
40013
40486
  if (!unique.has(absolutePath)) {
40014
40487
  unique.set(absolutePath, absolutePath);
40015
40488
  }
@@ -40017,7 +40490,7 @@ function collectAttachmentFiles(attachments) {
40017
40490
  return Array.from(unique.values());
40018
40491
  }
40019
40492
  function pathToFileUri22(filePath) {
40020
- const absolutePath = path122.isAbsolute(filePath) ? filePath : path122.resolve(filePath);
40493
+ const absolutePath = path132.isAbsolute(filePath) ? filePath : path132.resolve(filePath);
40021
40494
  const normalizedPath = absolutePath.replace(/\\/g, "/");
40022
40495
  if (/^[a-zA-Z]:\//.test(normalizedPath)) {
40023
40496
  return `file:///${normalizedPath}`;
@@ -40030,7 +40503,7 @@ function normalizeAttachments(attachments) {
40030
40503
  }
40031
40504
  const deduped = /* @__PURE__ */ new Set();
40032
40505
  for (const attachment of attachments) {
40033
- deduped.add(path122.resolve(attachment));
40506
+ deduped.add(path132.resolve(attachment));
40034
40507
  }
40035
40508
  return Array.from(deduped);
40036
40509
  }
@@ -40039,7 +40512,7 @@ function mergeAttachments(all) {
40039
40512
  for (const list of all) {
40040
40513
  if (!list) continue;
40041
40514
  for (const inputFile of list) {
40042
- deduped.add(path122.resolve(inputFile));
40515
+ deduped.add(path132.resolve(inputFile));
40043
40516
  }
40044
40517
  }
40045
40518
  return deduped.size > 0 ? Array.from(deduped) : void 0;
@@ -40119,11 +40592,11 @@ async function fileExists3(filePath) {
40119
40592
  }
40120
40593
  }
40121
40594
  async function readTargetDefinitions(filePath) {
40122
- const absolutePath = path132.resolve(filePath);
40595
+ const absolutePath = path14.resolve(filePath);
40123
40596
  if (!await fileExists3(absolutePath)) {
40124
40597
  throw new Error(`targets.yaml not found at ${absolutePath}`);
40125
40598
  }
40126
- const raw = await readFile6(absolutePath, "utf8");
40599
+ const raw = await readFile7(absolutePath, "utf8");
40127
40600
  const parsed = parse32(raw);
40128
40601
  if (!isRecord(parsed)) {
40129
40602
  throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
@@ -40320,15 +40793,15 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
40320
40793
  });
40321
40794
  }
40322
40795
  async function execShellWithStdin(command7, stdinPayload, options = {}) {
40323
- const { mkdir: mkdir42, readFile: readFile72, rm: rm4, writeFile: writeFile42 } = await import("node:fs/promises");
40796
+ const { mkdir: mkdir42, readFile: readFile82, rm: rm4, writeFile: writeFile42 } = await import("node:fs/promises");
40324
40797
  const { tmpdir: tmpdir4 } = await import("node:os");
40325
- const path152 = await import("node:path");
40798
+ const path162 = await import("node:path");
40326
40799
  const { randomUUID: randomUUID4 } = await import("node:crypto");
40327
- const dir = path152.join(tmpdir4(), `agentv-exec-${randomUUID4()}`);
40800
+ const dir = path162.join(tmpdir4(), `agentv-exec-${randomUUID4()}`);
40328
40801
  await mkdir42(dir, { recursive: true });
40329
- const stdinPath = path152.join(dir, "stdin.txt");
40330
- const stdoutPath = path152.join(dir, "stdout.txt");
40331
- const stderrPath = path152.join(dir, "stderr.txt");
40802
+ const stdinPath = path162.join(dir, "stdin.txt");
40803
+ const stdoutPath = path162.join(dir, "stdout.txt");
40804
+ const stderrPath = path162.join(dir, "stderr.txt");
40332
40805
  await writeFile42(stdinPath, stdinPayload, "utf8");
40333
40806
  const wrappedCommand = process.platform === "win32" ? `(${command7}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command7}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
40334
40807
  const { spawn: spawn4 } = await import("node:child_process");
@@ -40358,8 +40831,8 @@ async function execShellWithStdin(command7, stdinPayload, options = {}) {
40358
40831
  resolve2(code ?? 0);
40359
40832
  });
40360
40833
  });
40361
- const stdout = (await readFile72(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
40362
- const stderr = (await readFile72(stderrPath, "utf8")).replace(/\r\n/g, "\n");
40834
+ const stdout = (await readFile82(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
40835
+ const stderr = (await readFile82(stderrPath, "utf8")).replace(/\r\n/g, "\n");
40363
40836
  return { stdout, stderr, exitCode };
40364
40837
  } finally {
40365
40838
  await rm4(dir, { recursive: true, force: true });
@@ -40623,7 +41096,7 @@ var CodeEvaluator = class {
40623
41096
  outputMessages: context.outputMessages ?? null,
40624
41097
  guidelineFiles: context.evalCase.guideline_paths,
40625
41098
  inputFiles: context.evalCase.file_paths.filter(
40626
- (path152) => !context.evalCase.guideline_paths.includes(path152)
41099
+ (path162) => !context.evalCase.guideline_paths.includes(path162)
40627
41100
  ),
40628
41101
  inputMessages: context.evalCase.input_messages,
40629
41102
  traceSummary: context.traceSummary ?? null,
@@ -40764,6 +41237,15 @@ var rubricEvaluationSchema = external_exports.object({
40764
41237
  checks: external_exports.array(rubricCheckResultSchema).describe("Results for each rubric item"),
40765
41238
  overall_reasoning: external_exports.string().describe("Overall assessment summary (1-2 sentences)")
40766
41239
  });
41240
+ var scoreRangeCheckResultSchema = external_exports.object({
41241
+ id: external_exports.string().describe("The ID of the rubric criterion being scored"),
41242
+ score: external_exports.number().int().min(0).max(10).describe("Integer score 0-10 for this criterion"),
41243
+ reasoning: external_exports.string().describe("Brief explanation (1-2 sentences) for this score").optional()
41244
+ });
41245
+ var scoreRangeEvaluationSchema = external_exports.object({
41246
+ checks: external_exports.array(scoreRangeCheckResultSchema).describe("Scores for each rubric criterion"),
41247
+ overall_reasoning: external_exports.string().describe("Overall assessment summary (1-2 sentences)").optional()
41248
+ });
40767
41249
  var LlmJudgeEvaluator = class {
40768
41250
  kind = "llm_judge";
40769
41251
  resolveJudgeProvider;
@@ -40849,6 +41331,10 @@ var LlmJudgeEvaluator = class {
40849
41331
  `No rubrics found for evaluator "${context.evaluator?.name ?? "llm_judge"}". Run "agentv generate rubrics" first.`
40850
41332
  );
40851
41333
  }
41334
+ const hasScoreRanges = rubrics.some((r) => r.score_ranges && r.score_ranges.length > 0);
41335
+ if (hasScoreRanges) {
41336
+ return this.evaluateWithScoreRanges(context, judgeProvider, rubrics);
41337
+ }
40852
41338
  const prompt = this.buildRubricPrompt(context, rubrics);
40853
41339
  const systemPrompt = buildRubricOutputSchema();
40854
41340
  const evaluatorRawRequest = {
@@ -40874,6 +41360,84 @@ var LlmJudgeEvaluator = class {
40874
41360
  evaluatorRawRequest
40875
41361
  };
40876
41362
  }
41363
+ /**
41364
+ * Evaluate using score-range rubrics (analytic rubric scoring).
41365
+ * Each criterion is scored 0-10 and normalized to 0-1.
41366
+ */
41367
+ async evaluateWithScoreRanges(context, judgeProvider, rubrics) {
41368
+ const prompt = this.buildScoreRangePrompt(context, rubrics);
41369
+ const systemPrompt = buildScoreRangeOutputSchema();
41370
+ const evaluatorRawRequest = {
41371
+ userPrompt: prompt,
41372
+ systemPrompt,
41373
+ target: judgeProvider.targetName
41374
+ };
41375
+ const { data } = await this.runWithRetry({
41376
+ context,
41377
+ judgeProvider,
41378
+ systemPrompt,
41379
+ userPrompt: prompt,
41380
+ schema: scoreRangeEvaluationSchema
41381
+ });
41382
+ const { score, verdict, hits, misses, details } = calculateScoreRangeResult(data, rubrics);
41383
+ return {
41384
+ score,
41385
+ verdict,
41386
+ hits,
41387
+ misses,
41388
+ expectedAspectCount: rubrics.length,
41389
+ reasoning: data.overall_reasoning,
41390
+ evaluatorRawRequest,
41391
+ details
41392
+ };
41393
+ }
41394
+ /**
41395
+ * Build prompt for score-range rubric evaluation.
41396
+ */
41397
+ buildScoreRangePrompt(context, rubrics) {
41398
+ const formattedQuestion = context.promptInputs.question && context.promptInputs.question.trim().length > 0 ? context.promptInputs.question : context.evalCase.question;
41399
+ const parts = [
41400
+ "You are an expert evaluator. Score the candidate answer on each criterion below using the provided score ranges.",
41401
+ "For each criterion, output an integer score from 0 to 10 based on which score range best matches the answer.",
41402
+ "",
41403
+ "[[ ## question ## ]]",
41404
+ formattedQuestion,
41405
+ "",
41406
+ "[[ ## expected_outcome ## ]]",
41407
+ context.evalCase.expected_outcome,
41408
+ ""
41409
+ ];
41410
+ if (context.evalCase.reference_answer && context.evalCase.reference_answer.trim().length > 0) {
41411
+ parts.push("[[ ## reference_answer ## ]]", context.evalCase.reference_answer, "");
41412
+ }
41413
+ parts.push(
41414
+ "[[ ## candidate_answer ## ]]",
41415
+ context.candidate,
41416
+ "",
41417
+ "[[ ## scoring_criteria ## ]]"
41418
+ );
41419
+ for (const rubric of rubrics) {
41420
+ const weightLabel = rubric.weight !== 1 ? ` (weight: ${rubric.weight})` : "";
41421
+ const minScoreLabel = rubric.required_min_score !== void 0 ? ` [REQUIRED: min score ${rubric.required_min_score}]` : "";
41422
+ parts.push("", `### Criterion: ${rubric.id}${weightLabel}${minScoreLabel}`);
41423
+ if (rubric.expected_outcome) {
41424
+ parts.push(`Description: ${rubric.expected_outcome}`);
41425
+ }
41426
+ if (rubric.score_ranges && rubric.score_ranges.length > 0) {
41427
+ parts.push("Score ranges:");
41428
+ for (const range of rubric.score_ranges) {
41429
+ const [min, max] = range.score_range;
41430
+ const rangeLabel = min === max ? `${min}` : `${min}-${max}`;
41431
+ parts.push(` - Score ${rangeLabel}: ${range.expected_outcome}`);
41432
+ }
41433
+ }
41434
+ }
41435
+ parts.push(
41436
+ "",
41437
+ "For each criterion, provide an integer score 0-10 that matches one of its defined score ranges."
41438
+ );
41439
+ return parts.join("\n");
41440
+ }
40877
41441
  buildRubricPrompt(context, rubrics) {
40878
41442
  const formattedQuestion = context.promptInputs.question && context.promptInputs.question.trim().length > 0 ? context.promptInputs.question : context.evalCase.question;
40879
41443
  const parts = [
@@ -40893,7 +41457,7 @@ var LlmJudgeEvaluator = class {
40893
41457
  for (const rubric of rubrics) {
40894
41458
  const requiredLabel = rubric.required ? " (REQUIRED)" : "";
40895
41459
  const weightLabel = rubric.weight !== 1 ? ` (weight: ${rubric.weight})` : "";
40896
- parts.push(`- [${rubric.id}]${requiredLabel}${weightLabel}: ${rubric.description}`);
41460
+ parts.push(`- [${rubric.id}]${requiredLabel}${weightLabel}: ${rubric.expected_outcome}`);
40897
41461
  }
40898
41462
  parts.push("", "For each rubric, determine if it is satisfied and provide brief reasoning.");
40899
41463
  return parts.join("\n");
@@ -40980,9 +41544,9 @@ function calculateRubricScore(result, rubrics) {
40980
41544
  totalWeight += rubric.weight;
40981
41545
  if (check2.satisfied) {
40982
41546
  earnedWeight += rubric.weight;
40983
- hits.push(`[${rubric.id}] ${rubric.description}: ${check2.reasoning}`);
41547
+ hits.push(`[${rubric.id}] ${rubric.expected_outcome}: ${check2.reasoning}`);
40984
41548
  } else {
40985
- misses.push(`[${rubric.id}] ${rubric.description}: ${check2.reasoning}`);
41549
+ misses.push(`[${rubric.id}] ${rubric.expected_outcome}: ${check2.reasoning}`);
40986
41550
  if (rubric.required) {
40987
41551
  failedRequired = true;
40988
41552
  }
@@ -40992,6 +41556,76 @@ function calculateRubricScore(result, rubrics) {
40992
41556
  const verdict = failedRequired ? "fail" : scoreToVerdict(score);
40993
41557
  return { score, verdict, hits, misses };
40994
41558
  }
41559
+ function buildScoreRangeOutputSchema() {
41560
+ return `You are an expert evaluator. Score the candidate answer on each criterion.
41561
+ You must return a valid JSON object matching this schema:
41562
+ {
41563
+ "checks": [
41564
+ {
41565
+ "id": "string (criterion id)",
41566
+ "score": integer (0-10),
41567
+ "reasoning": "string (brief explanation for score)"
41568
+ }
41569
+ ],
41570
+ "overall_reasoning": "string (summary, optional)"
41571
+ }
41572
+
41573
+ Important: The "score" must be an integer from 0 to 10 that falls within one of the defined score ranges for that criterion.`;
41574
+ }
41575
+ function calculateScoreRangeResult(result, rubrics) {
41576
+ const rubricMap = new Map(rubrics.map((rubric) => [rubric.id, rubric]));
41577
+ const hits = [];
41578
+ const misses = [];
41579
+ const rawScores = {};
41580
+ let totalWeight = 0;
41581
+ let weightedScoreSum = 0;
41582
+ let failedRequired = false;
41583
+ for (const check2 of result.checks) {
41584
+ const rubric = rubricMap.get(check2.id);
41585
+ if (!rubric) {
41586
+ continue;
41587
+ }
41588
+ const rawScore = Math.max(0, Math.min(10, check2.score));
41589
+ const normalizedScore = rawScore / 10;
41590
+ rawScores[rubric.id] = rawScore;
41591
+ totalWeight += rubric.weight;
41592
+ weightedScoreSum += normalizedScore * rubric.weight;
41593
+ let requiredMinScore;
41594
+ if (rubric.required_min_score !== void 0) {
41595
+ requiredMinScore = rubric.required_min_score;
41596
+ } else if (rubric.required === true) {
41597
+ requiredMinScore = 10;
41598
+ }
41599
+ const matchingRange = rubric.score_ranges?.find(
41600
+ (r) => rawScore >= r.score_range[0] && rawScore <= r.score_range[1]
41601
+ );
41602
+ const rangeDescription = matchingRange?.expected_outcome ?? "";
41603
+ const criterionLabel = rubric.expected_outcome ?? rubric.id;
41604
+ const reasoningText = check2.reasoning ? `: ${check2.reasoning}` : "";
41605
+ const scoreInfo = `[${rubric.id}] ${criterionLabel} - Score: ${rawScore}/10 (${rangeDescription})${reasoningText}`;
41606
+ if (requiredMinScore !== void 0 && rawScore < requiredMinScore) {
41607
+ failedRequired = true;
41608
+ misses.push(scoreInfo);
41609
+ } else if (rawScore >= 7) {
41610
+ hits.push(scoreInfo);
41611
+ } else {
41612
+ misses.push(scoreInfo);
41613
+ }
41614
+ }
41615
+ const score = totalWeight > 0 ? Math.min(1, Math.max(0, weightedScoreSum / totalWeight)) : 0;
41616
+ const verdict = failedRequired ? "fail" : scoreToVerdict(score);
41617
+ return {
41618
+ score,
41619
+ verdict,
41620
+ hits,
41621
+ misses,
41622
+ details: {
41623
+ raw_scores: rawScores,
41624
+ normalization: "score / 10",
41625
+ aggregation: "weighted_average"
41626
+ }
41627
+ };
41628
+ }
40995
41629
  var DEFAULT_COMPOSITE_AGGREGATOR_PROMPT = `Review the following evaluation results:
40996
41630
  {{EVALUATOR_RESULTS_JSON}}
40997
41631
 
@@ -41369,115 +42003,115 @@ var FieldAccuracyEvaluator = class {
41369
42003
  * Evaluate a single field against the expected value.
41370
42004
  */
41371
42005
  evaluateField(fieldConfig, candidateData, expectedData) {
41372
- const { path: path152, match, required: required2 = true, weight = 1 } = fieldConfig;
41373
- const candidateValue = resolvePath(candidateData, path152);
41374
- const expectedValue = resolvePath(expectedData, path152);
42006
+ const { path: path162, match, required: required2 = true, weight = 1 } = fieldConfig;
42007
+ const candidateValue = resolvePath(candidateData, path162);
42008
+ const expectedValue = resolvePath(expectedData, path162);
41375
42009
  if (expectedValue === void 0) {
41376
42010
  return {
41377
- path: path152,
42011
+ path: path162,
41378
42012
  score: 1,
41379
42013
  // No expected value means no comparison needed
41380
42014
  weight,
41381
42015
  hit: true,
41382
- message: `${path152}: no expected value`
42016
+ message: `${path162}: no expected value`
41383
42017
  };
41384
42018
  }
41385
42019
  if (candidateValue === void 0) {
41386
42020
  if (required2) {
41387
42021
  return {
41388
- path: path152,
42022
+ path: path162,
41389
42023
  score: 0,
41390
42024
  weight,
41391
42025
  hit: false,
41392
- message: `${path152} (required, missing)`
42026
+ message: `${path162} (required, missing)`
41393
42027
  };
41394
42028
  }
41395
42029
  return {
41396
- path: path152,
42030
+ path: path162,
41397
42031
  score: 1,
41398
42032
  // Don't penalize missing optional fields
41399
42033
  weight: 0,
41400
42034
  // Zero weight means it won't affect the score
41401
42035
  hit: true,
41402
- message: `${path152}: optional field missing`
42036
+ message: `${path162}: optional field missing`
41403
42037
  };
41404
42038
  }
41405
42039
  switch (match) {
41406
42040
  case "exact":
41407
- return this.compareExact(path152, candidateValue, expectedValue, weight);
42041
+ return this.compareExact(path162, candidateValue, expectedValue, weight);
41408
42042
  case "numeric_tolerance":
41409
42043
  return this.compareNumericTolerance(
41410
- path152,
42044
+ path162,
41411
42045
  candidateValue,
41412
42046
  expectedValue,
41413
42047
  fieldConfig,
41414
42048
  weight
41415
42049
  );
41416
42050
  case "date":
41417
- return this.compareDate(path152, candidateValue, expectedValue, fieldConfig, weight);
42051
+ return this.compareDate(path162, candidateValue, expectedValue, fieldConfig, weight);
41418
42052
  default:
41419
42053
  return {
41420
- path: path152,
42054
+ path: path162,
41421
42055
  score: 0,
41422
42056
  weight,
41423
42057
  hit: false,
41424
- message: `${path152}: unknown match type "${match}"`
42058
+ message: `${path162}: unknown match type "${match}"`
41425
42059
  };
41426
42060
  }
41427
42061
  }
41428
42062
  /**
41429
42063
  * Exact equality comparison.
41430
42064
  */
41431
- compareExact(path152, candidateValue, expectedValue, weight) {
42065
+ compareExact(path162, candidateValue, expectedValue, weight) {
41432
42066
  if (deepEqual(candidateValue, expectedValue)) {
41433
42067
  return {
41434
- path: path152,
42068
+ path: path162,
41435
42069
  score: 1,
41436
42070
  weight,
41437
42071
  hit: true,
41438
- message: path152
42072
+ message: path162
41439
42073
  };
41440
42074
  }
41441
42075
  if (typeof candidateValue !== typeof expectedValue) {
41442
42076
  return {
41443
- path: path152,
42077
+ path: path162,
41444
42078
  score: 0,
41445
42079
  weight,
41446
42080
  hit: false,
41447
- message: `${path152} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
42081
+ message: `${path162} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
41448
42082
  };
41449
42083
  }
41450
42084
  return {
41451
- path: path152,
42085
+ path: path162,
41452
42086
  score: 0,
41453
42087
  weight,
41454
42088
  hit: false,
41455
- message: `${path152} (value mismatch)`
42089
+ message: `${path162} (value mismatch)`
41456
42090
  };
41457
42091
  }
41458
42092
  /**
41459
42093
  * Numeric comparison with absolute or relative tolerance.
41460
42094
  */
41461
- compareNumericTolerance(path152, candidateValue, expectedValue, fieldConfig, weight) {
42095
+ compareNumericTolerance(path162, candidateValue, expectedValue, fieldConfig, weight) {
41462
42096
  const { tolerance = 0, relative = false } = fieldConfig;
41463
42097
  const candidateNum = toNumber(candidateValue);
41464
42098
  const expectedNum = toNumber(expectedValue);
41465
42099
  if (candidateNum === null || expectedNum === null) {
41466
42100
  return {
41467
- path: path152,
42101
+ path: path162,
41468
42102
  score: 0,
41469
42103
  weight,
41470
42104
  hit: false,
41471
- message: `${path152} (non-numeric value)`
42105
+ message: `${path162} (non-numeric value)`
41472
42106
  };
41473
42107
  }
41474
42108
  if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
41475
42109
  return {
41476
- path: path152,
42110
+ path: path162,
41477
42111
  score: 0,
41478
42112
  weight,
41479
42113
  hit: false,
41480
- message: `${path152} (invalid numeric value)`
42114
+ message: `${path162} (invalid numeric value)`
41481
42115
  };
41482
42116
  }
41483
42117
  const diff = Math.abs(candidateNum - expectedNum);
@@ -41490,61 +42124,61 @@ var FieldAccuracyEvaluator = class {
41490
42124
  }
41491
42125
  if (withinTolerance) {
41492
42126
  return {
41493
- path: path152,
42127
+ path: path162,
41494
42128
  score: 1,
41495
42129
  weight,
41496
42130
  hit: true,
41497
- message: `${path152} (within tolerance: diff=${diff.toFixed(2)})`
42131
+ message: `${path162} (within tolerance: diff=${diff.toFixed(2)})`
41498
42132
  };
41499
42133
  }
41500
42134
  return {
41501
- path: path152,
42135
+ path: path162,
41502
42136
  score: 0,
41503
42137
  weight,
41504
42138
  hit: false,
41505
- message: `${path152} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
42139
+ message: `${path162} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
41506
42140
  };
41507
42141
  }
41508
42142
  /**
41509
42143
  * Date comparison with format normalization.
41510
42144
  */
41511
- compareDate(path152, candidateValue, expectedValue, fieldConfig, weight) {
42145
+ compareDate(path162, candidateValue, expectedValue, fieldConfig, weight) {
41512
42146
  const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
41513
42147
  const candidateDate = parseDate(String(candidateValue), formats);
41514
42148
  const expectedDate = parseDate(String(expectedValue), formats);
41515
42149
  if (candidateDate === null) {
41516
42150
  return {
41517
- path: path152,
42151
+ path: path162,
41518
42152
  score: 0,
41519
42153
  weight,
41520
42154
  hit: false,
41521
- message: `${path152} (unparseable candidate date)`
42155
+ message: `${path162} (unparseable candidate date)`
41522
42156
  };
41523
42157
  }
41524
42158
  if (expectedDate === null) {
41525
42159
  return {
41526
- path: path152,
42160
+ path: path162,
41527
42161
  score: 0,
41528
42162
  weight,
41529
42163
  hit: false,
41530
- message: `${path152} (unparseable expected date)`
42164
+ message: `${path162} (unparseable expected date)`
41531
42165
  };
41532
42166
  }
41533
42167
  if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
41534
42168
  return {
41535
- path: path152,
42169
+ path: path162,
41536
42170
  score: 1,
41537
42171
  weight,
41538
42172
  hit: true,
41539
- message: path152
42173
+ message: path162
41540
42174
  };
41541
42175
  }
41542
42176
  return {
41543
- path: path152,
42177
+ path: path162,
41544
42178
  score: 0,
41545
42179
  weight,
41546
42180
  hit: false,
41547
- message: `${path152} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
42181
+ message: `${path162} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
41548
42182
  };
41549
42183
  }
41550
42184
  /**
@@ -41584,11 +42218,11 @@ var FieldAccuracyEvaluator = class {
41584
42218
  };
41585
42219
  }
41586
42220
  };
41587
- function resolvePath(obj, path152) {
41588
- if (!path152 || !obj) {
42221
+ function resolvePath(obj, path162) {
42222
+ if (!path162 || !obj) {
41589
42223
  return void 0;
41590
42224
  }
41591
- const parts = path152.split(/\.|\[|\]/).filter((p) => p.length > 0);
42225
+ const parts = path162.split(/\.|\[|\]/).filter((p) => p.length > 0);
41592
42226
  let current = obj;
41593
42227
  for (const part of parts) {
41594
42228
  if (current === null || current === void 0) {
@@ -41807,6 +42441,27 @@ function argsMatch(expected, actual) {
41807
42441
  }
41808
42442
  return true;
41809
42443
  }
42444
+ function checkLatency(toolName, maxDurationMs, actualDurationMs) {
42445
+ if (maxDurationMs === void 0) {
42446
+ return { status: "skip", message: "" };
42447
+ }
42448
+ if (actualDurationMs === void 0) {
42449
+ return {
42450
+ status: "skip",
42451
+ message: `No duration data for ${toolName}; latency assertion skipped`
42452
+ };
42453
+ }
42454
+ if (actualDurationMs <= maxDurationMs) {
42455
+ return {
42456
+ status: "pass",
42457
+ message: `${toolName} completed in ${actualDurationMs}ms (max: ${maxDurationMs}ms)`
42458
+ };
42459
+ }
42460
+ return {
42461
+ status: "fail",
42462
+ message: `${toolName} took ${actualDurationMs}ms (max: ${maxDurationMs}ms)`
42463
+ };
42464
+ }
41810
42465
  var ToolTrajectoryEvaluator = class {
41811
42466
  kind = "tool_trajectory";
41812
42467
  config;
@@ -41865,7 +42520,8 @@ var ToolTrajectoryEvaluator = class {
41865
42520
  for (const call of message.toolCalls) {
41866
42521
  toolCalls.push({
41867
42522
  name: call.tool,
41868
- args: call.input
42523
+ args: call.input,
42524
+ durationMs: call.durationMs
41869
42525
  });
41870
42526
  }
41871
42527
  }
@@ -41933,17 +42589,27 @@ var ToolTrajectoryEvaluator = class {
41933
42589
  }
41934
42590
  const hits = [];
41935
42591
  const misses = [];
42592
+ const warnings = [];
41936
42593
  let actualIndex = 0;
42594
+ let sequenceHits = 0;
42595
+ let latencyHits = 0;
42596
+ let latencySkips = 0;
42597
+ const latencyAssertionCount = expected.filter(
42598
+ (item) => item.maxDurationMs !== void 0
42599
+ ).length;
41937
42600
  for (let i = 0; i < expected.length; i++) {
41938
42601
  const expectedItem = expected[i];
41939
42602
  const expectedTool = expectedItem.tool;
41940
42603
  let found = false;
41941
42604
  let argsMismatch = false;
42605
+ let matchedCall;
41942
42606
  while (actualIndex < toolCalls.length) {
41943
42607
  const actualCall = toolCalls[actualIndex];
41944
42608
  if (actualCall.name === expectedTool) {
41945
42609
  if (argsMatch(expectedItem.args, actualCall.args)) {
41946
42610
  hits.push(`Found ${expectedTool} at position ${actualIndex}`);
42611
+ sequenceHits++;
42612
+ matchedCall = actualCall;
41947
42613
  actualIndex++;
41948
42614
  found = true;
41949
42615
  break;
@@ -41960,14 +42626,35 @@ var ToolTrajectoryEvaluator = class {
41960
42626
  if (!found && !argsMismatch) {
41961
42627
  misses.push(`Expected ${expectedTool} at position ${i}, not found in remaining trace`);
41962
42628
  }
42629
+ if (found && matchedCall) {
42630
+ const latencyResult = checkLatency(
42631
+ expectedTool,
42632
+ expectedItem.maxDurationMs,
42633
+ matchedCall.durationMs
42634
+ );
42635
+ if (latencyResult.status === "pass") {
42636
+ hits.push(latencyResult.message);
42637
+ latencyHits++;
42638
+ } else if (latencyResult.status === "fail") {
42639
+ misses.push(latencyResult.message);
42640
+ } else if (latencyResult.message) {
42641
+ warnings.push(latencyResult.message);
42642
+ latencySkips++;
42643
+ }
42644
+ }
41963
42645
  }
41964
- const score = hits.length / expected.length;
42646
+ for (const warning of warnings) {
42647
+ console.warn(`[tool_trajectory] ${warning}`);
42648
+ }
42649
+ const effectiveLatencyAssertions = latencyAssertionCount - latencySkips;
42650
+ const totalAssertions = expected.length + effectiveLatencyAssertions;
42651
+ const score = totalAssertions > 0 ? (sequenceHits + latencyHits) / totalAssertions : 1;
41965
42652
  return {
41966
42653
  score,
41967
42654
  verdict: scoreToVerdict(score),
41968
42655
  hits,
41969
42656
  misses,
41970
- expectedAspectCount: expected.length
42657
+ expectedAspectCount: totalAssertions
41971
42658
  };
41972
42659
  }
41973
42660
  evaluateExact(toolCalls) {
@@ -41983,6 +42670,13 @@ var ToolTrajectoryEvaluator = class {
41983
42670
  }
41984
42671
  const hits = [];
41985
42672
  const misses = [];
42673
+ const warnings = [];
42674
+ let sequenceHits = 0;
42675
+ let latencyHits = 0;
42676
+ let latencySkips = 0;
42677
+ const latencyAssertionCount = expected.filter(
42678
+ (item) => item.maxDurationMs !== void 0
42679
+ ).length;
41986
42680
  if (toolCalls.length !== expected.length) {
41987
42681
  misses.push(`Expected ${expected.length} tool calls, got ${toolCalls.length}`);
41988
42682
  }
@@ -41992,26 +42686,50 @@ var ToolTrajectoryEvaluator = class {
41992
42686
  const expectedTool = expectedItem.tool;
41993
42687
  const actualCall = toolCalls[i];
41994
42688
  const actualTool = actualCall.name;
42689
+ let sequenceMatched = false;
41995
42690
  if (actualTool === expectedTool) {
41996
42691
  if (argsMatch(expectedItem.args, actualCall.args)) {
41997
42692
  hits.push(`Position ${i}: ${expectedTool}`);
42693
+ sequenceHits++;
42694
+ sequenceMatched = true;
41998
42695
  } else {
41999
42696
  misses.push(`Position ${i}: ${expectedTool} args mismatch`);
42000
42697
  }
42001
42698
  } else {
42002
42699
  misses.push(`Position ${i}: expected ${expectedTool}, got ${actualTool}`);
42003
42700
  }
42701
+ if (sequenceMatched) {
42702
+ const latencyResult = checkLatency(
42703
+ expectedTool,
42704
+ expectedItem.maxDurationMs,
42705
+ actualCall.durationMs
42706
+ );
42707
+ if (latencyResult.status === "pass") {
42708
+ hits.push(latencyResult.message);
42709
+ latencyHits++;
42710
+ } else if (latencyResult.status === "fail") {
42711
+ misses.push(latencyResult.message);
42712
+ } else if (latencyResult.message) {
42713
+ warnings.push(latencyResult.message);
42714
+ latencySkips++;
42715
+ }
42716
+ }
42004
42717
  }
42005
42718
  for (let i = checkLength; i < expected.length; i++) {
42006
42719
  misses.push(`Position ${i}: expected ${expected[i].tool}, got nothing`);
42007
42720
  }
42008
- const score = hits.length / expected.length;
42721
+ for (const warning of warnings) {
42722
+ console.warn(`[tool_trajectory] ${warning}`);
42723
+ }
42724
+ const effectiveLatencyAssertions = latencyAssertionCount - latencySkips;
42725
+ const totalAssertions = expected.length + effectiveLatencyAssertions;
42726
+ const score = totalAssertions > 0 ? (sequenceHits + latencyHits) / totalAssertions : 1;
42009
42727
  return {
42010
42728
  score,
42011
42729
  verdict: scoreToVerdict(score),
42012
42730
  hits,
42013
42731
  misses,
42014
- expectedAspectCount: expected.length
42732
+ expectedAspectCount: totalAssertions
42015
42733
  };
42016
42734
  }
42017
42735
  };
@@ -42167,17 +42885,17 @@ async function runEvaluation(options) {
42167
42885
  cache,
42168
42886
  useCache,
42169
42887
  now,
42170
- evalId,
42888
+ filter: filter2,
42171
42889
  verbose,
42172
42890
  evalCases: preloadedEvalCases,
42173
42891
  onResult,
42174
42892
  onProgress
42175
42893
  } = options;
42176
- const evalCases = preloadedEvalCases ?? await loadEvalCases(evalFilePath, repoRoot, { verbose, evalId });
42177
- const filteredEvalCases = filterEvalCases(evalCases, evalId);
42894
+ const evalCases = preloadedEvalCases ?? await loadEvalCases(evalFilePath, repoRoot, { verbose, filter: filter2 });
42895
+ const filteredEvalCases = filterEvalCases(evalCases, filter2);
42178
42896
  if (filteredEvalCases.length === 0) {
42179
- if (evalId) {
42180
- throw new Error(`Eval case with id '${evalId}' not found in ${evalFilePath}`);
42897
+ if (filter2) {
42898
+ throw new Error(`No eval cases matched filter '${filter2}' in ${evalFilePath}`);
42181
42899
  }
42182
42900
  return [];
42183
42901
  }
@@ -42753,7 +43471,10 @@ async function runEvaluatorList(options) {
42753
43471
  attempt,
42754
43472
  promptInputs,
42755
43473
  now,
42756
- judgeProvider
43474
+ judgeProvider,
43475
+ outputMessages,
43476
+ traceSummary,
43477
+ agentTimeoutMs
42757
43478
  });
42758
43479
  const weight = evaluator.weight ?? 1;
42759
43480
  scored.push({ score: score2, name: evaluator.name, type: evaluator.type, weight });
@@ -42807,7 +43528,7 @@ async function runEvaluatorList(options) {
42807
43528
  });
42808
43529
  }
42809
43530
  if (evaluator.type === "composite") {
42810
- const evalFileDir = evalCase.guideline_paths[0] ? path14.dirname(evalCase.guideline_paths[0]) : process.cwd();
43531
+ const evalFileDir = evalCase.guideline_paths[0] ? path15.dirname(evalCase.guideline_paths[0]) : process.cwd();
42811
43532
  const createEvaluator = (memberConfig) => {
42812
43533
  switch (memberConfig.type) {
42813
43534
  case "llm_judge":
@@ -43088,9 +43809,22 @@ async function runLlmJudgeEvaluator(options) {
43088
43809
  attempt,
43089
43810
  promptInputs,
43090
43811
  now,
43091
- judgeProvider
43812
+ judgeProvider,
43813
+ outputMessages,
43814
+ traceSummary,
43815
+ agentTimeoutMs
43092
43816
  } = options;
43093
- const customPrompt = await resolveCustomPrompt(config2);
43817
+ const customPrompt = await resolveCustomPrompt(
43818
+ config2,
43819
+ {
43820
+ evalCase,
43821
+ candidate,
43822
+ outputMessages,
43823
+ traceSummary,
43824
+ config: config2.config
43825
+ },
43826
+ agentTimeoutMs
43827
+ );
43094
43828
  return evaluatorRegistry.llm_judge.evaluate({
43095
43829
  evalCase,
43096
43830
  candidate,
@@ -43104,23 +43838,70 @@ async function runLlmJudgeEvaluator(options) {
43104
43838
  evaluator: config2
43105
43839
  });
43106
43840
  }
43107
- async function resolveCustomPrompt(config2) {
43108
- if (config2.promptPath) {
43841
+ async function resolveCustomPrompt(promptConfig, context, timeoutMs) {
43842
+ if (promptConfig.resolvedPromptScript && promptConfig.resolvedPromptScript.length > 0) {
43843
+ if (!context) {
43844
+ throw new Error("Context required for executable prompt templates");
43845
+ }
43846
+ return executePromptTemplate(
43847
+ promptConfig.resolvedPromptScript,
43848
+ context,
43849
+ promptConfig.config,
43850
+ timeoutMs
43851
+ );
43852
+ }
43853
+ const promptPath = promptConfig.resolvedPromptPath ?? promptConfig.promptPath;
43854
+ if (promptPath) {
43109
43855
  try {
43110
- const content = await readTextFile(config2.promptPath);
43856
+ const content = await readTextFile(promptPath);
43111
43857
  return content;
43112
43858
  } catch (error40) {
43113
43859
  const message = error40 instanceof Error ? error40.message : String(error40);
43114
- console.warn(`Could not read custom prompt at ${config2.promptPath}: ${message}`);
43860
+ console.warn(`Could not read custom prompt at ${promptPath}: ${message}`);
43115
43861
  }
43116
43862
  }
43117
- return config2.prompt;
43863
+ const promptValue = promptConfig.prompt;
43864
+ if (typeof promptValue === "string") {
43865
+ return promptValue;
43866
+ }
43867
+ return void 0;
43868
+ }
43869
+ async function executePromptTemplate(script, context, config2, timeoutMs) {
43870
+ const payload = {
43871
+ question: context.evalCase.question,
43872
+ expectedOutcome: context.evalCase.expected_outcome,
43873
+ expectedMessages: context.evalCase.expected_messages,
43874
+ referenceAnswer: context.evalCase.reference_answer,
43875
+ candidateAnswer: context.candidate,
43876
+ outputMessages: context.outputMessages ?? null,
43877
+ guidelineFiles: context.evalCase.guideline_paths,
43878
+ inputFiles: context.evalCase.file_paths.filter(
43879
+ (p) => !context.evalCase.guideline_paths.includes(p)
43880
+ ),
43881
+ inputMessages: context.evalCase.input_messages,
43882
+ traceSummary: context.traceSummary ?? null,
43883
+ config: config2 ?? context.config ?? null
43884
+ };
43885
+ const inputJson = JSON.stringify(toSnakeCaseDeep2(payload), null, 2);
43886
+ const scriptPath = script[script.length - 1];
43887
+ const cwd = path15.dirname(scriptPath);
43888
+ try {
43889
+ const stdout = await executeScript(script, inputJson, timeoutMs, cwd);
43890
+ const prompt = stdout.trim();
43891
+ if (!prompt) {
43892
+ throw new Error("Prompt template produced empty output");
43893
+ }
43894
+ return prompt;
43895
+ } catch (error40) {
43896
+ const message = error40 instanceof Error ? error40.message : String(error40);
43897
+ throw new Error(`Prompt template execution failed: ${message}`);
43898
+ }
43118
43899
  }
43119
- function filterEvalCases(evalCases, evalId) {
43120
- if (!evalId) {
43900
+ function filterEvalCases(evalCases, filter2) {
43901
+ if (!filter2) {
43121
43902
  return evalCases;
43122
43903
  }
43123
- return evalCases.filter((evalCase) => evalCase.id === evalId);
43904
+ return evalCases.filter((evalCase) => micromatch4.isMatch(evalCase.id, filter2));
43124
43905
  }
43125
43906
  function buildEvaluatorRegistry(overrides, resolveJudgeProvider) {
43126
43907
  const llmJudge = overrides?.llm_judge ?? new LlmJudgeEvaluator({
@@ -43274,7 +44055,7 @@ function computeWeightedMean(entries) {
43274
44055
  }
43275
44056
  var rubricItemSchema = external_exports.object({
43276
44057
  id: external_exports.string().describe("Short identifier for this rubric (e.g., clarity, completeness)"),
43277
- description: external_exports.string().describe("What this rubric checks for"),
44058
+ expected_outcome: external_exports.string().describe("Concrete expected outcome for this rubric item"),
43278
44059
  weight: external_exports.number().default(1).describe("Relative importance (default 1.0)"),
43279
44060
  required: external_exports.boolean().default(true).describe("Whether this is a mandatory requirement")
43280
44061
  });
@@ -43294,7 +44075,7 @@ You must return a valid JSON object matching this schema:
43294
44075
  "rubrics": [
43295
44076
  {
43296
44077
  "id": "string (short identifier)",
43297
- "description": "string (what to check)",
44078
+ "expected_outcome": "string (concrete expected outcome for this rubric item)",
43298
44079
  "weight": number (default 1.0),
43299
44080
  "required": boolean (default true)
43300
44081
  }
@@ -43330,7 +44111,7 @@ function buildPrompt(expectedOutcome, question, referenceAnswer) {
43330
44111
  "Each rubric should:",
43331
44112
  "- Be specific and testable",
43332
44113
  "- Have a short, descriptive ID",
43333
- "- Include a clear description of what to check",
44114
+ "- Include a clear expected outcome statement (what a good answer must demonstrate for this rubric)",
43334
44115
  "- Indicate if it is required (mandatory) or optional",
43335
44116
  "- Have an appropriate weight (default 1.0, use higher values for more important aspects)",
43336
44117
  "",
@@ -43395,7 +44176,7 @@ var convertCommand = command2({
43395
44176
  const outputPath = out ?? input.replace(/\.jsonl$/, ".yaml");
43396
44177
  try {
43397
44178
  const count = convertJsonlToYaml(input, outputPath);
43398
- console.log(`Converted ${count} records to ${path15.resolve(outputPath)}`);
44179
+ console.log(`Converted ${count} records to ${path16.resolve(outputPath)}`);
43399
44180
  } catch (error40) {
43400
44181
  console.error(`Error: ${error40.message}`);
43401
44182
  process.exit(1);
@@ -43405,7 +44186,7 @@ var convertCommand = command2({
43405
44186
 
43406
44187
  // src/commands/eval/index.ts
43407
44188
  import { stat as stat4 } from "node:fs/promises";
43408
- import path24 from "node:path";
44189
+ import path25 from "node:path";
43409
44190
  import {
43410
44191
  command as command3,
43411
44192
  flag as flag2,
@@ -43420,19 +44201,19 @@ import fg from "fast-glob";
43420
44201
  // src/commands/eval/run-eval.ts
43421
44202
  import { constants as constants6 } from "node:fs";
43422
44203
  import { access as access6 } from "node:fs/promises";
43423
- import path21 from "node:path";
44204
+ import path24 from "node:path";
43424
44205
  import { pathToFileURL } from "node:url";
43425
44206
 
43426
44207
  // src/commands/eval/env.ts
43427
44208
  import { constants as constants4 } from "node:fs";
43428
44209
  import { access as access4 } from "node:fs/promises";
43429
- import path16 from "node:path";
44210
+ import path17 from "node:path";
43430
44211
  import { config as loadDotenv } from "dotenv";
43431
44212
  function uniqueDirs(directories) {
43432
44213
  const seen = /* @__PURE__ */ new Set();
43433
44214
  const result = [];
43434
44215
  for (const dir of directories) {
43435
- const absolute = path16.resolve(dir);
44216
+ const absolute = path17.resolve(dir);
43436
44217
  if (seen.has(absolute)) {
43437
44218
  continue;
43438
44219
  }
@@ -43451,14 +44232,14 @@ async function fileExists4(filePath) {
43451
44232
  }
43452
44233
  function collectAncestorDirectories(start, boundary) {
43453
44234
  const directories = [];
43454
- const boundaryDir = path16.resolve(boundary);
43455
- let current = path16.resolve(start);
44235
+ const boundaryDir = path17.resolve(boundary);
44236
+ let current = path17.resolve(start);
43456
44237
  while (current !== void 0) {
43457
44238
  directories.push(current);
43458
44239
  if (current === boundaryDir) {
43459
44240
  break;
43460
44241
  }
43461
- const parent = path16.dirname(current);
44242
+ const parent = path17.dirname(current);
43462
44243
  if (parent === current) {
43463
44244
  break;
43464
44245
  }
@@ -43468,12 +44249,12 @@ function collectAncestorDirectories(start, boundary) {
43468
44249
  }
43469
44250
  async function loadEnvFromHierarchy(options) {
43470
44251
  const { testFilePath, repoRoot, verbose } = options;
43471
- const testDir = path16.dirname(path16.resolve(testFilePath));
44252
+ const testDir = path17.dirname(path17.resolve(testFilePath));
43472
44253
  const cwd = process.cwd();
43473
44254
  const searchDirs = uniqueDirs([...collectAncestorDirectories(testDir, repoRoot), repoRoot, cwd]);
43474
44255
  const envFiles = [];
43475
44256
  for (const dir of searchDirs) {
43476
- const candidate = path16.join(dir, ".env");
44257
+ const candidate = path17.join(dir, ".env");
43477
44258
  if (await fileExists4(candidate)) {
43478
44259
  envFiles.push(candidate);
43479
44260
  }
@@ -43497,7 +44278,7 @@ async function loadEnvFromHierarchy(options) {
43497
44278
  // src/commands/eval/jsonl-writer.ts
43498
44279
  import { createWriteStream as createWriteStream4 } from "node:fs";
43499
44280
  import { mkdir as mkdir5 } from "node:fs/promises";
43500
- import path17 from "node:path";
44281
+ import path18 from "node:path";
43501
44282
  import { finished } from "node:stream/promises";
43502
44283
 
43503
44284
  // ../../node_modules/.bun/async-mutex@0.5.0/node_modules/async-mutex/index.mjs
@@ -43715,7 +44496,7 @@ var JsonlWriter = class _JsonlWriter {
43715
44496
  this.stream = stream;
43716
44497
  }
43717
44498
  static async open(filePath) {
43718
- await mkdir5(path17.dirname(filePath), { recursive: true });
44499
+ await mkdir5(path18.dirname(filePath), { recursive: true });
43719
44500
  const stream = createWriteStream4(filePath, { flags: "w", encoding: "utf8" });
43720
44501
  return new _JsonlWriter(stream);
43721
44502
  }
@@ -43748,7 +44529,7 @@ var JsonlWriter = class _JsonlWriter {
43748
44529
  // src/commands/eval/yaml-writer.ts
43749
44530
  import { createWriteStream as createWriteStream5 } from "node:fs";
43750
44531
  import { mkdir as mkdir6 } from "node:fs/promises";
43751
- import path18 from "node:path";
44532
+ import path19 from "node:path";
43752
44533
  import { finished as finished2 } from "node:stream/promises";
43753
44534
  import { stringify as stringifyYaml2 } from "yaml";
43754
44535
  var YamlWriter = class _YamlWriter {
@@ -43760,7 +44541,7 @@ var YamlWriter = class _YamlWriter {
43760
44541
  this.stream = stream;
43761
44542
  }
43762
44543
  static async open(filePath) {
43763
- await mkdir6(path18.dirname(filePath), { recursive: true });
44544
+ await mkdir6(path19.dirname(filePath), { recursive: true });
43764
44545
  const stream = createWriteStream5(filePath, { flags: "w", encoding: "utf8" });
43765
44546
  return new _YamlWriter(stream);
43766
44547
  }
@@ -43880,12 +44661,12 @@ var ProgressDisplay = class {
43880
44661
  }
43881
44662
  addLogPaths(paths, provider) {
43882
44663
  const newPaths = [];
43883
- for (const path29 of paths) {
43884
- if (this.logPathSet.has(path29)) {
44664
+ for (const path30 of paths) {
44665
+ if (this.logPathSet.has(path30)) {
43885
44666
  continue;
43886
44667
  }
43887
- this.logPathSet.add(path29);
43888
- newPaths.push(path29);
44668
+ this.logPathSet.add(path30);
44669
+ newPaths.push(path30);
43889
44670
  }
43890
44671
  if (newPaths.length === 0) {
43891
44672
  return;
@@ -43898,8 +44679,8 @@ var ProgressDisplay = class {
43898
44679
  this.hasPrintedLogHeader = true;
43899
44680
  }
43900
44681
  const startIndex = this.logPaths.length - newPaths.length;
43901
- newPaths.forEach((path29, offset) => {
43902
- console.log(`${startIndex + offset + 1}. ${path29}`);
44682
+ newPaths.forEach((path30, offset) => {
44683
+ console.log(`${startIndex + offset + 1}. ${path30}`);
43903
44684
  });
43904
44685
  }
43905
44686
  finish() {
@@ -44053,8 +44834,8 @@ function formatEvaluationSummary(summary) {
44053
44834
  }
44054
44835
 
44055
44836
  // ../../packages/core/dist/evaluation/validation/index.js
44056
- import { readFile as readFile7 } from "node:fs/promises";
44057
- import path19 from "node:path";
44837
+ import { readFile as readFile8 } from "node:fs/promises";
44838
+ import path20 from "node:path";
44058
44839
  import { parse as parse6 } from "yaml";
44059
44840
  import { readFile as readFile23 } from "node:fs/promises";
44060
44841
  import path23 from "node:path";
@@ -44072,7 +44853,7 @@ var SCHEMA_TARGETS_V2 = "agentv-targets-v2.2";
44072
44853
  var SCHEMA_CONFIG_V2 = "agentv-config-v2";
44073
44854
  async function detectFileType(filePath) {
44074
44855
  try {
44075
- const content = await readFile7(filePath, "utf8");
44856
+ const content = await readFile8(filePath, "utf8");
44076
44857
  const parsed = parse6(content);
44077
44858
  if (typeof parsed !== "object" || parsed === null) {
44078
44859
  return inferFileTypeFromPath(filePath);
@@ -44097,8 +44878,8 @@ async function detectFileType(filePath) {
44097
44878
  }
44098
44879
  }
44099
44880
  function inferFileTypeFromPath(filePath) {
44100
- const normalized = path19.normalize(filePath).replace(/\\/g, "/");
44101
- const basename = path19.basename(filePath);
44881
+ const normalized = path20.normalize(filePath).replace(/\\/g, "/");
44882
+ const basename = path20.basename(filePath);
44102
44883
  if (normalized.includes("/.agentv/")) {
44103
44884
  if (basename === "config.yaml" || basename === "config.yml") {
44104
44885
  return "config";
@@ -44191,17 +44972,31 @@ async function validateEvalFile(filePath) {
44191
44972
  });
44192
44973
  }
44193
44974
  const inputMessages = evalCase.input_messages;
44194
- if (!Array.isArray(inputMessages)) {
44975
+ const inputAlias = evalCase.input;
44976
+ if (Array.isArray(inputMessages)) {
44977
+ validateMessages(inputMessages, `${location}.input_messages`, absolutePath, errors);
44978
+ } else if (inputAlias !== void 0) {
44979
+ if (typeof inputAlias === "string") {
44980
+ } else if (Array.isArray(inputAlias)) {
44981
+ validateMessages(inputAlias, `${location}.input`, absolutePath, errors);
44982
+ } else {
44983
+ errors.push({
44984
+ severity: "error",
44985
+ filePath: absolutePath,
44986
+ location: `${location}.input`,
44987
+ message: "Invalid 'input' field (must be a string or array of messages)"
44988
+ });
44989
+ }
44990
+ } else {
44195
44991
  errors.push({
44196
44992
  severity: "error",
44197
44993
  filePath: absolutePath,
44198
44994
  location: `${location}.input_messages`,
44199
- message: "Missing or invalid 'input_messages' field (must be an array)"
44995
+ message: "Missing 'input_messages' or 'input' field (must provide one)"
44200
44996
  });
44201
- } else {
44202
- validateMessages(inputMessages, `${location}.input_messages`, absolutePath, errors);
44203
44997
  }
44204
44998
  const expectedMessages = evalCase.expected_messages;
44999
+ const expectedOutputAlias = evalCase.expected_output;
44205
45000
  if (expectedMessages !== void 0 && !Array.isArray(expectedMessages)) {
44206
45001
  errors.push({
44207
45002
  severity: "error",
@@ -44211,6 +45006,26 @@ async function validateEvalFile(filePath) {
44211
45006
  });
44212
45007
  } else if (Array.isArray(expectedMessages)) {
44213
45008
  validateMessages(expectedMessages, `${location}.expected_messages`, absolutePath, errors);
45009
+ } else if (expectedOutputAlias !== void 0) {
45010
+ if (typeof expectedOutputAlias === "string") {
45011
+ } else if (Array.isArray(expectedOutputAlias)) {
45012
+ if (expectedOutputAlias.length > 0 && isObject2(expectedOutputAlias[0]) && "role" in expectedOutputAlias[0]) {
45013
+ validateMessages(
45014
+ expectedOutputAlias,
45015
+ `${location}.expected_output`,
45016
+ absolutePath,
45017
+ errors
45018
+ );
45019
+ }
45020
+ } else if (isObject2(expectedOutputAlias)) {
45021
+ } else {
45022
+ errors.push({
45023
+ severity: "error",
45024
+ filePath: absolutePath,
45025
+ location: `${location}.expected_output`,
45026
+ message: "Invalid 'expected_output' field (must be a string, object, or array)"
45027
+ });
45028
+ }
44214
45029
  }
44215
45030
  }
44216
45031
  return {
@@ -44863,12 +45678,12 @@ async function validateMessagesFileRefs(messages, location, searchRoots, filePat
44863
45678
  // src/utils/targets.ts
44864
45679
  import { constants as constants5 } from "node:fs";
44865
45680
  import { access as access5 } from "node:fs/promises";
44866
- import path20 from "node:path";
45681
+ import path21 from "node:path";
44867
45682
  var TARGET_FILE_CANDIDATES = [
44868
45683
  "targets.yaml",
44869
45684
  "targets.yml",
44870
- path20.join(".agentv", "targets.yaml"),
44871
- path20.join(".agentv", "targets.yml")
45685
+ path21.join(".agentv", "targets.yaml"),
45686
+ path21.join(".agentv", "targets.yml")
44872
45687
  ];
44873
45688
  async function fileExists5(filePath) {
44874
45689
  try {
@@ -44881,12 +45696,12 @@ async function fileExists5(filePath) {
44881
45696
  async function discoverTargetsFile(options) {
44882
45697
  const { explicitPath, testFilePath, repoRoot, cwd } = options;
44883
45698
  if (explicitPath) {
44884
- const resolvedExplicit = path20.resolve(explicitPath);
45699
+ const resolvedExplicit = path21.resolve(explicitPath);
44885
45700
  if (await fileExists5(resolvedExplicit)) {
44886
45701
  return resolvedExplicit;
44887
45702
  }
44888
45703
  for (const candidate of TARGET_FILE_CANDIDATES) {
44889
- const nested = path20.join(resolvedExplicit, candidate);
45704
+ const nested = path21.join(resolvedExplicit, candidate);
44890
45705
  if (await fileExists5(nested)) {
44891
45706
  return nested;
44892
45707
  }
@@ -44894,13 +45709,13 @@ async function discoverTargetsFile(options) {
44894
45709
  throw new Error(`targets.yaml not found at provided path: ${resolvedExplicit}`);
44895
45710
  }
44896
45711
  const directories = [...buildDirectoryChain(testFilePath, repoRoot)];
44897
- const resolvedCwd = path20.resolve(cwd);
45712
+ const resolvedCwd = path21.resolve(cwd);
44898
45713
  if (!directories.includes(resolvedCwd)) {
44899
45714
  directories.push(resolvedCwd);
44900
45715
  }
44901
45716
  for (const directory of directories) {
44902
45717
  for (const candidate of TARGET_FILE_CANDIDATES) {
44903
- const fullPath = path20.join(directory, candidate);
45718
+ const fullPath = path21.join(directory, candidate);
44904
45719
  if (await fileExists5(fullPath)) {
44905
45720
  return fullPath;
44906
45721
  }
@@ -44910,9 +45725,9 @@ async function discoverTargetsFile(options) {
44910
45725
  }
44911
45726
 
44912
45727
  // src/commands/eval/targets.ts
44913
- var ANSI_YELLOW7 = "\x1B[33m";
44914
- var ANSI_RED2 = "\x1B[31m";
44915
- var ANSI_RESET7 = "\x1B[0m";
45728
+ var ANSI_YELLOW8 = "\x1B[33m";
45729
+ var ANSI_RED3 = "\x1B[31m";
45730
+ var ANSI_RESET8 = "\x1B[0m";
44916
45731
  function isTTY() {
44917
45732
  return process.stdout.isTTY ?? false;
44918
45733
  }
@@ -44958,8 +45773,8 @@ async function selectTarget(options) {
44958
45773
  Warnings in ${targetsFilePath}:`);
44959
45774
  for (const warning of warnings) {
44960
45775
  const location = warning.location ? ` [${warning.location}]` : "";
44961
- const prefix = useColors ? `${ANSI_YELLOW7} \u26A0${ANSI_RESET7}` : " \u26A0";
44962
- const message = useColors ? `${ANSI_YELLOW7}${warning.message}${ANSI_RESET7}` : warning.message;
45776
+ const prefix = useColors ? `${ANSI_YELLOW8} \u26A0${ANSI_RESET8}` : " \u26A0";
45777
+ const message = useColors ? `${ANSI_YELLOW8}${warning.message}${ANSI_RESET8}` : warning.message;
44963
45778
  console.warn(`${prefix}${location} ${message}`);
44964
45779
  }
44965
45780
  console.warn("");
@@ -44970,8 +45785,8 @@ Warnings in ${targetsFilePath}:`);
44970
45785
  Errors in ${targetsFilePath}:`);
44971
45786
  for (const error40 of errors) {
44972
45787
  const location = error40.location ? ` [${error40.location}]` : "";
44973
- const prefix = useColors ? `${ANSI_RED2} \u2717${ANSI_RESET7}` : " \u2717";
44974
- const message = useColors ? `${ANSI_RED2}${error40.message}${ANSI_RESET7}` : error40.message;
45788
+ const prefix = useColors ? `${ANSI_RED3} \u2717${ANSI_RESET8}` : " \u2717";
45789
+ const message = useColors ? `${ANSI_RED3}${error40.message}${ANSI_RESET8}` : error40.message;
44975
45790
  console.error(`${prefix}${location} ${message}`);
44976
45791
  }
44977
45792
  throw new Error(`Targets file validation failed with ${errors.length} error(s)`);
@@ -45054,7 +45869,7 @@ function normalizeOptions(rawOptions) {
45054
45869
  return {
45055
45870
  target: normalizeString(rawOptions.target),
45056
45871
  targetsPath: normalizeString(rawOptions.targets),
45057
- evalId: normalizeString(rawOptions.evalId),
45872
+ filter: normalizeString(rawOptions.filter),
45058
45873
  workers: workers > 0 ? workers : void 0,
45059
45874
  outPath: normalizeString(rawOptions.out),
45060
45875
  format,
@@ -45076,15 +45891,15 @@ async function ensureFileExists(filePath, description) {
45076
45891
  }
45077
45892
  }
45078
45893
  async function findRepoRoot(start) {
45079
- const fallback = path21.resolve(start);
45894
+ const fallback = path24.resolve(start);
45080
45895
  let current = fallback;
45081
45896
  while (current !== void 0) {
45082
- const candidate = path21.join(current, ".git");
45897
+ const candidate = path24.join(current, ".git");
45083
45898
  try {
45084
45899
  await access6(candidate, constants6.F_OK);
45085
45900
  return current;
45086
45901
  } catch {
45087
- const parent = path21.dirname(current);
45902
+ const parent = path24.dirname(current);
45088
45903
  if (parent === current) {
45089
45904
  break;
45090
45905
  }
@@ -45097,7 +45912,7 @@ function buildDefaultOutputPath(cwd, format) {
45097
45912
  const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
45098
45913
  const baseName = "eval";
45099
45914
  const extension = getDefaultExtension(format);
45100
- return path21.join(cwd, ".agentv", "results", `${baseName}_${timestamp}${extension}`);
45915
+ return path24.join(cwd, ".agentv", "results", `${baseName}_${timestamp}${extension}`);
45101
45916
  }
45102
45917
  function createEvaluationCache() {
45103
45918
  const store = /* @__PURE__ */ new Map();
@@ -45122,7 +45937,7 @@ function createProgressReporter(maxWorkers, options) {
45122
45937
  };
45123
45938
  }
45124
45939
  function makeEvalKey(testFilePath, evalId) {
45125
- return `${path21.resolve(testFilePath)}::${evalId}`;
45940
+ return `${path24.resolve(testFilePath)}::${evalId}`;
45126
45941
  }
45127
45942
  function createDisplayIdTracker() {
45128
45943
  const map2 = /* @__PURE__ */ new Map();
@@ -45179,9 +45994,9 @@ async function prepareFileMetadata(params) {
45179
45994
  const inlineTargetLabel = `${selection.targetName} [provider=${providerLabel}]`;
45180
45995
  const evalCases = await loadEvalCases(testFilePath, repoRoot, {
45181
45996
  verbose: options.verbose,
45182
- evalId: options.evalId
45997
+ filter: options.filter
45183
45998
  });
45184
- const filteredIds = options.evalId ? evalCases.filter((value) => value.id === options.evalId).map((value) => value.id) : evalCases.map((value) => value.id);
45999
+ const filteredIds = evalCases.map((value) => value.id);
45185
46000
  return { evalIds: filteredIds, evalCases, selection, inlineTargetLabel };
45186
46001
  }
45187
46002
  async function runWithLimit(items, limit, task) {
@@ -45252,7 +46067,6 @@ async function runSingleEvalFile(params) {
45252
46067
  agentTimeoutMs,
45253
46068
  cache,
45254
46069
  useCache: options.cache,
45255
- evalId: options.evalId,
45256
46070
  evalCases,
45257
46071
  verbose: options.verbose,
45258
46072
  maxConcurrency: resolvedWorkers,
@@ -45286,14 +46100,14 @@ async function runEvalCommand(input) {
45286
46100
  if (options.verbose) {
45287
46101
  console.log(`Repository root: ${repoRoot}`);
45288
46102
  }
45289
- const outputPath = options.outPath ? path21.resolve(options.outPath) : buildDefaultOutputPath(cwd, options.format);
46103
+ const outputPath = options.outPath ? path24.resolve(options.outPath) : buildDefaultOutputPath(cwd, options.format);
45290
46104
  console.log(`Output path: ${outputPath}`);
45291
46105
  const outputWriter = await createOutputWriter(outputPath, options.format);
45292
46106
  const cache = options.cache ? createEvaluationCache() : void 0;
45293
46107
  const evaluationRunner = await resolveEvaluationRunner();
45294
46108
  const allResults = [];
45295
46109
  const seenEvalCases = /* @__PURE__ */ new Set();
45296
- const resolvedTestFiles = input.testFiles.map((file2) => path21.resolve(file2));
46110
+ const resolvedTestFiles = input.testFiles.map((file2) => path24.resolve(file2));
45297
46111
  const displayIdTracker = createDisplayIdTracker();
45298
46112
  const totalWorkers = options.workers ?? DEFAULT_WORKERS;
45299
46113
  const fileConcurrency = Math.min(
@@ -45392,7 +46206,7 @@ async function resolveEvaluationRunner() {
45392
46206
  if (!overridePath) {
45393
46207
  return runEvaluation;
45394
46208
  }
45395
- const resolved = path21.isAbsolute(overridePath) ? overridePath : path21.resolve(process.cwd(), overridePath);
46209
+ const resolved = path24.isAbsolute(overridePath) ? overridePath : path24.resolve(process.cwd(), overridePath);
45396
46210
  const moduleUrl = pathToFileURL(resolved).href;
45397
46211
  const mod = await import(moduleUrl);
45398
46212
  const candidate = mod.runEvaluation;
@@ -45428,7 +46242,7 @@ var evalCommand = command3({
45428
46242
  evalId: option3({
45429
46243
  type: optional4(string6),
45430
46244
  long: "eval-id",
45431
- description: "Run only the eval case with this identifier"
46245
+ description: 'Filter eval cases by ID pattern (glob supported, e.g., "summary-*")'
45432
46246
  }),
45433
46247
  workers: option3({
45434
46248
  type: number5,
@@ -45495,7 +46309,7 @@ var evalCommand = command3({
45495
46309
  const rawOptions = {
45496
46310
  target: args.target,
45497
46311
  targets: args.targets,
45498
- evalId: args.evalId,
46312
+ filter: args.evalId,
45499
46313
  workers: args.workers,
45500
46314
  out: args.out,
45501
46315
  outputFormat: args.outputFormat,
@@ -45519,10 +46333,10 @@ async function resolveEvalPaths(evalPaths, cwd) {
45519
46333
  const unmatched = [];
45520
46334
  const results = /* @__PURE__ */ new Set();
45521
46335
  for (const pattern of normalizedInputs) {
45522
- const candidatePath = path24.isAbsolute(pattern) ? path24.normalize(pattern) : path24.resolve(cwd, pattern);
46336
+ const candidatePath = path25.isAbsolute(pattern) ? path25.normalize(pattern) : path25.resolve(cwd, pattern);
45523
46337
  try {
45524
46338
  const stats = await stat4(candidatePath);
45525
- if (stats.isFile() && /\.ya?ml$/i.test(candidatePath)) {
46339
+ if (stats.isFile() && /\.(ya?ml|jsonl)$/i.test(candidatePath)) {
45526
46340
  results.add(candidatePath);
45527
46341
  continue;
45528
46342
  }
@@ -45537,20 +46351,20 @@ async function resolveEvalPaths(evalPaths, cwd) {
45537
46351
  dot: true,
45538
46352
  followSymbolicLinks: true
45539
46353
  });
45540
- const yamlMatches = matches.filter((filePath) => /\.ya?ml$/i.test(filePath));
46354
+ const yamlMatches = matches.filter((filePath) => /\.(ya?ml|jsonl)$/i.test(filePath));
45541
46355
  if (yamlMatches.length === 0) {
45542
46356
  unmatched.push(pattern);
45543
46357
  continue;
45544
46358
  }
45545
46359
  for (const filePath of yamlMatches) {
45546
- results.add(path24.normalize(filePath));
46360
+ results.add(path25.normalize(filePath));
45547
46361
  }
45548
46362
  }
45549
46363
  if (unmatched.length > 0) {
45550
46364
  throw new Error(
45551
46365
  `No eval files matched: ${unmatched.join(
45552
46366
  ", "
45553
- )}. Provide YAML paths or globs (e.g., "evals/**/*.yaml").`
46367
+ )}. Provide YAML or JSONL paths or globs (e.g., "evals/**/*.yaml", "evals/**/*.jsonl").`
45554
46368
  );
45555
46369
  }
45556
46370
  const sorted = Array.from(results);
@@ -45562,20 +46376,20 @@ async function resolveEvalPaths(evalPaths, cwd) {
45562
46376
  import { command as command4, flag as flag3, option as option4, optional as optional5, positional as positional4, string as string7, subcommands } from "cmd-ts";
45563
46377
 
45564
46378
  // src/commands/generate/rubrics.ts
45565
- import { readFile as readFile8, writeFile as writeFile6 } from "node:fs/promises";
45566
- import path25 from "node:path";
46379
+ import { readFile as readFile9, writeFile as writeFile6 } from "node:fs/promises";
46380
+ import path26 from "node:path";
45567
46381
  import { pathToFileURL as pathToFileURL2 } from "node:url";
45568
46382
  import { isMap, isSeq, parseDocument } from "yaml";
45569
46383
  function isJsonObject3(value) {
45570
46384
  return typeof value === "object" && value !== null && !Array.isArray(value);
45571
46385
  }
45572
- function asString6(value) {
46386
+ function asString7(value) {
45573
46387
  return typeof value === "string" ? value : void 0;
45574
46388
  }
45575
46389
  async function loadRubricGenerator() {
45576
46390
  const customGenerator = process.env.AGENTEVO_CLI_RUBRIC_GENERATOR;
45577
46391
  if (customGenerator) {
45578
- const generatorPath = path25.resolve(customGenerator);
46392
+ const generatorPath = path26.resolve(customGenerator);
45579
46393
  const generatorUrl = pathToFileURL2(generatorPath).href;
45580
46394
  const module = await import(generatorUrl);
45581
46395
  return module.generateRubrics;
@@ -45585,8 +46399,8 @@ async function loadRubricGenerator() {
45585
46399
  async function generateRubricsCommand(options) {
45586
46400
  const { file: file2, target: targetOverride, verbose } = options;
45587
46401
  console.log(`Generating rubrics for: ${file2}`);
45588
- const absolutePath = path25.resolve(file2);
45589
- const content = await readFile8(absolutePath, "utf8");
46402
+ const absolutePath = path26.resolve(file2);
46403
+ const content = await readFile9(absolutePath, "utf8");
45590
46404
  const doc = parseDocument(content);
45591
46405
  const parsed = doc.toJSON();
45592
46406
  if (!isJsonObject3(parsed)) {
@@ -45625,8 +46439,8 @@ async function generateRubricsCommand(options) {
45625
46439
  continue;
45626
46440
  }
45627
46441
  const evalCase = rawCase;
45628
- const id = asString6(evalCase.id) ?? "unknown";
45629
- const expectedOutcome = asString6(evalCase.expected_outcome) ?? asString6(evalCase.outcome);
46442
+ const id = asString7(evalCase.id) ?? "unknown";
46443
+ const expectedOutcome = asString7(evalCase.expected_outcome) ?? asString7(evalCase.outcome);
45630
46444
  if (!expectedOutcome) {
45631
46445
  if (verbose) {
45632
46446
  console.log(` Skipping ${id}: no expected_outcome`);
@@ -45643,7 +46457,7 @@ async function generateRubricsCommand(options) {
45643
46457
  }
45644
46458
  console.log(` Generating rubrics for: ${id}`);
45645
46459
  const question = extractQuestion(evalCase);
45646
- const referenceAnswer = asString6(evalCase.reference_answer);
46460
+ const referenceAnswer = asString7(evalCase.reference_answer);
45647
46461
  const rubrics = await generateRubricsFunc({
45648
46462
  expectedOutcome,
45649
46463
  question,
@@ -45654,14 +46468,12 @@ async function generateRubricsCommand(options) {
45654
46468
  if (caseNode && isMap(caseNode)) {
45655
46469
  caseNode.set(
45656
46470
  "rubrics",
45657
- rubrics.map(
45658
- (r) => ({
45659
- id: r.id,
45660
- description: r.description,
45661
- weight: r.weight,
45662
- required: r.required
45663
- })
45664
- )
46471
+ rubrics.filter((r) => r.expected_outcome !== void 0).map((r) => ({
46472
+ id: r.id,
46473
+ expected_outcome: r.expected_outcome,
46474
+ weight: r.weight,
46475
+ required: r.required ?? true
46476
+ }))
45665
46477
  );
45666
46478
  }
45667
46479
  updatedCount++;
@@ -45682,7 +46494,7 @@ Updated ${updatedCount} eval case(s) with generated rubrics`);
45682
46494
  }
45683
46495
  }
45684
46496
  function extractQuestion(evalCase) {
45685
- const explicitQuestion = asString6(evalCase.question);
46497
+ const explicitQuestion = asString7(evalCase.question);
45686
46498
  if (explicitQuestion) {
45687
46499
  return explicitQuestion;
45688
46500
  }
@@ -45746,24 +46558,24 @@ var generateCommand = subcommands({
45746
46558
 
45747
46559
  // src/commands/init/index.ts
45748
46560
  import { existsSync, mkdirSync, writeFileSync as writeFileSync2 } from "node:fs";
45749
- import path27 from "node:path";
46561
+ import path28 from "node:path";
45750
46562
  import * as readline from "node:readline/promises";
45751
46563
  import { command as command5, option as option5, optional as optional6, string as string8 } from "cmd-ts";
45752
46564
 
45753
46565
  // src/templates/index.ts
45754
46566
  import { readFileSync as readFileSync3, readdirSync, statSync } from "node:fs";
45755
- import path26 from "node:path";
46567
+ import path27 from "node:path";
45756
46568
  import { fileURLToPath } from "node:url";
45757
46569
  function getGithubTemplates() {
45758
46570
  if (isDistRuntime()) {
45759
46571
  return getTemplatesFromDir(".github");
45760
46572
  }
45761
46573
  const templatesDir = getRepoRootFromDev();
45762
- const promptsDir = path26.join(templatesDir, ".github", "prompts");
46574
+ const promptsDir = path27.join(templatesDir, ".github", "prompts");
45763
46575
  const promptFiles = readdirSync(promptsDir).filter((file2) => file2.startsWith("agentv-"));
45764
46576
  return promptFiles.map((file2) => ({
45765
46577
  path: `prompts/${file2}`,
45766
- content: readFileSync3(path26.join(promptsDir, file2), "utf-8")
46578
+ content: readFileSync3(path27.join(promptsDir, file2), "utf-8")
45767
46579
  }));
45768
46580
  }
45769
46581
  function getAgentvTemplates() {
@@ -45774,47 +46586,47 @@ function getClaudeTemplates() {
45774
46586
  return getTemplatesFromDir(".claude");
45775
46587
  }
45776
46588
  const repoRoot = getRepoRootFromDev();
45777
- const skillsRoot = path26.join(repoRoot, ".claude", "skills");
46589
+ const skillsRoot = path27.join(repoRoot, ".claude", "skills");
45778
46590
  const skillsToInclude = ["agentv-eval-builder", "agentv-prompt-optimizer"];
45779
46591
  const templates = [];
45780
46592
  for (const skill of skillsToInclude) {
45781
- const skillDir = path26.join(skillsRoot, skill);
45782
- const skillTemplates = readTemplatesRecursively(skillDir, path26.join("skills", skill));
46593
+ const skillDir = path27.join(skillsRoot, skill);
46594
+ const skillTemplates = readTemplatesRecursively(skillDir, path27.join("skills", skill));
45783
46595
  templates.push(...skillTemplates);
45784
46596
  }
45785
46597
  return templates;
45786
46598
  }
45787
46599
  function getTemplatesFromDir(subdir) {
45788
- const currentDir = path26.dirname(fileURLToPath(import.meta.url));
46600
+ const currentDir = path27.dirname(fileURLToPath(import.meta.url));
45789
46601
  let templatesDir;
45790
- if (currentDir.includes(`${path26.sep}dist`)) {
45791
- templatesDir = path26.join(currentDir, "templates", subdir);
46602
+ if (currentDir.includes(`${path27.sep}dist`)) {
46603
+ templatesDir = path27.join(currentDir, "templates", subdir);
45792
46604
  } else {
45793
- templatesDir = path26.join(currentDir, subdir);
46605
+ templatesDir = path27.join(currentDir, subdir);
45794
46606
  }
45795
46607
  return readTemplatesRecursively(templatesDir, "");
45796
46608
  }
45797
46609
  function isDistRuntime() {
45798
- const currentDir = path26.dirname(fileURLToPath(import.meta.url));
45799
- return currentDir.includes(`${path26.sep}dist`);
46610
+ const currentDir = path27.dirname(fileURLToPath(import.meta.url));
46611
+ return currentDir.includes(`${path27.sep}dist`);
45800
46612
  }
45801
46613
  function getRepoRootFromDev() {
45802
- const currentDir = path26.dirname(fileURLToPath(import.meta.url));
45803
- return path26.resolve(currentDir, "..", "..", "..", "..");
46614
+ const currentDir = path27.dirname(fileURLToPath(import.meta.url));
46615
+ return path27.resolve(currentDir, "..", "..", "..", "..");
45804
46616
  }
45805
46617
  function readTemplatesRecursively(dir, relativePath) {
45806
46618
  const templates = [];
45807
46619
  const entries = readdirSync(dir);
45808
46620
  for (const entry of entries) {
45809
- const fullPath = path26.join(dir, entry);
46621
+ const fullPath = path27.join(dir, entry);
45810
46622
  const stat6 = statSync(fullPath);
45811
- const entryRelativePath = relativePath ? path26.join(relativePath, entry) : entry;
46623
+ const entryRelativePath = relativePath ? path27.join(relativePath, entry) : entry;
45812
46624
  if (stat6.isDirectory()) {
45813
46625
  templates.push(...readTemplatesRecursively(fullPath, entryRelativePath));
45814
46626
  } else {
45815
46627
  const content = readFileSync3(fullPath, "utf-8");
45816
46628
  templates.push({
45817
- path: entryRelativePath.split(path26.sep).join("/"),
46629
+ path: entryRelativePath.split(path27.sep).join("/"),
45818
46630
  // Normalize to forward slashes
45819
46631
  content
45820
46632
  });
@@ -45837,10 +46649,10 @@ async function promptYesNo(message) {
45837
46649
  }
45838
46650
  }
45839
46651
  async function initCommand(options = {}) {
45840
- const targetPath = path27.resolve(options.targetPath ?? ".");
45841
- const githubDir = path27.join(targetPath, ".github");
45842
- const agentvDir = path27.join(targetPath, ".agentv");
45843
- const claudeDir = path27.join(targetPath, ".claude");
46652
+ const targetPath = path28.resolve(options.targetPath ?? ".");
46653
+ const githubDir = path28.join(targetPath, ".github");
46654
+ const agentvDir = path28.join(targetPath, ".agentv");
46655
+ const claudeDir = path28.join(targetPath, ".claude");
45844
46656
  const githubTemplates = getGithubTemplates();
45845
46657
  const agentvTemplates = getAgentvTemplates();
45846
46658
  const claudeTemplates = getClaudeTemplates();
@@ -45848,32 +46660,32 @@ async function initCommand(options = {}) {
45848
46660
  const otherAgentvTemplates = agentvTemplates.filter((t) => t.path !== ".env.example");
45849
46661
  const existingFiles = [];
45850
46662
  if (envTemplate) {
45851
- const envFilePath = path27.join(targetPath, ".env.example");
46663
+ const envFilePath = path28.join(targetPath, ".env.example");
45852
46664
  if (existsSync(envFilePath)) {
45853
46665
  existingFiles.push(".env.example");
45854
46666
  }
45855
46667
  }
45856
46668
  if (existsSync(githubDir)) {
45857
46669
  for (const template of githubTemplates) {
45858
- const targetFilePath = path27.join(githubDir, template.path);
46670
+ const targetFilePath = path28.join(githubDir, template.path);
45859
46671
  if (existsSync(targetFilePath)) {
45860
- existingFiles.push(path27.relative(targetPath, targetFilePath));
46672
+ existingFiles.push(path28.relative(targetPath, targetFilePath));
45861
46673
  }
45862
46674
  }
45863
46675
  }
45864
46676
  if (existsSync(agentvDir)) {
45865
46677
  for (const template of otherAgentvTemplates) {
45866
- const targetFilePath = path27.join(agentvDir, template.path);
46678
+ const targetFilePath = path28.join(agentvDir, template.path);
45867
46679
  if (existsSync(targetFilePath)) {
45868
- existingFiles.push(path27.relative(targetPath, targetFilePath));
46680
+ existingFiles.push(path28.relative(targetPath, targetFilePath));
45869
46681
  }
45870
46682
  }
45871
46683
  }
45872
46684
  if (existsSync(claudeDir)) {
45873
46685
  for (const template of claudeTemplates) {
45874
- const targetFilePath = path27.join(claudeDir, template.path);
46686
+ const targetFilePath = path28.join(claudeDir, template.path);
45875
46687
  if (existsSync(targetFilePath)) {
45876
- existingFiles.push(path27.relative(targetPath, targetFilePath));
46688
+ existingFiles.push(path28.relative(targetPath, targetFilePath));
45877
46689
  }
45878
46690
  }
45879
46691
  }
@@ -45900,36 +46712,36 @@ async function initCommand(options = {}) {
45900
46712
  mkdirSync(claudeDir, { recursive: true });
45901
46713
  }
45902
46714
  if (envTemplate) {
45903
- const envFilePath = path27.join(targetPath, ".env.example");
46715
+ const envFilePath = path28.join(targetPath, ".env.example");
45904
46716
  writeFileSync2(envFilePath, envTemplate.content, "utf-8");
45905
46717
  console.log("Created .env.example");
45906
46718
  }
45907
46719
  for (const template of githubTemplates) {
45908
- const targetFilePath = path27.join(githubDir, template.path);
45909
- const targetDirPath = path27.dirname(targetFilePath);
46720
+ const targetFilePath = path28.join(githubDir, template.path);
46721
+ const targetDirPath = path28.dirname(targetFilePath);
45910
46722
  if (!existsSync(targetDirPath)) {
45911
46723
  mkdirSync(targetDirPath, { recursive: true });
45912
46724
  }
45913
46725
  writeFileSync2(targetFilePath, template.content, "utf-8");
45914
- console.log(`Created ${path27.relative(targetPath, targetFilePath)}`);
46726
+ console.log(`Created ${path28.relative(targetPath, targetFilePath)}`);
45915
46727
  }
45916
46728
  for (const template of otherAgentvTemplates) {
45917
- const targetFilePath = path27.join(agentvDir, template.path);
45918
- const targetDirPath = path27.dirname(targetFilePath);
46729
+ const targetFilePath = path28.join(agentvDir, template.path);
46730
+ const targetDirPath = path28.dirname(targetFilePath);
45919
46731
  if (!existsSync(targetDirPath)) {
45920
46732
  mkdirSync(targetDirPath, { recursive: true });
45921
46733
  }
45922
46734
  writeFileSync2(targetFilePath, template.content, "utf-8");
45923
- console.log(`Created ${path27.relative(targetPath, targetFilePath)}`);
46735
+ console.log(`Created ${path28.relative(targetPath, targetFilePath)}`);
45924
46736
  }
45925
46737
  for (const template of claudeTemplates) {
45926
- const targetFilePath = path27.join(claudeDir, template.path);
45927
- const targetDirPath = path27.dirname(targetFilePath);
46738
+ const targetFilePath = path28.join(claudeDir, template.path);
46739
+ const targetDirPath = path28.dirname(targetFilePath);
45928
46740
  if (!existsSync(targetDirPath)) {
45929
46741
  mkdirSync(targetDirPath, { recursive: true });
45930
46742
  }
45931
46743
  writeFileSync2(targetFilePath, template.content, "utf-8");
45932
- console.log(`Created ${path27.relative(targetPath, targetFilePath)}`);
46744
+ console.log(`Created ${path28.relative(targetPath, targetFilePath)}`);
45933
46745
  }
45934
46746
  console.log("\nAgentV initialized successfully!");
45935
46747
  console.log("\nFiles installed to root:");
@@ -45937,17 +46749,17 @@ async function initCommand(options = {}) {
45937
46749
  console.log(" - .env.example");
45938
46750
  }
45939
46751
  console.log(`
45940
- Files installed to ${path27.relative(targetPath, githubDir)}:`);
46752
+ Files installed to ${path28.relative(targetPath, githubDir)}:`);
45941
46753
  for (const t of githubTemplates) {
45942
46754
  console.log(` - ${t.path}`);
45943
46755
  }
45944
46756
  console.log(`
45945
- Files installed to ${path27.relative(targetPath, agentvDir)}:`);
46757
+ Files installed to ${path28.relative(targetPath, agentvDir)}:`);
45946
46758
  for (const t of otherAgentvTemplates) {
45947
46759
  console.log(` - ${t.path}`);
45948
46760
  }
45949
46761
  console.log(`
45950
- Files installed to ${path27.relative(targetPath, claudeDir)}:`);
46762
+ Files installed to ${path28.relative(targetPath, claudeDir)}:`);
45951
46763
  for (const t of claudeTemplates) {
45952
46764
  console.log(` - ${t.path}`);
45953
46765
  }
@@ -45980,12 +46792,12 @@ var initCmdTsCommand = command5({
45980
46792
  import { command as command6, restPositionals as restPositionals2, string as string9 } from "cmd-ts";
45981
46793
 
45982
46794
  // src/commands/validate/format-output.ts
45983
- var ANSI_RED3 = "\x1B[31m";
45984
- var ANSI_YELLOW8 = "\x1B[33m";
46795
+ var ANSI_RED4 = "\x1B[31m";
46796
+ var ANSI_YELLOW9 = "\x1B[33m";
45985
46797
  var ANSI_GREEN = "\x1B[32m";
45986
46798
  var ANSI_CYAN = "\x1B[36m";
45987
46799
  var ANSI_BOLD = "\x1B[1m";
45988
- var ANSI_RESET8 = "\x1B[0m";
46800
+ var ANSI_RESET9 = "\x1B[0m";
45989
46801
  function formatSummary(summary, useColors) {
45990
46802
  const lines = [];
45991
46803
  lines.push("");
@@ -46001,15 +46813,15 @@ function formatSummary(summary, useColors) {
46001
46813
  }
46002
46814
  function formatHeader(text2, useColors) {
46003
46815
  if (useColors) {
46004
- return `${ANSI_BOLD}${ANSI_CYAN}${text2}${ANSI_RESET8}`;
46816
+ return `${ANSI_BOLD}${ANSI_CYAN}${text2}${ANSI_RESET9}`;
46005
46817
  }
46006
46818
  return text2;
46007
46819
  }
46008
46820
  function formatFileResult(result, useColors) {
46009
46821
  const lines = [];
46010
46822
  const status = result.valid ? "\u2713" : "\u2717";
46011
- const statusColor = result.valid ? ANSI_GREEN : ANSI_RED3;
46012
- const statusText = useColors ? `${statusColor}${status}${ANSI_RESET8}` : status;
46823
+ const statusColor = result.valid ? ANSI_GREEN : ANSI_RED4;
46824
+ const statusText = useColors ? `${statusColor}${status}${ANSI_RESET9}` : status;
46013
46825
  const fileName = result.filePath;
46014
46826
  lines.push(`${statusText} ${fileName}`);
46015
46827
  if (result.errors.length > 0) {
@@ -46021,8 +46833,8 @@ function formatFileResult(result, useColors) {
46021
46833
  }
46022
46834
  function formatError2(error40, useColors) {
46023
46835
  const prefix = error40.severity === "error" ? " \u2717" : " \u26A0";
46024
- const color = error40.severity === "error" ? ANSI_RED3 : ANSI_YELLOW8;
46025
- const coloredPrefix = useColors ? `${color}${prefix}${ANSI_RESET8}` : prefix;
46836
+ const color = error40.severity === "error" ? ANSI_RED4 : ANSI_YELLOW9;
46837
+ const coloredPrefix = useColors ? `${color}${prefix}${ANSI_RESET9}` : prefix;
46026
46838
  const location = error40.location ? ` [${error40.location}]` : "";
46027
46839
  return `${coloredPrefix}${location} ${error40.message}`;
46028
46840
  }
@@ -46035,15 +46847,15 @@ function formatStats(summary, useColors) {
46035
46847
  (r) => r.errors.some((e) => e.severity === "warning")
46036
46848
  ).length;
46037
46849
  if (useColors) {
46038
- lines.push(`${ANSI_BOLD}${totalText}${ANSI_RESET8}`);
46039
- lines.push(`${ANSI_GREEN}${validText}${ANSI_RESET8}`);
46850
+ lines.push(`${ANSI_BOLD}${totalText}${ANSI_RESET9}`);
46851
+ lines.push(`${ANSI_GREEN}${validText}${ANSI_RESET9}`);
46040
46852
  if (summary.invalidFiles > 0) {
46041
- lines.push(`${ANSI_RED3}${invalidText}${ANSI_RESET8}`);
46853
+ lines.push(`${ANSI_RED4}${invalidText}${ANSI_RESET9}`);
46042
46854
  } else {
46043
46855
  lines.push(invalidText);
46044
46856
  }
46045
46857
  if (filesWithWarnings > 0) {
46046
- lines.push(`${ANSI_YELLOW8}Files with warnings: ${filesWithWarnings}${ANSI_RESET8}`);
46858
+ lines.push(`${ANSI_YELLOW9}Files with warnings: ${filesWithWarnings}${ANSI_RESET9}`);
46047
46859
  }
46048
46860
  } else {
46049
46861
  lines.push(totalText);
@@ -46062,7 +46874,7 @@ function isTTY2() {
46062
46874
  // src/commands/validate/validate-files.ts
46063
46875
  import { constants as constants7 } from "node:fs";
46064
46876
  import { access as access7, readdir as readdir3, stat as stat5 } from "node:fs/promises";
46065
- import path28 from "node:path";
46877
+ import path29 from "node:path";
46066
46878
  async function validateFiles(paths) {
46067
46879
  const filePaths = await expandPaths(paths);
46068
46880
  const results = [];
@@ -46080,7 +46892,7 @@ async function validateFiles(paths) {
46080
46892
  };
46081
46893
  }
46082
46894
  async function validateSingleFile(filePath) {
46083
- const absolutePath = path28.resolve(filePath);
46895
+ const absolutePath = path29.resolve(filePath);
46084
46896
  const fileType = await detectFileType(absolutePath);
46085
46897
  let result;
46086
46898
  if (fileType === "eval") {
@@ -46105,7 +46917,7 @@ async function validateSingleFile(filePath) {
46105
46917
  async function expandPaths(paths) {
46106
46918
  const expanded = [];
46107
46919
  for (const inputPath of paths) {
46108
- const absolutePath = path28.resolve(inputPath);
46920
+ const absolutePath = path29.resolve(inputPath);
46109
46921
  try {
46110
46922
  await access7(absolutePath, constants7.F_OK);
46111
46923
  } catch {
@@ -46129,7 +46941,7 @@ async function findYamlFiles(dirPath) {
46129
46941
  try {
46130
46942
  const entries = await readdir3(dirPath, { withFileTypes: true });
46131
46943
  for (const entry of entries) {
46132
- const fullPath = path28.join(dirPath, entry.name);
46944
+ const fullPath = path29.join(dirPath, entry.name);
46133
46945
  if (entry.isDirectory()) {
46134
46946
  if (entry.name === "node_modules" || entry.name.startsWith(".")) {
46135
46947
  continue;
@@ -46146,7 +46958,7 @@ async function findYamlFiles(dirPath) {
46146
46958
  return results;
46147
46959
  }
46148
46960
  function isYamlFile(filePath) {
46149
- const ext = path28.extname(filePath).toLowerCase();
46961
+ const ext = path29.extname(filePath).toLowerCase();
46150
46962
  return ext === ".yaml" || ext === ".yml";
46151
46963
  }
46152
46964
 
@@ -46206,4 +47018,4 @@ export {
46206
47018
  app,
46207
47019
  runCli
46208
47020
  };
46209
- //# sourceMappingURL=chunk-HTTN5OWL.js.map
47021
+ //# sourceMappingURL=chunk-XREH4WAJ.js.map