agentv 0.20.1 → 0.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +53 -0
- package/dist/{chunk-GDGNKNKP.js → chunk-MA3MJNJH.js} +556 -87
- package/dist/chunk-MA3MJNJH.js.map +1 -0
- package/dist/cli.js +1 -1
- package/dist/index.js +1 -1
- package/package.json +2 -5
- package/dist/chunk-GDGNKNKP.js.map +0 -1
|
@@ -155,7 +155,7 @@ import { access as access6, mkdir as mkdir7 } from "node:fs/promises";
|
|
|
155
155
|
import path18 from "node:path";
|
|
156
156
|
import { pathToFileURL } from "node:url";
|
|
157
157
|
|
|
158
|
-
// ../../packages/core/dist/chunk-
|
|
158
|
+
// ../../packages/core/dist/chunk-BO7KG7JX.js
|
|
159
159
|
import { constants } from "node:fs";
|
|
160
160
|
import { access, readFile } from "node:fs/promises";
|
|
161
161
|
import path from "node:path";
|
|
@@ -638,8 +638,8 @@ function getErrorMap() {
|
|
|
638
638
|
|
|
639
639
|
// ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/helpers/parseUtil.js
|
|
640
640
|
var makeIssue = (params) => {
|
|
641
|
-
const { data, path:
|
|
642
|
-
const fullPath = [...
|
|
641
|
+
const { data, path: path26, errorMaps, issueData } = params;
|
|
642
|
+
const fullPath = [...path26, ...issueData.path || []];
|
|
643
643
|
const fullIssue = {
|
|
644
644
|
...issueData,
|
|
645
645
|
path: fullPath
|
|
@@ -755,11 +755,11 @@ var errorUtil;
|
|
|
755
755
|
|
|
756
756
|
// ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/types.js
|
|
757
757
|
var ParseInputLazyPath = class {
|
|
758
|
-
constructor(parent, value,
|
|
758
|
+
constructor(parent, value, path26, key2) {
|
|
759
759
|
this._cachedPath = [];
|
|
760
760
|
this.parent = parent;
|
|
761
761
|
this.data = value;
|
|
762
|
-
this._path =
|
|
762
|
+
this._path = path26;
|
|
763
763
|
this._key = key2;
|
|
764
764
|
}
|
|
765
765
|
get path() {
|
|
@@ -4201,7 +4201,7 @@ var coerce = {
|
|
|
4201
4201
|
};
|
|
4202
4202
|
var NEVER = INVALID;
|
|
4203
4203
|
|
|
4204
|
-
// ../../packages/core/dist/chunk-
|
|
4204
|
+
// ../../packages/core/dist/chunk-BO7KG7JX.js
|
|
4205
4205
|
async function fileExists(filePath) {
|
|
4206
4206
|
try {
|
|
4207
4207
|
await access(filePath, constants.F_OK);
|
|
@@ -5976,10 +5976,10 @@ function assignProp(target, prop, value) {
|
|
|
5976
5976
|
configurable: true
|
|
5977
5977
|
});
|
|
5978
5978
|
}
|
|
5979
|
-
function getElementAtPath(obj,
|
|
5980
|
-
if (!
|
|
5979
|
+
function getElementAtPath(obj, path26) {
|
|
5980
|
+
if (!path26)
|
|
5981
5981
|
return obj;
|
|
5982
|
-
return
|
|
5982
|
+
return path26.reduce((acc, key2) => acc?.[key2], obj);
|
|
5983
5983
|
}
|
|
5984
5984
|
function promiseAllObject(promisesObj) {
|
|
5985
5985
|
const keys = Object.keys(promisesObj);
|
|
@@ -6299,11 +6299,11 @@ function aborted(x, startIndex = 0) {
|
|
|
6299
6299
|
}
|
|
6300
6300
|
return false;
|
|
6301
6301
|
}
|
|
6302
|
-
function prefixIssues(
|
|
6302
|
+
function prefixIssues(path26, issues) {
|
|
6303
6303
|
return issues.map((iss) => {
|
|
6304
6304
|
var _a17;
|
|
6305
6305
|
(_a17 = iss).path ?? (_a17.path = []);
|
|
6306
|
-
iss.path.unshift(
|
|
6306
|
+
iss.path.unshift(path26);
|
|
6307
6307
|
return iss;
|
|
6308
6308
|
});
|
|
6309
6309
|
}
|
|
@@ -6440,7 +6440,7 @@ function treeifyError(error40, _mapper) {
|
|
|
6440
6440
|
return issue2.message;
|
|
6441
6441
|
};
|
|
6442
6442
|
const result = { errors: [] };
|
|
6443
|
-
const processError = (error41,
|
|
6443
|
+
const processError = (error41, path26 = []) => {
|
|
6444
6444
|
var _a17, _b8;
|
|
6445
6445
|
for (const issue2 of error41.issues) {
|
|
6446
6446
|
if (issue2.code === "invalid_union" && issue2.errors.length) {
|
|
@@ -6450,7 +6450,7 @@ function treeifyError(error40, _mapper) {
|
|
|
6450
6450
|
} else if (issue2.code === "invalid_element") {
|
|
6451
6451
|
processError({ issues: issue2.issues }, issue2.path);
|
|
6452
6452
|
} else {
|
|
6453
|
-
const fullpath = [...
|
|
6453
|
+
const fullpath = [...path26, ...issue2.path];
|
|
6454
6454
|
if (fullpath.length === 0) {
|
|
6455
6455
|
result.errors.push(mapper(issue2));
|
|
6456
6456
|
continue;
|
|
@@ -6480,9 +6480,9 @@ function treeifyError(error40, _mapper) {
|
|
|
6480
6480
|
processError(error40);
|
|
6481
6481
|
return result;
|
|
6482
6482
|
}
|
|
6483
|
-
function toDotPath(
|
|
6483
|
+
function toDotPath(path26) {
|
|
6484
6484
|
const segs = [];
|
|
6485
|
-
for (const seg of
|
|
6485
|
+
for (const seg of path26) {
|
|
6486
6486
|
if (typeof seg === "number")
|
|
6487
6487
|
segs.push(`[${seg}]`);
|
|
6488
6488
|
else if (typeof seg === "symbol")
|
|
@@ -26035,14 +26035,14 @@ function createAzure(options = {}) {
|
|
|
26035
26035
|
description: "Azure OpenAI resource name"
|
|
26036
26036
|
});
|
|
26037
26037
|
const apiVersion = (_a17 = options.apiVersion) != null ? _a17 : "v1";
|
|
26038
|
-
const url2 = ({ path:
|
|
26038
|
+
const url2 = ({ path: path26, modelId }) => {
|
|
26039
26039
|
var _a24;
|
|
26040
26040
|
const baseUrlPrefix = (_a24 = options.baseURL) != null ? _a24 : `https://${getResourceName()}.openai.azure.com/openai`;
|
|
26041
26041
|
let fullUrl;
|
|
26042
26042
|
if (options.useDeploymentBasedUrls) {
|
|
26043
|
-
fullUrl = new URL(`${baseUrlPrefix}/deployments/${modelId}${
|
|
26043
|
+
fullUrl = new URL(`${baseUrlPrefix}/deployments/${modelId}${path26}`);
|
|
26044
26044
|
} else {
|
|
26045
|
-
fullUrl = new URL(`${baseUrlPrefix}/v1${
|
|
26045
|
+
fullUrl = new URL(`${baseUrlPrefix}/v1${path26}`);
|
|
26046
26046
|
}
|
|
26047
26047
|
fullUrl.searchParams.set("api-version", apiVersion);
|
|
26048
26048
|
return fullUrl.toString();
|
|
@@ -34553,7 +34553,7 @@ function isTestMessage(value) {
|
|
|
34553
34553
|
}
|
|
34554
34554
|
return candidate.content.every(isJsonObject);
|
|
34555
34555
|
}
|
|
34556
|
-
var EVALUATOR_KIND_VALUES = ["code", "llm_judge"];
|
|
34556
|
+
var EVALUATOR_KIND_VALUES = ["code", "llm_judge", "rubric"];
|
|
34557
34557
|
var EVALUATOR_KIND_SET = new Set(EVALUATOR_KIND_VALUES);
|
|
34558
34558
|
function isEvaluatorKind(value) {
|
|
34559
34559
|
return typeof value === "string" && EVALUATOR_KIND_SET.has(value);
|
|
@@ -34920,6 +34920,29 @@ async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId
|
|
|
34920
34920
|
}
|
|
34921
34921
|
}
|
|
34922
34922
|
const _model = asString2(rawEvaluator.model);
|
|
34923
|
+
if (typeValue === "rubric") {
|
|
34924
|
+
const rubrics = rawEvaluator.rubrics;
|
|
34925
|
+
if (!Array.isArray(rubrics)) {
|
|
34926
|
+
logWarning2(`Skipping rubric evaluator '${name16}' in '${evalId}': missing rubrics array`);
|
|
34927
|
+
continue;
|
|
34928
|
+
}
|
|
34929
|
+
const parsedRubrics = rubrics.filter((r) => isJsonObject2(r)).map((rubric, index) => ({
|
|
34930
|
+
id: asString2(rubric.id) ?? `rubric-${index + 1}`,
|
|
34931
|
+
description: asString2(rubric.description) ?? "",
|
|
34932
|
+
weight: typeof rubric.weight === "number" ? rubric.weight : 1,
|
|
34933
|
+
required: typeof rubric.required === "boolean" ? rubric.required : true
|
|
34934
|
+
})).filter((r) => r.description.length > 0);
|
|
34935
|
+
if (parsedRubrics.length === 0) {
|
|
34936
|
+
logWarning2(`Skipping rubric evaluator '${name16}' in '${evalId}': no valid rubrics found`);
|
|
34937
|
+
continue;
|
|
34938
|
+
}
|
|
34939
|
+
evaluators.push({
|
|
34940
|
+
name: name16,
|
|
34941
|
+
type: "rubric",
|
|
34942
|
+
rubrics: parsedRubrics
|
|
34943
|
+
});
|
|
34944
|
+
continue;
|
|
34945
|
+
}
|
|
34923
34946
|
evaluators.push({
|
|
34924
34947
|
name: name16,
|
|
34925
34948
|
type: "llm_judge",
|
|
@@ -35390,7 +35413,7 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
|
|
|
35390
35413
|
continue;
|
|
35391
35414
|
}
|
|
35392
35415
|
const conversationId = asString5(evalcase.conversation_id);
|
|
35393
|
-
const outcome = asString5(evalcase.outcome);
|
|
35416
|
+
const outcome = asString5(evalcase.expected_outcome) ?? asString5(evalcase.outcome);
|
|
35394
35417
|
const inputMessagesValue = evalcase.input_messages;
|
|
35395
35418
|
const expectedMessagesValue = evalcase.expected_messages;
|
|
35396
35419
|
if (!id || !outcome || !Array.isArray(inputMessagesValue)) {
|
|
@@ -35444,6 +35467,33 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
|
|
|
35444
35467
|
logError(`Skipping eval case '${id}': ${message}`);
|
|
35445
35468
|
continue;
|
|
35446
35469
|
}
|
|
35470
|
+
const inlineRubrics = evalcase.rubrics;
|
|
35471
|
+
if (inlineRubrics !== void 0 && Array.isArray(inlineRubrics)) {
|
|
35472
|
+
const rubricItems = inlineRubrics.filter((r) => isJsonObject(r) || typeof r === "string").map((rubric, index) => {
|
|
35473
|
+
if (typeof rubric === "string") {
|
|
35474
|
+
return {
|
|
35475
|
+
id: `rubric-${index + 1}`,
|
|
35476
|
+
description: rubric,
|
|
35477
|
+
weight: 1,
|
|
35478
|
+
required: true
|
|
35479
|
+
};
|
|
35480
|
+
}
|
|
35481
|
+
return {
|
|
35482
|
+
id: asString5(rubric.id) ?? `rubric-${index + 1}`,
|
|
35483
|
+
description: asString5(rubric.description) ?? "",
|
|
35484
|
+
weight: typeof rubric.weight === "number" ? rubric.weight : 1,
|
|
35485
|
+
required: typeof rubric.required === "boolean" ? rubric.required : true
|
|
35486
|
+
};
|
|
35487
|
+
}).filter((r) => r.description.length > 0);
|
|
35488
|
+
if (rubricItems.length > 0) {
|
|
35489
|
+
const rubricEvaluator = {
|
|
35490
|
+
name: "rubric",
|
|
35491
|
+
type: "rubric",
|
|
35492
|
+
rubrics: rubricItems
|
|
35493
|
+
};
|
|
35494
|
+
evaluators = evaluators ? [rubricEvaluator, ...evaluators] : [rubricEvaluator];
|
|
35495
|
+
}
|
|
35496
|
+
}
|
|
35447
35497
|
const userFilePaths = [];
|
|
35448
35498
|
for (const segment of inputSegments) {
|
|
35449
35499
|
if (segment.type === "file" && typeof segment.resolvedPath === "string") {
|
|
@@ -35536,6 +35586,9 @@ var AzureProvider = class {
|
|
|
35536
35586
|
retryConfig: this.retryConfig
|
|
35537
35587
|
});
|
|
35538
35588
|
}
|
|
35589
|
+
asLanguageModel() {
|
|
35590
|
+
return this.model;
|
|
35591
|
+
}
|
|
35539
35592
|
};
|
|
35540
35593
|
var AnthropicProvider = class {
|
|
35541
35594
|
constructor(targetName, config2) {
|
|
@@ -35569,6 +35622,9 @@ var AnthropicProvider = class {
|
|
|
35569
35622
|
providerOptions
|
|
35570
35623
|
});
|
|
35571
35624
|
}
|
|
35625
|
+
asLanguageModel() {
|
|
35626
|
+
return this.model;
|
|
35627
|
+
}
|
|
35572
35628
|
};
|
|
35573
35629
|
var GeminiProvider = class {
|
|
35574
35630
|
constructor(targetName, config2) {
|
|
@@ -35599,6 +35655,9 @@ var GeminiProvider = class {
|
|
|
35599
35655
|
retryConfig: this.retryConfig
|
|
35600
35656
|
});
|
|
35601
35657
|
}
|
|
35658
|
+
asLanguageModel() {
|
|
35659
|
+
return this.model;
|
|
35660
|
+
}
|
|
35602
35661
|
};
|
|
35603
35662
|
function buildAzureOptions(config2) {
|
|
35604
35663
|
const options = {
|
|
@@ -37262,6 +37321,144 @@ function createProvider(target) {
|
|
|
37262
37321
|
}
|
|
37263
37322
|
}
|
|
37264
37323
|
}
|
|
37324
|
+
var rubricCheckResultSchema = external_exports.object({
|
|
37325
|
+
id: external_exports.string().describe("The ID of the rubric item being checked"),
|
|
37326
|
+
satisfied: external_exports.boolean().describe("Whether this rubric requirement is met"),
|
|
37327
|
+
reasoning: external_exports.string().describe("Brief explanation (1-2 sentences) for this check")
|
|
37328
|
+
});
|
|
37329
|
+
var rubricEvaluationSchema = external_exports.object({
|
|
37330
|
+
checks: external_exports.array(rubricCheckResultSchema).describe("Results for each rubric item"),
|
|
37331
|
+
overall_reasoning: external_exports.string().describe("Overall assessment summary (1-2 sentences)")
|
|
37332
|
+
});
|
|
37333
|
+
var RubricEvaluator = class {
|
|
37334
|
+
kind = "rubric";
|
|
37335
|
+
config;
|
|
37336
|
+
resolveJudgeProvider;
|
|
37337
|
+
constructor(options) {
|
|
37338
|
+
this.config = options.config;
|
|
37339
|
+
this.resolveJudgeProvider = options.resolveJudgeProvider;
|
|
37340
|
+
}
|
|
37341
|
+
async evaluate(context) {
|
|
37342
|
+
const judgeProvider = await this.resolveJudgeProvider(context);
|
|
37343
|
+
if (!judgeProvider) {
|
|
37344
|
+
throw new Error("No judge provider available for rubric evaluation");
|
|
37345
|
+
}
|
|
37346
|
+
if (!this.config.rubrics || this.config.rubrics.length === 0) {
|
|
37347
|
+
throw new Error(
|
|
37348
|
+
`No rubrics found for evaluator "${this.config.name}". Run "agentv generate rubrics" first.`
|
|
37349
|
+
);
|
|
37350
|
+
}
|
|
37351
|
+
const prompt = this.buildPrompt(context, this.config.rubrics);
|
|
37352
|
+
const model = judgeProvider.asLanguageModel?.();
|
|
37353
|
+
if (!model) {
|
|
37354
|
+
throw new Error("Judge provider does not support language model interface");
|
|
37355
|
+
}
|
|
37356
|
+
const system = `You are an expert evaluator. Evaluate the candidate answer against each rubric item.
|
|
37357
|
+
You must return a valid JSON object matching this schema:
|
|
37358
|
+
{
|
|
37359
|
+
"checks": [
|
|
37360
|
+
{
|
|
37361
|
+
"id": "string (rubric id)",
|
|
37362
|
+
"satisfied": boolean,
|
|
37363
|
+
"reasoning": "string (brief explanation)"
|
|
37364
|
+
}
|
|
37365
|
+
],
|
|
37366
|
+
"overall_reasoning": "string (summary)"
|
|
37367
|
+
}`;
|
|
37368
|
+
let result;
|
|
37369
|
+
let lastError;
|
|
37370
|
+
for (let attempt = 1; attempt <= 3; attempt++) {
|
|
37371
|
+
try {
|
|
37372
|
+
const { text: text2 } = await generateText({
|
|
37373
|
+
model,
|
|
37374
|
+
system,
|
|
37375
|
+
prompt
|
|
37376
|
+
});
|
|
37377
|
+
const cleaned = text2.replace(/```json\n?|```/g, "").trim();
|
|
37378
|
+
result = rubricEvaluationSchema.parse(JSON.parse(cleaned));
|
|
37379
|
+
break;
|
|
37380
|
+
} catch (e) {
|
|
37381
|
+
lastError = e instanceof Error ? e : new Error(String(e));
|
|
37382
|
+
}
|
|
37383
|
+
}
|
|
37384
|
+
if (!result) {
|
|
37385
|
+
throw new Error(
|
|
37386
|
+
`Failed to parse rubric evaluation result after 3 attempts: ${lastError?.message}`
|
|
37387
|
+
);
|
|
37388
|
+
}
|
|
37389
|
+
const { score, verdict, hits, misses } = this.calculateScore(result, this.config.rubrics);
|
|
37390
|
+
return {
|
|
37391
|
+
score,
|
|
37392
|
+
verdict,
|
|
37393
|
+
hits,
|
|
37394
|
+
misses,
|
|
37395
|
+
expectedAspectCount: this.config.rubrics.length,
|
|
37396
|
+
reasoning: result.overall_reasoning,
|
|
37397
|
+
evaluatorRawRequest: {
|
|
37398
|
+
prompt
|
|
37399
|
+
}
|
|
37400
|
+
};
|
|
37401
|
+
}
|
|
37402
|
+
buildPrompt(context, rubrics) {
|
|
37403
|
+
const parts = [
|
|
37404
|
+
"You are an expert evaluator. Evaluate the candidate answer against each rubric item below.",
|
|
37405
|
+
"",
|
|
37406
|
+
"[[ ## question ## ]]",
|
|
37407
|
+
context.evalCase.question,
|
|
37408
|
+
"",
|
|
37409
|
+
"[[ ## expected_outcome ## ]]",
|
|
37410
|
+
context.evalCase.expected_outcome,
|
|
37411
|
+
""
|
|
37412
|
+
];
|
|
37413
|
+
if (context.evalCase.reference_answer && context.evalCase.reference_answer.trim().length > 0) {
|
|
37414
|
+
parts.push("[[ ## reference_answer ## ]]", context.evalCase.reference_answer, "");
|
|
37415
|
+
}
|
|
37416
|
+
parts.push("[[ ## candidate_answer ## ]]", context.candidate, "", "[[ ## rubrics ## ]]");
|
|
37417
|
+
for (const rubric of rubrics) {
|
|
37418
|
+
const requiredLabel = rubric.required ? " (REQUIRED)" : "";
|
|
37419
|
+
const weightLabel = rubric.weight !== 1 ? ` (weight: ${rubric.weight})` : "";
|
|
37420
|
+
parts.push(`- [${rubric.id}]${requiredLabel}${weightLabel}: ${rubric.description}`);
|
|
37421
|
+
}
|
|
37422
|
+
parts.push("", "For each rubric, determine if it is satisfied and provide brief reasoning.");
|
|
37423
|
+
return parts.join("\n");
|
|
37424
|
+
}
|
|
37425
|
+
calculateScore(result, rubrics) {
|
|
37426
|
+
const rubricMap = new Map(rubrics.map((r) => [r.id, r]));
|
|
37427
|
+
const hits = [];
|
|
37428
|
+
const misses = [];
|
|
37429
|
+
let totalWeight = 0;
|
|
37430
|
+
let earnedWeight = 0;
|
|
37431
|
+
let failedRequired = false;
|
|
37432
|
+
for (const check2 of result.checks) {
|
|
37433
|
+
const rubric = rubricMap.get(check2.id);
|
|
37434
|
+
if (!rubric) {
|
|
37435
|
+
continue;
|
|
37436
|
+
}
|
|
37437
|
+
totalWeight += rubric.weight;
|
|
37438
|
+
if (check2.satisfied) {
|
|
37439
|
+
earnedWeight += rubric.weight;
|
|
37440
|
+
hits.push(`[${rubric.id}] ${rubric.description}: ${check2.reasoning}`);
|
|
37441
|
+
} else {
|
|
37442
|
+
misses.push(`[${rubric.id}] ${rubric.description}: ${check2.reasoning}`);
|
|
37443
|
+
if (rubric.required) {
|
|
37444
|
+
failedRequired = true;
|
|
37445
|
+
}
|
|
37446
|
+
}
|
|
37447
|
+
}
|
|
37448
|
+
const score = totalWeight > 0 ? Math.min(1, Math.max(0, earnedWeight / totalWeight)) : 0;
|
|
37449
|
+
let verdict;
|
|
37450
|
+
if (failedRequired) {
|
|
37451
|
+
verdict = "fail";
|
|
37452
|
+
} else if (score >= 0.8) {
|
|
37453
|
+
verdict = "pass";
|
|
37454
|
+
} else if (score >= 0.6) {
|
|
37455
|
+
verdict = "borderline";
|
|
37456
|
+
} else {
|
|
37457
|
+
verdict = "fail";
|
|
37458
|
+
}
|
|
37459
|
+
return { score, verdict, hits, misses };
|
|
37460
|
+
}
|
|
37461
|
+
};
|
|
37265
37462
|
var DEFAULT_EVALUATOR_TEMPLATE = `You are an expert evaluator. Your goal is to grade the candidate_answer based on how well it achieves the expected_outcome for the original task.
|
|
37266
37463
|
|
|
37267
37464
|
Use the reference_answer as a gold standard for a high-quality response (if provided). The candidate_answer does not need to match it verbatim, but should capture the key points and follow the same spirit.
|
|
@@ -38214,6 +38411,7 @@ async function runEvaluatorList(options) {
|
|
|
38214
38411
|
name: evaluator.name,
|
|
38215
38412
|
type: evaluator.type,
|
|
38216
38413
|
score: score2.score,
|
|
38414
|
+
verdict: score2.verdict,
|
|
38217
38415
|
hits: score2.hits,
|
|
38218
38416
|
misses: score2.misses,
|
|
38219
38417
|
reasoning: score2.reasoning,
|
|
@@ -38241,6 +38439,40 @@ async function runEvaluatorList(options) {
|
|
|
38241
38439
|
name: evaluator.name,
|
|
38242
38440
|
type: evaluator.type,
|
|
38243
38441
|
score: score2.score,
|
|
38442
|
+
verdict: score2.verdict,
|
|
38443
|
+
hits: score2.hits,
|
|
38444
|
+
misses: score2.misses,
|
|
38445
|
+
reasoning: score2.reasoning,
|
|
38446
|
+
evaluator_provider_request: score2.evaluatorRawRequest
|
|
38447
|
+
});
|
|
38448
|
+
continue;
|
|
38449
|
+
}
|
|
38450
|
+
if (evaluator.type === "rubric") {
|
|
38451
|
+
const rubricEvaluator = new RubricEvaluator({
|
|
38452
|
+
config: evaluator,
|
|
38453
|
+
resolveJudgeProvider: async (context) => {
|
|
38454
|
+
if (context.judgeProvider) {
|
|
38455
|
+
return context.judgeProvider;
|
|
38456
|
+
}
|
|
38457
|
+
return judgeProvider;
|
|
38458
|
+
}
|
|
38459
|
+
});
|
|
38460
|
+
const score2 = await rubricEvaluator.evaluate({
|
|
38461
|
+
evalCase,
|
|
38462
|
+
candidate,
|
|
38463
|
+
target,
|
|
38464
|
+
provider,
|
|
38465
|
+
attempt,
|
|
38466
|
+
promptInputs,
|
|
38467
|
+
now,
|
|
38468
|
+
judgeProvider
|
|
38469
|
+
});
|
|
38470
|
+
scored.push({ score: score2, name: evaluator.name, type: evaluator.type });
|
|
38471
|
+
evaluatorResults.push({
|
|
38472
|
+
name: evaluator.name,
|
|
38473
|
+
type: evaluator.type,
|
|
38474
|
+
score: score2.score,
|
|
38475
|
+
verdict: score2.verdict,
|
|
38244
38476
|
hits: score2.hits,
|
|
38245
38477
|
misses: score2.misses,
|
|
38246
38478
|
reasoning: score2.reasoning,
|
|
@@ -38470,6 +38702,82 @@ function isTimeoutLike(error40) {
|
|
|
38470
38702
|
const value = String(error40).toLowerCase();
|
|
38471
38703
|
return value.includes("timeout");
|
|
38472
38704
|
}
|
|
38705
|
+
var rubricItemSchema = external_exports.object({
|
|
38706
|
+
id: external_exports.string().describe("Short identifier for this rubric (e.g., clarity, completeness)"),
|
|
38707
|
+
description: external_exports.string().describe("What this rubric checks for"),
|
|
38708
|
+
weight: external_exports.number().default(1).describe("Relative importance (default 1.0)"),
|
|
38709
|
+
required: external_exports.boolean().default(true).describe("Whether this is a mandatory requirement")
|
|
38710
|
+
});
|
|
38711
|
+
var rubricGenerationSchema = external_exports.object({
|
|
38712
|
+
rubrics: external_exports.array(rubricItemSchema).describe("List of evaluation rubrics")
|
|
38713
|
+
});
|
|
38714
|
+
async function generateRubrics(options) {
|
|
38715
|
+
const { expectedOutcome, question, referenceAnswer, provider } = options;
|
|
38716
|
+
const prompt = buildPrompt(expectedOutcome, question, referenceAnswer);
|
|
38717
|
+
const model = provider.asLanguageModel?.();
|
|
38718
|
+
if (!model) {
|
|
38719
|
+
throw new Error("Provider does not support language model interface");
|
|
38720
|
+
}
|
|
38721
|
+
const system = `You are an expert at creating evaluation rubrics.
|
|
38722
|
+
You must return a valid JSON object matching this schema:
|
|
38723
|
+
{
|
|
38724
|
+
"rubrics": [
|
|
38725
|
+
{
|
|
38726
|
+
"id": "string (short identifier)",
|
|
38727
|
+
"description": "string (what to check)",
|
|
38728
|
+
"weight": number (default 1.0),
|
|
38729
|
+
"required": boolean (default true)
|
|
38730
|
+
}
|
|
38731
|
+
]
|
|
38732
|
+
}`;
|
|
38733
|
+
let result;
|
|
38734
|
+
let lastError;
|
|
38735
|
+
for (let attempt = 1; attempt <= 3; attempt++) {
|
|
38736
|
+
try {
|
|
38737
|
+
const { text: text2 } = await generateText({
|
|
38738
|
+
model,
|
|
38739
|
+
system,
|
|
38740
|
+
prompt
|
|
38741
|
+
});
|
|
38742
|
+
const cleaned = text2.replace(/```json\n?|```/g, "").trim();
|
|
38743
|
+
result = rubricGenerationSchema.parse(JSON.parse(cleaned));
|
|
38744
|
+
break;
|
|
38745
|
+
} catch (e) {
|
|
38746
|
+
lastError = e instanceof Error ? e : new Error(String(e));
|
|
38747
|
+
}
|
|
38748
|
+
}
|
|
38749
|
+
if (!result) {
|
|
38750
|
+
throw new Error(`Failed to parse generated rubrics after 3 attempts: ${lastError?.message}`);
|
|
38751
|
+
}
|
|
38752
|
+
return result.rubrics;
|
|
38753
|
+
}
|
|
38754
|
+
function buildPrompt(expectedOutcome, question, referenceAnswer) {
|
|
38755
|
+
const parts = [
|
|
38756
|
+
"You are an expert at creating evaluation rubrics.",
|
|
38757
|
+
"Given the expected outcome (and optionally the question and reference answer),",
|
|
38758
|
+
"generate a list of specific, measurable rubric items to evaluate whether an answer meets the expected outcome.",
|
|
38759
|
+
"",
|
|
38760
|
+
"Each rubric should:",
|
|
38761
|
+
"- Be specific and testable",
|
|
38762
|
+
"- Have a short, descriptive ID",
|
|
38763
|
+
"- Include a clear description of what to check",
|
|
38764
|
+
"- Indicate if it is required (mandatory) or optional",
|
|
38765
|
+
"- Have an appropriate weight (default 1.0, use higher values for more important aspects)",
|
|
38766
|
+
"",
|
|
38767
|
+
"Generate 3-7 rubric items that comprehensively cover the expected outcome.",
|
|
38768
|
+
"",
|
|
38769
|
+
"[[ ## expected_outcome ## ]]",
|
|
38770
|
+
expectedOutcome,
|
|
38771
|
+
""
|
|
38772
|
+
];
|
|
38773
|
+
if (question && question.trim().length > 0) {
|
|
38774
|
+
parts.push("[[ ## question ## ]]", question, "");
|
|
38775
|
+
}
|
|
38776
|
+
if (referenceAnswer && referenceAnswer.trim().length > 0) {
|
|
38777
|
+
parts.push("[[ ## reference_answer ## ]]", referenceAnswer, "");
|
|
38778
|
+
}
|
|
38779
|
+
return parts.join("\n");
|
|
38780
|
+
}
|
|
38473
38781
|
function createAgentKernel() {
|
|
38474
38782
|
return { status: "stub" };
|
|
38475
38783
|
}
|
|
@@ -38927,12 +39235,12 @@ var ProgressDisplay = class {
|
|
|
38927
39235
|
}
|
|
38928
39236
|
addLogPaths(paths) {
|
|
38929
39237
|
const newPaths = [];
|
|
38930
|
-
for (const
|
|
38931
|
-
if (this.logPathSet.has(
|
|
39238
|
+
for (const path26 of paths) {
|
|
39239
|
+
if (this.logPathSet.has(path26)) {
|
|
38932
39240
|
continue;
|
|
38933
39241
|
}
|
|
38934
|
-
this.logPathSet.add(
|
|
38935
|
-
newPaths.push(
|
|
39242
|
+
this.logPathSet.add(path26);
|
|
39243
|
+
newPaths.push(path26);
|
|
38936
39244
|
}
|
|
38937
39245
|
if (newPaths.length === 0) {
|
|
38938
39246
|
return;
|
|
@@ -38948,8 +39256,8 @@ var ProgressDisplay = class {
|
|
|
38948
39256
|
this.hasPrintedLogHeader = true;
|
|
38949
39257
|
}
|
|
38950
39258
|
const startIndex = this.logPaths.length - newPaths.length;
|
|
38951
|
-
newPaths.forEach((
|
|
38952
|
-
console.log(`${startIndex + offset + 1}. ${
|
|
39259
|
+
newPaths.forEach((path26, offset) => {
|
|
39260
|
+
console.log(`${startIndex + offset + 1}. ${path26}`);
|
|
38953
39261
|
});
|
|
38954
39262
|
}
|
|
38955
39263
|
scheduleRender() {
|
|
@@ -38997,8 +39305,8 @@ var ProgressDisplay = class {
|
|
|
38997
39305
|
if (this.logPaths.length > 0) {
|
|
38998
39306
|
lines.push("");
|
|
38999
39307
|
lines.push("Codex CLI logs:");
|
|
39000
|
-
this.logPaths.forEach((
|
|
39001
|
-
lines.push(`${index + 1}. ${
|
|
39308
|
+
this.logPaths.forEach((path26, index) => {
|
|
39309
|
+
lines.push(`${index + 1}. ${path26}`);
|
|
39002
39310
|
});
|
|
39003
39311
|
}
|
|
39004
39312
|
const rowCount = this.getRenderedRowCount(lines);
|
|
@@ -39203,11 +39511,6 @@ function formatEvaluationSummary(summary) {
|
|
|
39203
39511
|
return lines.join("\n");
|
|
39204
39512
|
}
|
|
39205
39513
|
|
|
39206
|
-
// src/commands/eval/targets.ts
|
|
39207
|
-
import { constants as constants5 } from "node:fs";
|
|
39208
|
-
import { access as access5 } from "node:fs/promises";
|
|
39209
|
-
import path17 from "node:path";
|
|
39210
|
-
|
|
39211
39514
|
// ../../packages/core/dist/evaluation/validation/index.js
|
|
39212
39515
|
import { readFile as readFile7 } from "node:fs/promises";
|
|
39213
39516
|
import { parse as parse6 } from "yaml";
|
|
@@ -39323,13 +39626,13 @@ async function validateEvalFile(filePath) {
|
|
|
39323
39626
|
message: "Missing or invalid 'id' field (must be a non-empty string)"
|
|
39324
39627
|
});
|
|
39325
39628
|
}
|
|
39326
|
-
const
|
|
39327
|
-
if (typeof
|
|
39629
|
+
const expectedOutcome = evalCase.expected_outcome ?? evalCase.outcome;
|
|
39630
|
+
if (expectedOutcome !== void 0 && (typeof expectedOutcome !== "string" || expectedOutcome.trim().length === 0)) {
|
|
39328
39631
|
errors.push({
|
|
39329
39632
|
severity: "error",
|
|
39330
39633
|
filePath: absolutePath,
|
|
39331
|
-
location: `${location}.
|
|
39332
|
-
message: "
|
|
39634
|
+
location: `${location}.expected_outcome`,
|
|
39635
|
+
message: "Invalid 'expected_outcome' or 'outcome' field (must be a non-empty string if provided)"
|
|
39333
39636
|
});
|
|
39334
39637
|
}
|
|
39335
39638
|
const inputMessages = evalCase.input_messages;
|
|
@@ -40064,19 +40367,16 @@ async function validateMessagesFileRefs(messages, location, searchRoots, filePat
|
|
|
40064
40367
|
}
|
|
40065
40368
|
}
|
|
40066
40369
|
|
|
40067
|
-
// src/
|
|
40370
|
+
// src/utils/targets.ts
|
|
40371
|
+
import { constants as constants5 } from "node:fs";
|
|
40372
|
+
import { access as access5 } from "node:fs/promises";
|
|
40373
|
+
import path17 from "node:path";
|
|
40068
40374
|
var TARGET_FILE_CANDIDATES = [
|
|
40069
40375
|
"targets.yaml",
|
|
40070
40376
|
"targets.yml",
|
|
40071
40377
|
path17.join(".agentv", "targets.yaml"),
|
|
40072
40378
|
path17.join(".agentv", "targets.yml")
|
|
40073
40379
|
];
|
|
40074
|
-
var ANSI_YELLOW7 = "\x1B[33m";
|
|
40075
|
-
var ANSI_RED2 = "\x1B[31m";
|
|
40076
|
-
var ANSI_RESET7 = "\x1B[0m";
|
|
40077
|
-
function isTTY() {
|
|
40078
|
-
return process.stdout.isTTY ?? false;
|
|
40079
|
-
}
|
|
40080
40380
|
async function fileExists5(filePath) {
|
|
40081
40381
|
try {
|
|
40082
40382
|
await access5(filePath, constants5.F_OK);
|
|
@@ -40085,10 +40385,6 @@ async function fileExists5(filePath) {
|
|
|
40085
40385
|
return false;
|
|
40086
40386
|
}
|
|
40087
40387
|
}
|
|
40088
|
-
async function readTestSuiteTarget(testFilePath) {
|
|
40089
|
-
const metadata = await readTestSuiteMetadata(testFilePath);
|
|
40090
|
-
return metadata.target;
|
|
40091
|
-
}
|
|
40092
40388
|
async function discoverTargetsFile(options) {
|
|
40093
40389
|
const { explicitPath, testFilePath, repoRoot, cwd } = options;
|
|
40094
40390
|
if (explicitPath) {
|
|
@@ -40119,6 +40415,18 @@ async function discoverTargetsFile(options) {
|
|
|
40119
40415
|
}
|
|
40120
40416
|
throw new Error("Unable to locate targets.yaml. Use --targets to specify the file explicitly.");
|
|
40121
40417
|
}
|
|
40418
|
+
|
|
40419
|
+
// src/commands/eval/targets.ts
|
|
40420
|
+
var ANSI_YELLOW7 = "\x1B[33m";
|
|
40421
|
+
var ANSI_RED2 = "\x1B[31m";
|
|
40422
|
+
var ANSI_RESET7 = "\x1B[0m";
|
|
40423
|
+
function isTTY() {
|
|
40424
|
+
return process.stdout.isTTY ?? false;
|
|
40425
|
+
}
|
|
40426
|
+
async function readTestSuiteTarget(testFilePath) {
|
|
40427
|
+
const metadata = await readTestSuiteMetadata(testFilePath);
|
|
40428
|
+
return metadata.target;
|
|
40429
|
+
}
|
|
40122
40430
|
function pickTargetName(options) {
|
|
40123
40431
|
const cliName = options.cliTargetName?.trim();
|
|
40124
40432
|
if (cliName && cliName !== "default") {
|
|
@@ -40705,14 +41013,174 @@ async function resolveEvalPaths(evalPaths, cwd) {
|
|
|
40705
41013
|
return sorted;
|
|
40706
41014
|
}
|
|
40707
41015
|
|
|
41016
|
+
// src/commands/generate/rubrics.ts
|
|
41017
|
+
import { readFile as readFile8, writeFile as writeFile6 } from "node:fs/promises";
|
|
41018
|
+
import path20 from "node:path";
|
|
41019
|
+
import { pathToFileURL as pathToFileURL2 } from "node:url";
|
|
41020
|
+
import { isMap, isSeq, parseDocument } from "yaml";
|
|
41021
|
+
function isJsonObject3(value) {
|
|
41022
|
+
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
41023
|
+
}
|
|
41024
|
+
function asString6(value) {
|
|
41025
|
+
return typeof value === "string" ? value : void 0;
|
|
41026
|
+
}
|
|
41027
|
+
async function loadRubricGenerator() {
|
|
41028
|
+
const customGenerator = process.env.AGENTEVO_CLI_RUBRIC_GENERATOR;
|
|
41029
|
+
if (customGenerator) {
|
|
41030
|
+
const generatorPath = path20.resolve(customGenerator);
|
|
41031
|
+
const generatorUrl = pathToFileURL2(generatorPath).href;
|
|
41032
|
+
const module = await import(generatorUrl);
|
|
41033
|
+
return module.generateRubrics;
|
|
41034
|
+
}
|
|
41035
|
+
return generateRubrics;
|
|
41036
|
+
}
|
|
41037
|
+
/**
 * Generates rubrics for the eval cases of a YAML eval file and writes them
 * back into that file.
 *
 * For every entry of the top-level `evalcases` sequence that has an
 * `expected_outcome` (or `outcome`) string and no `rubrics` key yet, the
 * rubric generator is called and its result is stored under `rubrics` on the
 * corresponding YAML node. The file is rewritten only when at least one case
 * was updated.
 *
 * @param {object} options
 * @param {string} options.file - Path to the YAML eval file.
 * @param {string} [options.target] - Target name passed to selectTarget as `cliTargetName`.
 * @param {boolean} [options.verbose] - Print per-case progress details.
 * @returns {Promise<void>}
 * @throws {Error} If the file is not a valid YAML mapping, has no `evalcases`
 *   sequence, or the generator returns something other than an array.
 */
async function generateRubricsCommand(options) {
  const { file: file2, target: targetOverride, verbose } = options;
  console.log(`Generating rubrics for: ${file2}`);
  const absolutePath = path20.resolve(file2);
  const content = await readFile8(absolutePath, "utf8");
  const doc = parseDocument(content);
  // The yaml Document API collects syntax errors on `doc.errors` rather than
  // throwing, and refuses to stringify a document that has errors. Fail fast
  // here so provider calls are not spent on a file that cannot be written back.
  if (doc.errors.length > 0) {
    throw new Error(`Invalid YAML file format: ${file2} (${doc.errors[0].message})`);
  }
  const parsed = doc.toJSON();
  if (!isJsonObject3(parsed)) {
    throw new Error(`Invalid YAML file format: ${file2}`);
  }
  const evalcases = parsed.evalcases;
  if (!Array.isArray(evalcases)) {
    throw new Error(`No evalcases found in ${file2}`);
  }
  // Validate the node form up front, before any target/provider setup happens.
  const evalcasesNode = doc.getIn(["evalcases"]);
  if (!evalcasesNode || !isSeq(evalcasesNode)) {
    throw new Error("evalcases must be a sequence");
  }
  const targetSelection = await selectTarget({
    testFilePath: absolutePath,
    repoRoot: process.cwd(),
    cwd: process.cwd(),
    cliTargetName: targetOverride,
    dryRun: false,
    dryRunDelay: 0,
    dryRunDelayMin: 0,
    dryRunDelayMax: 0,
    env: process.env
  });
  if (verbose) {
    console.log(`Using target: ${targetSelection.targetName}`);
  }
  const provider = createProvider(targetSelection.resolvedTarget);
  const generateRubricsFunc = await loadRubricGenerator();
  let updatedCount = 0;
  let skippedCount = 0;
  // Index loop: the plain-JS case and its YAML node are matched by position.
  for (let i = 0; i < evalcases.length; i++) {
    const evalCase = evalcases[i];
    if (!isJsonObject3(evalCase)) {
      continue;
    }
    const id = asString6(evalCase.id) ?? "unknown";
    const expectedOutcome = asString6(evalCase.expected_outcome) ?? asString6(evalCase.outcome);
    if (!expectedOutcome) {
      if (verbose) {
        console.log(`  Skipping ${id}: no expected_outcome`);
      }
      skippedCount++;
      continue;
    }
    if (evalCase.rubrics !== void 0) {
      if (verbose) {
        console.log(`  Skipping ${id}: rubrics already defined`);
      }
      skippedCount++;
      continue;
    }
    // Check the node before calling the generator: if the node is not a map
    // (for example a YAML alias), the result could not be stored, so the case
    // must not be generated or counted as updated.
    const caseNode = evalcasesNode.items[i];
    if (!caseNode || !isMap(caseNode)) {
      if (verbose) {
        console.log(`  Skipping ${id}: eval case is not an inline YAML mapping`);
      }
      skippedCount++;
      continue;
    }
    console.log(`  Generating rubrics for: ${id}`);
    const rubrics = await generateRubricsFunc({
      expectedOutcome,
      question: extractQuestion(evalCase),
      referenceAnswer: asString6(evalCase.reference_answer),
      provider
    });
    if (!Array.isArray(rubrics)) {
      throw new Error(`Rubric generator returned a non-array result for eval case: ${id}`);
    }
    // Persist only the known rubric fields, in a fixed key order.
    caseNode.set(
      "rubrics",
      rubrics.map((r) => ({
        id: r.id,
        description: r.description,
        weight: r.weight,
        required: r.required
      }))
    );
    updatedCount++;
    if (verbose) {
      console.log(`    Generated ${rubrics.length} rubric(s)`);
    }
  }
  if (updatedCount > 0) {
    await writeFile6(absolutePath, doc.toString(), "utf8");
    console.log(`\nUpdated ${updatedCount} eval case(s) with generated rubrics`);
    if (skippedCount > 0) {
      console.log(`Skipped ${skippedCount} eval case(s)`);
    }
  } else {
    console.log("\nNo eval cases updated (all already have rubrics or missing expected_outcome)");
  }
}
|
|
41136
|
+
/**
 * Picks the question text for an eval case.
 *
 * A non-empty string `question` field wins. Otherwise the content of the
 * first entry in `input_messages` that is an object with role "user" and a
 * string `content` is returned.
 *
 * @param {object} evalCase - Raw eval case object.
 * @returns {string | undefined} The question text, or undefined when none is found.
 */
function extractQuestion(evalCase) {
  const direct = asString6(evalCase.question);
  if (direct) {
    return direct;
  }
  const messages = evalCase.input_messages;
  if (!Array.isArray(messages)) {
    return void 0;
  }
  const firstUserText = messages.find(
    (entry) => isJsonObject3(entry) && entry.role === "user" && typeof entry.content === "string"
  );
  return firstUserText ? firstUserText.content : void 0;
}
|
|
41155
|
+
|
|
41156
|
+
// src/commands/generate/index.ts
|
|
41157
|
+
/**
 * Registers the `generate` command group and its `rubrics <file>` subcommand
 * on the given CLI program.
 *
 * The subcommand forwards the file argument and the `--target` / `--verbose`
 * options to generateRubricsCommand. Any failure is printed to stderr as
 * `Error: <message>` and the process exits with code 1.
 *
 * @param {object} program - CLI program exposing a chainable `command()` builder.
 * @returns {void}
 */
function registerGenerateCommand(program) {
  const generate = program.command("generate").description("Generate evaluation artifacts");
  generate
    .command("rubrics <file>")
    .description("Generate rubrics from expected_outcome in YAML eval file")
    .option(
      "-t, --target <target>",
      "Override target for rubric generation (default: file target or openai:gpt-4o)"
    )
    .option("-v, --verbose", "Show detailed progress")
    .action(async (file2, options) => {
      try {
        await generateRubricsCommand({
          file: file2,
          target: options.target,
          verbose: options.verbose
        });
      } catch (error40) {
        // Thrown values are not guaranteed to be Error instances; avoid
        // printing "Error: undefined" for strings or other rejections.
        const message = error40 instanceof Error ? error40.message : String(error40);
        console.error(`Error: ${message}`);
        process.exit(1);
      }
    });
}
|
|
41175
|
+
|
|
40708
41176
|
// src/commands/init/index.ts
|
|
40709
41177
|
import { existsSync, mkdirSync, writeFileSync } from "node:fs";
|
|
40710
|
-
import
|
|
41178
|
+
import path24 from "node:path";
|
|
40711
41179
|
import * as readline from "node:readline/promises";
|
|
40712
41180
|
|
|
40713
41181
|
// src/templates/index.ts
|
|
40714
41182
|
import { readFileSync, readdirSync, statSync } from "node:fs";
|
|
40715
|
-
import
|
|
41183
|
+
import path21 from "node:path";
|
|
40716
41184
|
import { fileURLToPath } from "node:url";
|
|
40717
41185
|
function getGithubTemplates() {
|
|
40718
41186
|
return getTemplatesFromDir(".github");
|
|
@@ -40724,12 +41192,12 @@ function getClaudeTemplates() {
|
|
|
40724
41192
|
return getTemplatesFromDir(".claude");
|
|
40725
41193
|
}
|
|
40726
41194
|
/**
 * Loads every template file found under the given template sub-directory.
 *
 * The templates location depends on where this module lives: when the
 * module's directory path contains a `dist` directory, templates are read
 * from `<moduleDir>/templates/<subdir>`; otherwise from `<moduleDir>/<subdir>`.
 *
 * @param {string} subdir - Template sub-directory name, e.g. ".github".
 * @returns {Array} Whatever readTemplatesRecursively returns for that directory.
 */
function getTemplatesFromDir(subdir) {
  const currentDir = path21.dirname(fileURLToPath(import.meta.url));
  // Compare whole path segments: a substring test for `${sep}dist` would also
  // match unrelated directories such as "/distro/" or "/dist-old/".
  // NOTE(review): any ancestor directory named exactly "dist" still matches.
  const runsFromDist = currentDir.split(path21.sep).includes("dist");
  const templatesDir = runsFromDist
    ? path21.join(currentDir, "templates", subdir)
    : path21.join(currentDir, subdir);
  return readTemplatesRecursively(templatesDir, "");
}
|
|
@@ -40737,15 +41205,15 @@ function readTemplatesRecursively(dir, relativePath) {
|
|
|
40737
41205
|
const templates = [];
|
|
40738
41206
|
const entries = readdirSync(dir);
|
|
40739
41207
|
for (const entry of entries) {
|
|
40740
|
-
const fullPath =
|
|
41208
|
+
const fullPath = path21.join(dir, entry);
|
|
40741
41209
|
const stat6 = statSync(fullPath);
|
|
40742
|
-
const entryRelativePath = relativePath ?
|
|
41210
|
+
const entryRelativePath = relativePath ? path21.join(relativePath, entry) : entry;
|
|
40743
41211
|
if (stat6.isDirectory()) {
|
|
40744
41212
|
templates.push(...readTemplatesRecursively(fullPath, entryRelativePath));
|
|
40745
41213
|
} else {
|
|
40746
41214
|
const content = readFileSync(fullPath, "utf-8");
|
|
40747
41215
|
templates.push({
|
|
40748
|
-
path: entryRelativePath.split(
|
|
41216
|
+
path: entryRelativePath.split(path21.sep).join("/"),
|
|
40749
41217
|
// Normalize to forward slashes
|
|
40750
41218
|
content
|
|
40751
41219
|
});
|
|
@@ -40768,10 +41236,10 @@ async function promptYesNo(message) {
|
|
|
40768
41236
|
}
|
|
40769
41237
|
}
|
|
40770
41238
|
async function initCommand(options = {}) {
|
|
40771
|
-
const targetPath =
|
|
40772
|
-
const githubDir =
|
|
40773
|
-
const agentvDir =
|
|
40774
|
-
const claudeDir =
|
|
41239
|
+
const targetPath = path24.resolve(options.targetPath ?? ".");
|
|
41240
|
+
const githubDir = path24.join(targetPath, ".github");
|
|
41241
|
+
const agentvDir = path24.join(targetPath, ".agentv");
|
|
41242
|
+
const claudeDir = path24.join(targetPath, ".claude");
|
|
40775
41243
|
const githubTemplates = getGithubTemplates();
|
|
40776
41244
|
const agentvTemplates = getAgentvTemplates();
|
|
40777
41245
|
const claudeTemplates = getClaudeTemplates();
|
|
@@ -40779,32 +41247,32 @@ async function initCommand(options = {}) {
|
|
|
40779
41247
|
const otherAgentvTemplates = agentvTemplates.filter((t) => t.path !== ".env.template");
|
|
40780
41248
|
const existingFiles = [];
|
|
40781
41249
|
if (envTemplate) {
|
|
40782
|
-
const envFilePath =
|
|
41250
|
+
const envFilePath = path24.join(targetPath, ".env.template");
|
|
40783
41251
|
if (existsSync(envFilePath)) {
|
|
40784
41252
|
existingFiles.push(".env.template");
|
|
40785
41253
|
}
|
|
40786
41254
|
}
|
|
40787
41255
|
if (existsSync(githubDir)) {
|
|
40788
41256
|
for (const template of githubTemplates) {
|
|
40789
|
-
const targetFilePath =
|
|
41257
|
+
const targetFilePath = path24.join(githubDir, template.path);
|
|
40790
41258
|
if (existsSync(targetFilePath)) {
|
|
40791
|
-
existingFiles.push(
|
|
41259
|
+
existingFiles.push(path24.relative(targetPath, targetFilePath));
|
|
40792
41260
|
}
|
|
40793
41261
|
}
|
|
40794
41262
|
}
|
|
40795
41263
|
if (existsSync(agentvDir)) {
|
|
40796
41264
|
for (const template of otherAgentvTemplates) {
|
|
40797
|
-
const targetFilePath =
|
|
41265
|
+
const targetFilePath = path24.join(agentvDir, template.path);
|
|
40798
41266
|
if (existsSync(targetFilePath)) {
|
|
40799
|
-
existingFiles.push(
|
|
41267
|
+
existingFiles.push(path24.relative(targetPath, targetFilePath));
|
|
40800
41268
|
}
|
|
40801
41269
|
}
|
|
40802
41270
|
}
|
|
40803
41271
|
if (existsSync(claudeDir)) {
|
|
40804
41272
|
for (const template of claudeTemplates) {
|
|
40805
|
-
const targetFilePath =
|
|
41273
|
+
const targetFilePath = path24.join(claudeDir, template.path);
|
|
40806
41274
|
if (existsSync(targetFilePath)) {
|
|
40807
|
-
existingFiles.push(
|
|
41275
|
+
existingFiles.push(path24.relative(targetPath, targetFilePath));
|
|
40808
41276
|
}
|
|
40809
41277
|
}
|
|
40810
41278
|
}
|
|
@@ -40831,36 +41299,36 @@ async function initCommand(options = {}) {
|
|
|
40831
41299
|
mkdirSync(claudeDir, { recursive: true });
|
|
40832
41300
|
}
|
|
40833
41301
|
if (envTemplate) {
|
|
40834
|
-
const envFilePath =
|
|
41302
|
+
const envFilePath = path24.join(targetPath, ".env.template");
|
|
40835
41303
|
writeFileSync(envFilePath, envTemplate.content, "utf-8");
|
|
40836
41304
|
console.log("Created .env.template");
|
|
40837
41305
|
}
|
|
40838
41306
|
for (const template of githubTemplates) {
|
|
40839
|
-
const targetFilePath =
|
|
40840
|
-
const targetDirPath =
|
|
41307
|
+
const targetFilePath = path24.join(githubDir, template.path);
|
|
41308
|
+
const targetDirPath = path24.dirname(targetFilePath);
|
|
40841
41309
|
if (!existsSync(targetDirPath)) {
|
|
40842
41310
|
mkdirSync(targetDirPath, { recursive: true });
|
|
40843
41311
|
}
|
|
40844
41312
|
writeFileSync(targetFilePath, template.content, "utf-8");
|
|
40845
|
-
console.log(`Created ${
|
|
41313
|
+
console.log(`Created ${path24.relative(targetPath, targetFilePath)}`);
|
|
40846
41314
|
}
|
|
40847
41315
|
for (const template of otherAgentvTemplates) {
|
|
40848
|
-
const targetFilePath =
|
|
40849
|
-
const targetDirPath =
|
|
41316
|
+
const targetFilePath = path24.join(agentvDir, template.path);
|
|
41317
|
+
const targetDirPath = path24.dirname(targetFilePath);
|
|
40850
41318
|
if (!existsSync(targetDirPath)) {
|
|
40851
41319
|
mkdirSync(targetDirPath, { recursive: true });
|
|
40852
41320
|
}
|
|
40853
41321
|
writeFileSync(targetFilePath, template.content, "utf-8");
|
|
40854
|
-
console.log(`Created ${
|
|
41322
|
+
console.log(`Created ${path24.relative(targetPath, targetFilePath)}`);
|
|
40855
41323
|
}
|
|
40856
41324
|
for (const template of claudeTemplates) {
|
|
40857
|
-
const targetFilePath =
|
|
40858
|
-
const targetDirPath =
|
|
41325
|
+
const targetFilePath = path24.join(claudeDir, template.path);
|
|
41326
|
+
const targetDirPath = path24.dirname(targetFilePath);
|
|
40859
41327
|
if (!existsSync(targetDirPath)) {
|
|
40860
41328
|
mkdirSync(targetDirPath, { recursive: true });
|
|
40861
41329
|
}
|
|
40862
41330
|
writeFileSync(targetFilePath, template.content, "utf-8");
|
|
40863
|
-
console.log(`Created ${
|
|
41331
|
+
console.log(`Created ${path24.relative(targetPath, targetFilePath)}`);
|
|
40864
41332
|
}
|
|
40865
41333
|
console.log("\nAgentV initialized successfully!");
|
|
40866
41334
|
console.log("\nFiles installed to root:");
|
|
@@ -40868,17 +41336,17 @@ async function initCommand(options = {}) {
|
|
|
40868
41336
|
console.log(" - .env.template");
|
|
40869
41337
|
}
|
|
40870
41338
|
console.log(`
|
|
40871
|
-
Files installed to ${
|
|
41339
|
+
Files installed to ${path24.relative(targetPath, githubDir)}:`);
|
|
40872
41340
|
for (const t of githubTemplates) {
|
|
40873
41341
|
console.log(` - ${t.path}`);
|
|
40874
41342
|
}
|
|
40875
41343
|
console.log(`
|
|
40876
|
-
Files installed to ${
|
|
41344
|
+
Files installed to ${path24.relative(targetPath, agentvDir)}:`);
|
|
40877
41345
|
for (const t of otherAgentvTemplates) {
|
|
40878
41346
|
console.log(` - ${t.path}`);
|
|
40879
41347
|
}
|
|
40880
41348
|
console.log(`
|
|
40881
|
-
Files installed to ${
|
|
41349
|
+
Files installed to ${path24.relative(targetPath, claudeDir)}:`);
|
|
40882
41350
|
for (const t of claudeTemplates) {
|
|
40883
41351
|
console.log(` - ${t.path}`);
|
|
40884
41352
|
}
|
|
@@ -40980,7 +41448,7 @@ function isTTY2() {
|
|
|
40980
41448
|
// src/commands/validate/validate-files.ts
|
|
40981
41449
|
import { constants as constants7 } from "node:fs";
|
|
40982
41450
|
import { access as access7, readdir as readdir3, stat as stat5 } from "node:fs/promises";
|
|
40983
|
-
import
|
|
41451
|
+
import path25 from "node:path";
|
|
40984
41452
|
async function validateFiles(paths) {
|
|
40985
41453
|
const filePaths = await expandPaths(paths);
|
|
40986
41454
|
const results = [];
|
|
@@ -40998,7 +41466,7 @@ async function validateFiles(paths) {
|
|
|
40998
41466
|
};
|
|
40999
41467
|
}
|
|
41000
41468
|
async function validateSingleFile(filePath) {
|
|
41001
|
-
const absolutePath =
|
|
41469
|
+
const absolutePath = path25.resolve(filePath);
|
|
41002
41470
|
const fileType = await detectFileType(absolutePath);
|
|
41003
41471
|
if (fileType === "unknown") {
|
|
41004
41472
|
return {
|
|
@@ -41037,7 +41505,7 @@ async function validateSingleFile(filePath) {
|
|
|
41037
41505
|
async function expandPaths(paths) {
|
|
41038
41506
|
const expanded = [];
|
|
41039
41507
|
for (const inputPath of paths) {
|
|
41040
|
-
const absolutePath =
|
|
41508
|
+
const absolutePath = path25.resolve(inputPath);
|
|
41041
41509
|
try {
|
|
41042
41510
|
await access7(absolutePath, constants7.F_OK);
|
|
41043
41511
|
} catch {
|
|
@@ -41061,7 +41529,7 @@ async function findYamlFiles(dirPath) {
|
|
|
41061
41529
|
try {
|
|
41062
41530
|
const entries = await readdir3(dirPath, { withFileTypes: true });
|
|
41063
41531
|
for (const entry of entries) {
|
|
41064
|
-
const fullPath =
|
|
41532
|
+
const fullPath = path25.join(dirPath, entry.name);
|
|
41065
41533
|
if (entry.isDirectory()) {
|
|
41066
41534
|
if (entry.name === "node_modules" || entry.name.startsWith(".")) {
|
|
41067
41535
|
continue;
|
|
@@ -41078,7 +41546,7 @@ async function findYamlFiles(dirPath) {
|
|
|
41078
41546
|
return results;
|
|
41079
41547
|
}
|
|
41080
41548
|
/**
 * Tells whether a path has a YAML file extension.
 *
 * @param {string} filePath - Path or file name to inspect.
 * @returns {boolean} True when the extension is ".yaml" or ".yml", ignoring case.
 */
function isYamlFile(filePath) {
  const yamlExtensions = [".yaml", ".yml"];
  const extension = path25.extname(filePath);
  return yamlExtensions.includes(extension.toLowerCase());
}
|
|
41084
41552
|
|
|
@@ -41115,6 +41583,7 @@ function createProgram() {
|
|
|
41115
41583
|
registerStatusCommand(program);
|
|
41116
41584
|
registerEvalCommand(program);
|
|
41117
41585
|
registerValidateCommand(program);
|
|
41586
|
+
registerGenerateCommand(program);
|
|
41118
41587
|
program.command("init [path]").description(
|
|
41119
41588
|
"Initialize AgentV in your project (installs prompt templates and schema to .github)"
|
|
41120
41589
|
).action(async (targetPath) => {
|
|
@@ -41137,4 +41606,4 @@ export {
|
|
|
41137
41606
|
createProgram,
|
|
41138
41607
|
runCli
|
|
41139
41608
|
};
|
|
41140
|
-
//# sourceMappingURL=chunk-
|
|
41609
|
+
//# sourceMappingURL=chunk-MA3MJNJH.js.map
|