agentv 0.21.0 → 0.21.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-MA3MJNJH.js → chunk-WOCXZEH4.js} +265 -158
- package/dist/chunk-WOCXZEH4.js.map +1 -0
- package/dist/cli.js +5 -2
- package/dist/cli.js.map +1 -1
- package/dist/index.js +3 -3
- package/dist/templates/.claude/skills/agentv-eval-builder/SKILL.md +3 -3
- package/dist/templates/agentv/.env.template +23 -0
- package/package.json +8 -5
- package/dist/chunk-MA3MJNJH.js.map +0 -1
|
@@ -142,11 +142,20 @@ var require_dist = __commonJS({
|
|
|
142
142
|
|
|
143
143
|
// src/index.ts
|
|
144
144
|
import { readFileSync as readFileSync2 } from "node:fs";
|
|
145
|
-
import {
|
|
145
|
+
import { binary, run, subcommands as subcommands2 } from "cmd-ts";
|
|
146
146
|
|
|
147
147
|
// src/commands/eval/index.ts
|
|
148
148
|
import { stat as stat4 } from "node:fs/promises";
|
|
149
149
|
import path19 from "node:path";
|
|
150
|
+
import {
|
|
151
|
+
command,
|
|
152
|
+
flag,
|
|
153
|
+
number as number4,
|
|
154
|
+
option,
|
|
155
|
+
optional as optional2,
|
|
156
|
+
restPositionals,
|
|
157
|
+
string as string4
|
|
158
|
+
} from "cmd-ts";
|
|
150
159
|
import fg from "fast-glob";
|
|
151
160
|
|
|
152
161
|
// src/commands/eval/run-eval.ts
|
|
@@ -155,7 +164,7 @@ import { access as access6, mkdir as mkdir7 } from "node:fs/promises";
|
|
|
155
164
|
import path18 from "node:path";
|
|
156
165
|
import { pathToFileURL } from "node:url";
|
|
157
166
|
|
|
158
|
-
// ../../packages/core/dist/chunk-
|
|
167
|
+
// ../../packages/core/dist/chunk-B2J23S7D.js
|
|
159
168
|
import { constants } from "node:fs";
|
|
160
169
|
import { access, readFile } from "node:fs/promises";
|
|
161
170
|
import path from "node:path";
|
|
@@ -1039,8 +1048,8 @@ var ZodType = class {
|
|
|
1039
1048
|
promise() {
|
|
1040
1049
|
return ZodPromise.create(this, this._def);
|
|
1041
1050
|
}
|
|
1042
|
-
or(
|
|
1043
|
-
return ZodUnion.create([this,
|
|
1051
|
+
or(option4) {
|
|
1052
|
+
return ZodUnion.create([this, option4], this._def);
|
|
1044
1053
|
}
|
|
1045
1054
|
and(incoming) {
|
|
1046
1055
|
return ZodIntersection.create(this, incoming, this._def);
|
|
@@ -2890,7 +2899,7 @@ var ZodUnion = class extends ZodType {
|
|
|
2890
2899
|
return INVALID;
|
|
2891
2900
|
}
|
|
2892
2901
|
if (ctx.common.async) {
|
|
2893
|
-
return Promise.all(options.map(async (
|
|
2902
|
+
return Promise.all(options.map(async (option4) => {
|
|
2894
2903
|
const childCtx = {
|
|
2895
2904
|
...ctx,
|
|
2896
2905
|
common: {
|
|
@@ -2900,7 +2909,7 @@ var ZodUnion = class extends ZodType {
|
|
|
2900
2909
|
parent: null
|
|
2901
2910
|
};
|
|
2902
2911
|
return {
|
|
2903
|
-
result: await
|
|
2912
|
+
result: await option4._parseAsync({
|
|
2904
2913
|
data: ctx.data,
|
|
2905
2914
|
path: ctx.path,
|
|
2906
2915
|
parent: childCtx
|
|
@@ -2911,7 +2920,7 @@ var ZodUnion = class extends ZodType {
|
|
|
2911
2920
|
} else {
|
|
2912
2921
|
let dirty = void 0;
|
|
2913
2922
|
const issues = [];
|
|
2914
|
-
for (const
|
|
2923
|
+
for (const option4 of options) {
|
|
2915
2924
|
const childCtx = {
|
|
2916
2925
|
...ctx,
|
|
2917
2926
|
common: {
|
|
@@ -2920,7 +2929,7 @@ var ZodUnion = class extends ZodType {
|
|
|
2920
2929
|
},
|
|
2921
2930
|
parent: null
|
|
2922
2931
|
};
|
|
2923
|
-
const result =
|
|
2932
|
+
const result = option4._parseSync({
|
|
2924
2933
|
data: ctx.data,
|
|
2925
2934
|
path: ctx.path,
|
|
2926
2935
|
parent: childCtx
|
|
@@ -3001,8 +3010,8 @@ var ZodDiscriminatedUnion = class _ZodDiscriminatedUnion extends ZodType {
|
|
|
3001
3010
|
}
|
|
3002
3011
|
const discriminator = this.discriminator;
|
|
3003
3012
|
const discriminatorValue = ctx.data[discriminator];
|
|
3004
|
-
const
|
|
3005
|
-
if (!
|
|
3013
|
+
const option4 = this.optionsMap.get(discriminatorValue);
|
|
3014
|
+
if (!option4) {
|
|
3006
3015
|
addIssueToContext(ctx, {
|
|
3007
3016
|
code: ZodIssueCode.invalid_union_discriminator,
|
|
3008
3017
|
options: Array.from(this.optionsMap.keys()),
|
|
@@ -3011,13 +3020,13 @@ var ZodDiscriminatedUnion = class _ZodDiscriminatedUnion extends ZodType {
|
|
|
3011
3020
|
return INVALID;
|
|
3012
3021
|
}
|
|
3013
3022
|
if (ctx.common.async) {
|
|
3014
|
-
return
|
|
3023
|
+
return option4._parseAsync({
|
|
3015
3024
|
data: ctx.data,
|
|
3016
3025
|
path: ctx.path,
|
|
3017
3026
|
parent: ctx
|
|
3018
3027
|
});
|
|
3019
3028
|
} else {
|
|
3020
|
-
return
|
|
3029
|
+
return option4._parseSync({
|
|
3021
3030
|
data: ctx.data,
|
|
3022
3031
|
path: ctx.path,
|
|
3023
3032
|
parent: ctx
|
|
@@ -4201,7 +4210,7 @@ var coerce = {
|
|
|
4201
4210
|
};
|
|
4202
4211
|
var NEVER = INVALID;
|
|
4203
4212
|
|
|
4204
|
-
// ../../packages/core/dist/chunk-
|
|
4213
|
+
// ../../packages/core/dist/chunk-B2J23S7D.js
|
|
4205
4214
|
async function fileExists(filePath) {
|
|
4206
4215
|
try {
|
|
4207
4216
|
await access(filePath, constants.F_OK);
|
|
@@ -4577,9 +4586,9 @@ function resolveVSCodeConfig(target, env, insiders) {
|
|
|
4577
4586
|
const dryRunSource = target.dry_run ?? target.dryRun;
|
|
4578
4587
|
const subagentRootSource = target.subagent_root ?? target.subagentRoot;
|
|
4579
4588
|
const defaultCommand = insiders ? "code-insiders" : "code";
|
|
4580
|
-
const
|
|
4589
|
+
const command5 = resolveOptionalLiteralString(commandSource) ?? defaultCommand;
|
|
4581
4590
|
return {
|
|
4582
|
-
command,
|
|
4591
|
+
command: command5,
|
|
4583
4592
|
waitForResponse: resolveOptionalBoolean(waitSource) ?? true,
|
|
4584
4593
|
dryRun: resolveOptionalBoolean(dryRunSource) ?? false,
|
|
4585
4594
|
subagentRoot: resolveOptionalString(subagentRootSource, env, `${target.name} subagent root`, {
|
|
@@ -8081,7 +8090,7 @@ var $ZodUnion = /* @__PURE__ */ $constructor("$ZodUnion", (inst, def) => {
|
|
|
8081
8090
|
defineLazy(inst._zod, "optout", () => def.options.some((o) => o._zod.optout === "optional") ? "optional" : void 0);
|
|
8082
8091
|
defineLazy(inst._zod, "values", () => {
|
|
8083
8092
|
if (def.options.every((o) => o._zod.values)) {
|
|
8084
|
-
return new Set(def.options.flatMap((
|
|
8093
|
+
return new Set(def.options.flatMap((option4) => Array.from(option4._zod.values)));
|
|
8085
8094
|
}
|
|
8086
8095
|
return void 0;
|
|
8087
8096
|
});
|
|
@@ -8095,8 +8104,8 @@ var $ZodUnion = /* @__PURE__ */ $constructor("$ZodUnion", (inst, def) => {
|
|
|
8095
8104
|
inst._zod.parse = (payload, ctx) => {
|
|
8096
8105
|
let async = false;
|
|
8097
8106
|
const results = [];
|
|
8098
|
-
for (const
|
|
8099
|
-
const result =
|
|
8107
|
+
for (const option4 of def.options) {
|
|
8108
|
+
const result = option4._zod.run({
|
|
8100
8109
|
value: payload.value,
|
|
8101
8110
|
issues: []
|
|
8102
8111
|
}, ctx);
|
|
@@ -8121,10 +8130,10 @@ var $ZodDiscriminatedUnion = /* @__PURE__ */ $constructor("$ZodDiscriminatedUnio
|
|
|
8121
8130
|
const _super = inst._zod.parse;
|
|
8122
8131
|
defineLazy(inst._zod, "propValues", () => {
|
|
8123
8132
|
const propValues = {};
|
|
8124
|
-
for (const
|
|
8125
|
-
const pv =
|
|
8133
|
+
for (const option4 of def.options) {
|
|
8134
|
+
const pv = option4._zod.propValues;
|
|
8126
8135
|
if (!pv || Object.keys(pv).length === 0)
|
|
8127
|
-
throw new Error(`Invalid discriminated union option at index "${def.options.indexOf(
|
|
8136
|
+
throw new Error(`Invalid discriminated union option at index "${def.options.indexOf(option4)}"`);
|
|
8128
8137
|
for (const [k, v] of Object.entries(pv)) {
|
|
8129
8138
|
if (!propValues[k])
|
|
8130
8139
|
propValues[k] = /* @__PURE__ */ new Set();
|
|
@@ -15328,8 +15337,8 @@ function isTransforming(_schema, _ctx) {
|
|
|
15328
15337
|
return false;
|
|
15329
15338
|
}
|
|
15330
15339
|
case "union": {
|
|
15331
|
-
for (const
|
|
15332
|
-
if (isTransforming(
|
|
15340
|
+
for (const option4 of def.options) {
|
|
15341
|
+
if (isTransforming(option4, ctx))
|
|
15333
15342
|
return true;
|
|
15334
15343
|
}
|
|
15335
15344
|
return false;
|
|
@@ -35887,7 +35896,7 @@ async function withRetry(fn, retryConfig, signal) {
|
|
|
35887
35896
|
}
|
|
35888
35897
|
var execAsync2 = promisify2(execWithCallback);
|
|
35889
35898
|
var DEFAULT_MAX_BUFFER = 10 * 1024 * 1024;
|
|
35890
|
-
async function defaultCommandRunner(
|
|
35899
|
+
async function defaultCommandRunner(command5, options) {
|
|
35891
35900
|
const execOptions = {
|
|
35892
35901
|
cwd: options.cwd,
|
|
35893
35902
|
env: options.env,
|
|
@@ -35897,7 +35906,7 @@ async function defaultCommandRunner(command, options) {
|
|
|
35897
35906
|
shell: process.platform === "win32" ? "powershell.exe" : void 0
|
|
35898
35907
|
};
|
|
35899
35908
|
try {
|
|
35900
|
-
const { stdout, stderr } = await execAsync2(
|
|
35909
|
+
const { stdout, stderr } = await execAsync2(command5, execOptions);
|
|
35901
35910
|
return {
|
|
35902
35911
|
stdout,
|
|
35903
35912
|
stderr,
|
|
@@ -37821,7 +37830,7 @@ function pLimit(concurrency) {
|
|
|
37821
37830
|
activeCount--;
|
|
37822
37831
|
resumeNext();
|
|
37823
37832
|
};
|
|
37824
|
-
const
|
|
37833
|
+
const run2 = async (function_, resolve2, arguments_) => {
|
|
37825
37834
|
const result = (async () => function_(...arguments_))();
|
|
37826
37835
|
resolve2(result);
|
|
37827
37836
|
try {
|
|
@@ -37834,7 +37843,7 @@ function pLimit(concurrency) {
|
|
|
37834
37843
|
new Promise((internalResolve) => {
|
|
37835
37844
|
queue.enqueue(internalResolve);
|
|
37836
37845
|
}).then(
|
|
37837
|
-
|
|
37846
|
+
run2.bind(void 0, function_, resolve2, arguments_)
|
|
37838
37847
|
);
|
|
37839
37848
|
(async () => {
|
|
37840
37849
|
await Promise.resolve();
|
|
@@ -38778,9 +38787,6 @@ function buildPrompt(expectedOutcome, question, referenceAnswer) {
|
|
|
38778
38787
|
}
|
|
38779
38788
|
return parts.join("\n");
|
|
38780
38789
|
}
|
|
38781
|
-
function createAgentKernel() {
|
|
38782
|
-
return { status: "stub" };
|
|
38783
|
-
}
|
|
38784
38790
|
|
|
38785
38791
|
// src/commands/eval/env.ts
|
|
38786
38792
|
import { constants as constants4 } from "node:fs";
|
|
@@ -39513,17 +39519,18 @@ function formatEvaluationSummary(summary) {
|
|
|
39513
39519
|
|
|
39514
39520
|
// ../../packages/core/dist/evaluation/validation/index.js
|
|
39515
39521
|
import { readFile as readFile7 } from "node:fs/promises";
|
|
39522
|
+
import path16 from "node:path";
|
|
39516
39523
|
import { parse as parse6 } from "yaml";
|
|
39517
39524
|
import { readFile as readFile23 } from "node:fs/promises";
|
|
39518
|
-
import
|
|
39525
|
+
import path23 from "node:path";
|
|
39519
39526
|
import { parse as parse23 } from "yaml";
|
|
39520
39527
|
import { readFile as readFile33 } from "node:fs/promises";
|
|
39521
|
-
import
|
|
39528
|
+
import path33 from "node:path";
|
|
39522
39529
|
import { parse as parse33 } from "yaml";
|
|
39523
39530
|
import { readFile as readFile43 } from "node:fs/promises";
|
|
39524
39531
|
import { parse as parse42 } from "yaml";
|
|
39525
39532
|
import { readFile as readFile52 } from "node:fs/promises";
|
|
39526
|
-
import
|
|
39533
|
+
import path43 from "node:path";
|
|
39527
39534
|
import { parse as parse52 } from "yaml";
|
|
39528
39535
|
var SCHEMA_EVAL_V2 = "agentv-eval-v2";
|
|
39529
39536
|
var SCHEMA_TARGETS_V2 = "agentv-targets-v2.2";
|
|
@@ -39533,12 +39540,12 @@ async function detectFileType(filePath) {
|
|
|
39533
39540
|
const content = await readFile7(filePath, "utf8");
|
|
39534
39541
|
const parsed = parse6(content);
|
|
39535
39542
|
if (typeof parsed !== "object" || parsed === null) {
|
|
39536
|
-
return
|
|
39543
|
+
return inferFileTypeFromPath(filePath);
|
|
39537
39544
|
}
|
|
39538
39545
|
const record2 = parsed;
|
|
39539
39546
|
const schema = record2.$schema;
|
|
39540
39547
|
if (typeof schema !== "string") {
|
|
39541
|
-
return
|
|
39548
|
+
return inferFileTypeFromPath(filePath);
|
|
39542
39549
|
}
|
|
39543
39550
|
switch (schema) {
|
|
39544
39551
|
case SCHEMA_EVAL_V2:
|
|
@@ -39548,18 +39555,31 @@ async function detectFileType(filePath) {
|
|
|
39548
39555
|
case SCHEMA_CONFIG_V22:
|
|
39549
39556
|
return "config";
|
|
39550
39557
|
default:
|
|
39551
|
-
return
|
|
39558
|
+
return inferFileTypeFromPath(filePath);
|
|
39552
39559
|
}
|
|
39553
39560
|
} catch {
|
|
39554
|
-
return
|
|
39561
|
+
return inferFileTypeFromPath(filePath);
|
|
39562
|
+
}
|
|
39563
|
+
}
|
|
39564
|
+
function inferFileTypeFromPath(filePath) {
|
|
39565
|
+
const normalized = path16.normalize(filePath).replace(/\\/g, "/");
|
|
39566
|
+
const basename = path16.basename(filePath);
|
|
39567
|
+
if (normalized.includes("/.agentv/")) {
|
|
39568
|
+
if (basename === "config.yaml" || basename === "config.yml") {
|
|
39569
|
+
return "config";
|
|
39570
|
+
}
|
|
39571
|
+
if (basename === "targets.yaml" || basename === "targets.yml") {
|
|
39572
|
+
return "targets";
|
|
39573
|
+
}
|
|
39555
39574
|
}
|
|
39575
|
+
return "eval";
|
|
39556
39576
|
}
|
|
39557
39577
|
function isObject2(value) {
|
|
39558
39578
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
39559
39579
|
}
|
|
39560
39580
|
async function validateEvalFile(filePath) {
|
|
39561
39581
|
const errors = [];
|
|
39562
|
-
const absolutePath =
|
|
39582
|
+
const absolutePath = path23.resolve(filePath);
|
|
39563
39583
|
let parsed;
|
|
39564
39584
|
try {
|
|
39565
39585
|
const content = await readFile23(absolutePath, "utf8");
|
|
@@ -39908,7 +39928,7 @@ function validateUnknownSettings(target, provider, absolutePath, location, error
|
|
|
39908
39928
|
}
|
|
39909
39929
|
async function validateTargetsFile(filePath) {
|
|
39910
39930
|
const errors = [];
|
|
39911
|
-
const absolutePath =
|
|
39931
|
+
const absolutePath = path33.resolve(filePath);
|
|
39912
39932
|
let parsed;
|
|
39913
39933
|
try {
|
|
39914
39934
|
const content = await readFile33(absolutePath, "utf8");
|
|
@@ -40187,8 +40207,8 @@ async function validateConfigFile(filePath) {
|
|
|
40187
40207
|
}
|
|
40188
40208
|
const config2 = parsed;
|
|
40189
40209
|
const schema = config2.$schema;
|
|
40190
|
-
if (schema !== SCHEMA_CONFIG_V222) {
|
|
40191
|
-
const message =
|
|
40210
|
+
if (schema !== void 0 && schema !== SCHEMA_CONFIG_V222) {
|
|
40211
|
+
const message = `Invalid $schema value '${schema}'. Expected '${SCHEMA_CONFIG_V222}' or omit the field.`;
|
|
40192
40212
|
errors.push({
|
|
40193
40213
|
severity: "error",
|
|
40194
40214
|
filePath,
|
|
@@ -40250,7 +40270,7 @@ function isObject3(value) {
|
|
|
40250
40270
|
}
|
|
40251
40271
|
async function validateFileReferences(evalFilePath) {
|
|
40252
40272
|
const errors = [];
|
|
40253
|
-
const absolutePath =
|
|
40273
|
+
const absolutePath = path43.resolve(evalFilePath);
|
|
40254
40274
|
const gitRoot = await findGitRoot(absolutePath);
|
|
40255
40275
|
if (!gitRoot) {
|
|
40256
40276
|
errors.push({
|
|
@@ -40607,12 +40627,12 @@ function buildDefaultOutputPath(cwd, format) {
|
|
|
40607
40627
|
const extension = getDefaultExtension(format);
|
|
40608
40628
|
return path18.join(cwd, ".agentv", "results", `${baseName}_${timestamp}${extension}`);
|
|
40609
40629
|
}
|
|
40610
|
-
function resolvePromptDirectory(
|
|
40611
|
-
if (
|
|
40630
|
+
function resolvePromptDirectory(option4, cwd) {
|
|
40631
|
+
if (option4 === void 0) {
|
|
40612
40632
|
return void 0;
|
|
40613
40633
|
}
|
|
40614
|
-
if (typeof
|
|
40615
|
-
return path18.resolve(cwd,
|
|
40634
|
+
if (typeof option4 === "string" && option4.trim().length > 0) {
|
|
40635
|
+
return path18.resolve(cwd, option4);
|
|
40616
40636
|
}
|
|
40617
40637
|
return path18.join(cwd, ".agentv", "prompts");
|
|
40618
40638
|
}
|
|
@@ -40916,56 +40936,119 @@ async function resolveEvaluationRunner() {
|
|
|
40916
40936
|
}
|
|
40917
40937
|
|
|
40918
40938
|
// src/commands/eval/index.ts
|
|
40919
|
-
|
|
40920
|
-
|
|
40921
|
-
|
|
40922
|
-
|
|
40923
|
-
|
|
40924
|
-
|
|
40925
|
-
|
|
40926
|
-
|
|
40927
|
-
|
|
40928
|
-
|
|
40929
|
-
|
|
40930
|
-
|
|
40931
|
-
|
|
40932
|
-
|
|
40933
|
-
|
|
40934
|
-
|
|
40935
|
-
|
|
40936
|
-
|
|
40937
|
-
|
|
40938
|
-
|
|
40939
|
-
|
|
40940
|
-
|
|
40941
|
-
|
|
40942
|
-
|
|
40943
|
-
|
|
40944
|
-
|
|
40945
|
-
|
|
40946
|
-
|
|
40947
|
-
|
|
40948
|
-
|
|
40949
|
-
|
|
40950
|
-
|
|
40951
|
-
|
|
40952
|
-
|
|
40953
|
-
|
|
40954
|
-
|
|
40955
|
-
|
|
40956
|
-
|
|
40957
|
-
|
|
40958
|
-
|
|
40959
|
-
|
|
40960
|
-
|
|
40961
|
-
|
|
40962
|
-
|
|
40963
|
-
|
|
40964
|
-
|
|
40939
|
+
var evalCommand = command({
|
|
40940
|
+
name: "eval",
|
|
40941
|
+
description: "Run eval suites and report results",
|
|
40942
|
+
args: {
|
|
40943
|
+
evalPaths: restPositionals({
|
|
40944
|
+
type: string4,
|
|
40945
|
+
displayName: "eval-paths",
|
|
40946
|
+
description: "Path(s) or glob(s) to evaluation .yaml file(s)"
|
|
40947
|
+
}),
|
|
40948
|
+
target: option({
|
|
40949
|
+
type: string4,
|
|
40950
|
+
long: "target",
|
|
40951
|
+
description: "Override target name from targets.yaml",
|
|
40952
|
+
defaultValue: () => "default"
|
|
40953
|
+
}),
|
|
40954
|
+
targets: option({
|
|
40955
|
+
type: optional2(string4),
|
|
40956
|
+
long: "targets",
|
|
40957
|
+
description: "Path to targets.yaml (overrides discovery)"
|
|
40958
|
+
}),
|
|
40959
|
+
evalId: option({
|
|
40960
|
+
type: optional2(string4),
|
|
40961
|
+
long: "eval-id",
|
|
40962
|
+
description: "Run only the eval case with this identifier"
|
|
40963
|
+
}),
|
|
40964
|
+
workers: option({
|
|
40965
|
+
type: number4,
|
|
40966
|
+
long: "workers",
|
|
40967
|
+
description: "Number of parallel workers (default: 1, max: 50). Can also be set per-target in targets.yaml",
|
|
40968
|
+
defaultValue: () => 1
|
|
40969
|
+
}),
|
|
40970
|
+
out: option({
|
|
40971
|
+
type: optional2(string4),
|
|
40972
|
+
long: "out",
|
|
40973
|
+
description: "Write results to the specified path"
|
|
40974
|
+
}),
|
|
40975
|
+
outputFormat: option({
|
|
40976
|
+
type: string4,
|
|
40977
|
+
long: "output-format",
|
|
40978
|
+
description: "Output format: 'jsonl' or 'yaml' (default: jsonl)",
|
|
40979
|
+
defaultValue: () => "jsonl"
|
|
40980
|
+
}),
|
|
40981
|
+
dryRun: flag({
|
|
40982
|
+
long: "dry-run",
|
|
40983
|
+
description: "Use mock provider responses instead of real LLM calls"
|
|
40984
|
+
}),
|
|
40985
|
+
dryRunDelay: option({
|
|
40986
|
+
type: number4,
|
|
40987
|
+
long: "dry-run-delay",
|
|
40988
|
+
description: "Fixed delay in milliseconds for dry-run mode (overridden by delay range if specified)",
|
|
40989
|
+
defaultValue: () => 0
|
|
40990
|
+
}),
|
|
40991
|
+
dryRunDelayMin: option({
|
|
40992
|
+
type: number4,
|
|
40993
|
+
long: "dry-run-delay-min",
|
|
40994
|
+
description: "Minimum delay in milliseconds for dry-run mode (requires --dry-run-delay-max)",
|
|
40995
|
+
defaultValue: () => 0
|
|
40996
|
+
}),
|
|
40997
|
+
dryRunDelayMax: option({
|
|
40998
|
+
type: number4,
|
|
40999
|
+
long: "dry-run-delay-max",
|
|
41000
|
+
description: "Maximum delay in milliseconds for dry-run mode (requires --dry-run-delay-min)",
|
|
41001
|
+
defaultValue: () => 0
|
|
41002
|
+
}),
|
|
41003
|
+
agentTimeout: option({
|
|
41004
|
+
type: number4,
|
|
41005
|
+
long: "agent-timeout",
|
|
41006
|
+
description: "Timeout in seconds for provider responses (default: 120)",
|
|
41007
|
+
defaultValue: () => 120
|
|
41008
|
+
}),
|
|
41009
|
+
maxRetries: option({
|
|
41010
|
+
type: number4,
|
|
41011
|
+
long: "max-retries",
|
|
41012
|
+
description: "Retry count for timeout recoveries (default: 2)",
|
|
41013
|
+
defaultValue: () => 2
|
|
41014
|
+
}),
|
|
41015
|
+
cache: flag({
|
|
41016
|
+
long: "cache",
|
|
41017
|
+
description: "Enable in-memory provider response cache"
|
|
41018
|
+
}),
|
|
41019
|
+
verbose: flag({
|
|
41020
|
+
long: "verbose",
|
|
41021
|
+
description: "Enable verbose logging"
|
|
41022
|
+
}),
|
|
41023
|
+
dumpPrompts: option({
|
|
41024
|
+
type: optional2(string4),
|
|
41025
|
+
long: "dump-prompts",
|
|
41026
|
+
description: "Directory path for persisting prompt payloads for debugging"
|
|
41027
|
+
})
|
|
41028
|
+
},
|
|
41029
|
+
handler: async (args) => {
|
|
41030
|
+
const resolvedPaths = await resolveEvalPaths(args.evalPaths, process.cwd());
|
|
41031
|
+
const dumpPrompts = args.dumpPrompts !== void 0 ? args.dumpPrompts === "." ? true : args.dumpPrompts : void 0;
|
|
41032
|
+
const rawOptions = {
|
|
41033
|
+
target: args.target,
|
|
41034
|
+
targets: args.targets,
|
|
41035
|
+
evalId: args.evalId,
|
|
41036
|
+
workers: args.workers,
|
|
41037
|
+
out: args.out,
|
|
41038
|
+
outputFormat: args.outputFormat,
|
|
41039
|
+
dryRun: args.dryRun,
|
|
41040
|
+
dryRunDelay: args.dryRunDelay,
|
|
41041
|
+
dryRunDelayMin: args.dryRunDelayMin,
|
|
41042
|
+
dryRunDelayMax: args.dryRunDelayMax,
|
|
41043
|
+
agentTimeout: args.agentTimeout,
|
|
41044
|
+
maxRetries: args.maxRetries,
|
|
41045
|
+
cache: args.cache,
|
|
41046
|
+
verbose: args.verbose,
|
|
41047
|
+
dumpPrompts
|
|
41048
|
+
};
|
|
40965
41049
|
await runEvalCommand({ testFiles: resolvedPaths, rawOptions });
|
|
40966
|
-
}
|
|
40967
|
-
|
|
40968
|
-
}
|
|
41050
|
+
}
|
|
41051
|
+
});
|
|
40969
41052
|
async function resolveEvalPaths(evalPaths, cwd) {
|
|
40970
41053
|
const normalizedInputs = evalPaths.map((value) => value?.trim()).filter((value) => value);
|
|
40971
41054
|
if (normalizedInputs.length === 0) {
|
|
@@ -41013,6 +41096,9 @@ async function resolveEvalPaths(evalPaths, cwd) {
|
|
|
41013
41096
|
return sorted;
|
|
41014
41097
|
}
|
|
41015
41098
|
|
|
41099
|
+
// src/commands/generate/index.ts
|
|
41100
|
+
import { command as command2, flag as flag2, option as option2, optional as optional3, positional as positional2, string as string5, subcommands } from "cmd-ts";
|
|
41101
|
+
|
|
41016
41102
|
// src/commands/generate/rubrics.ts
|
|
41017
41103
|
import { readFile as readFile8, writeFile as writeFile6 } from "node:fs/promises";
|
|
41018
41104
|
import path20 from "node:path";
|
|
@@ -41154,29 +41240,53 @@ function extractQuestion(evalCase) {
|
|
|
41154
41240
|
}
|
|
41155
41241
|
|
|
41156
41242
|
// src/commands/generate/index.ts
|
|
41157
|
-
|
|
41158
|
-
|
|
41159
|
-
|
|
41160
|
-
|
|
41161
|
-
|
|
41162
|
-
|
|
41243
|
+
var rubricsCommand = command2({
|
|
41244
|
+
name: "rubrics",
|
|
41245
|
+
description: "Generate rubrics from expected_outcome in YAML eval file",
|
|
41246
|
+
args: {
|
|
41247
|
+
file: positional2({
|
|
41248
|
+
type: string5,
|
|
41249
|
+
displayName: "file",
|
|
41250
|
+
description: "Path to YAML eval file"
|
|
41251
|
+
}),
|
|
41252
|
+
target: option2({
|
|
41253
|
+
type: optional3(string5),
|
|
41254
|
+
long: "target",
|
|
41255
|
+
short: "t",
|
|
41256
|
+
description: "Override target for rubric generation (default: file target or openai:gpt-4o)"
|
|
41257
|
+
}),
|
|
41258
|
+
verbose: flag2({
|
|
41259
|
+
long: "verbose",
|
|
41260
|
+
short: "v",
|
|
41261
|
+
description: "Show detailed progress"
|
|
41262
|
+
})
|
|
41263
|
+
},
|
|
41264
|
+
handler: async ({ file: file2, target, verbose }) => {
|
|
41163
41265
|
try {
|
|
41164
41266
|
await generateRubricsCommand({
|
|
41165
41267
|
file: file2,
|
|
41166
|
-
target
|
|
41167
|
-
verbose
|
|
41268
|
+
target,
|
|
41269
|
+
verbose
|
|
41168
41270
|
});
|
|
41169
41271
|
} catch (error40) {
|
|
41170
41272
|
console.error(`Error: ${error40.message}`);
|
|
41171
41273
|
process.exit(1);
|
|
41172
41274
|
}
|
|
41173
|
-
}
|
|
41174
|
-
}
|
|
41275
|
+
}
|
|
41276
|
+
});
|
|
41277
|
+
var generateCommand = subcommands({
|
|
41278
|
+
name: "generate",
|
|
41279
|
+
description: "Generate evaluation artifacts",
|
|
41280
|
+
cmds: {
|
|
41281
|
+
rubrics: rubricsCommand
|
|
41282
|
+
}
|
|
41283
|
+
});
|
|
41175
41284
|
|
|
41176
41285
|
// src/commands/init/index.ts
|
|
41177
41286
|
import { existsSync, mkdirSync, writeFileSync } from "node:fs";
|
|
41178
41287
|
import path24 from "node:path";
|
|
41179
41288
|
import * as readline from "node:readline/promises";
|
|
41289
|
+
import { command as command3, option as option3, optional as optional4, string as string6 } from "cmd-ts";
|
|
41180
41290
|
|
|
41181
41291
|
// src/templates/index.ts
|
|
41182
41292
|
import { readFileSync, readdirSync, statSync } from "node:fs";
|
|
@@ -41355,15 +41465,28 @@ Files installed to ${path24.relative(targetPath, claudeDir)}:`);
|
|
|
41355
41465
|
console.log(" 2. Configure targets in .agentv/targets.yaml");
|
|
41356
41466
|
console.log(" 3. Create eval files using the schema and prompt templates");
|
|
41357
41467
|
}
|
|
41468
|
+
var initCmdTsCommand = command3({
|
|
41469
|
+
name: "init",
|
|
41470
|
+
description: "Initialize AgentV in your project (installs prompt templates and schema to .github)",
|
|
41471
|
+
args: {
|
|
41472
|
+
path: option3({
|
|
41473
|
+
type: optional4(string6),
|
|
41474
|
+
long: "path",
|
|
41475
|
+
description: "Target directory for initialization (default: current directory)"
|
|
41476
|
+
})
|
|
41477
|
+
},
|
|
41478
|
+
handler: async ({ path: targetPath }) => {
|
|
41479
|
+
try {
|
|
41480
|
+
await initCommand({ targetPath });
|
|
41481
|
+
} catch (error40) {
|
|
41482
|
+
console.error(`Error: ${error40.message}`);
|
|
41483
|
+
process.exit(1);
|
|
41484
|
+
}
|
|
41485
|
+
}
|
|
41486
|
+
});
|
|
41358
41487
|
|
|
41359
|
-
// src/commands/
|
|
41360
|
-
|
|
41361
|
-
program.command("status").description("Show the latest AgentV kernel status").action(() => {
|
|
41362
|
-
const kernel = createAgentKernel();
|
|
41363
|
-
console.log(`Kernel status: ${kernel.status}`);
|
|
41364
|
-
});
|
|
41365
|
-
return program;
|
|
41366
|
-
}
|
|
41488
|
+
// src/commands/validate/index.ts
|
|
41489
|
+
import { command as command4, restPositionals as restPositionals2, string as string7 } from "cmd-ts";
|
|
41367
41490
|
|
|
41368
41491
|
// src/commands/validate/format-output.ts
|
|
41369
41492
|
var ANSI_RED3 = "\x1B[31m";
|
|
@@ -41468,20 +41591,6 @@ async function validateFiles(paths) {
|
|
|
41468
41591
|
async function validateSingleFile(filePath) {
|
|
41469
41592
|
const absolutePath = path25.resolve(filePath);
|
|
41470
41593
|
const fileType = await detectFileType(absolutePath);
|
|
41471
|
-
if (fileType === "unknown") {
|
|
41472
|
-
return {
|
|
41473
|
-
valid: false,
|
|
41474
|
-
filePath: absolutePath,
|
|
41475
|
-
fileType: "unknown",
|
|
41476
|
-
errors: [
|
|
41477
|
-
{
|
|
41478
|
-
severity: "error",
|
|
41479
|
-
filePath: absolutePath,
|
|
41480
|
-
message: "Missing or invalid $schema field. File must declare schema: 'agentv-eval-v2', 'agentv-targets-v2', or 'agentv-config-v2'"
|
|
41481
|
-
}
|
|
41482
|
-
]
|
|
41483
|
-
};
|
|
41484
|
-
}
|
|
41485
41594
|
let result;
|
|
41486
41595
|
if (fileType === "eval") {
|
|
41487
41596
|
result = await validateEvalFile(absolutePath);
|
|
@@ -41551,7 +41660,7 @@ function isYamlFile(filePath) {
|
|
|
41551
41660
|
}
|
|
41552
41661
|
|
|
41553
41662
|
// src/commands/validate/index.ts
|
|
41554
|
-
async function runValidateCommand(paths
|
|
41663
|
+
async function runValidateCommand(paths) {
|
|
41555
41664
|
if (paths.length === 0) {
|
|
41556
41665
|
console.error("Error: No paths specified. Usage: agentv validate <paths...>");
|
|
41557
41666
|
process.exit(1);
|
|
@@ -41563,47 +41672,45 @@ async function runValidateCommand(paths, _options) {
|
|
|
41563
41672
|
process.exit(1);
|
|
41564
41673
|
}
|
|
41565
41674
|
}
|
|
41566
|
-
|
|
41567
|
-
|
|
41675
|
+
var validateCommand = command4({
|
|
41676
|
+
name: "validate",
|
|
41677
|
+
description: "Validate AgentV eval and targets YAML files",
|
|
41678
|
+
args: {
|
|
41679
|
+
paths: restPositionals2({
|
|
41680
|
+
type: string7,
|
|
41681
|
+
displayName: "paths",
|
|
41682
|
+
description: "Files or directories to validate"
|
|
41683
|
+
})
|
|
41684
|
+
},
|
|
41685
|
+
handler: async ({ paths }) => {
|
|
41568
41686
|
try {
|
|
41569
|
-
await runValidateCommand(paths
|
|
41687
|
+
await runValidateCommand(paths);
|
|
41570
41688
|
} catch (error40) {
|
|
41571
41689
|
console.error(`Error: ${error40.message}`);
|
|
41572
41690
|
process.exit(1);
|
|
41573
41691
|
}
|
|
41574
|
-
}
|
|
41575
|
-
|
|
41576
|
-
}
|
|
41692
|
+
}
|
|
41693
|
+
});
|
|
41577
41694
|
|
|
41578
41695
|
// src/index.ts
|
|
41579
41696
|
var packageJson = JSON.parse(readFileSync2(new URL("../package.json", import.meta.url), "utf8"));
|
|
41580
|
-
|
|
41581
|
-
|
|
41582
|
-
|
|
41583
|
-
|
|
41584
|
-
|
|
41585
|
-
|
|
41586
|
-
|
|
41587
|
-
|
|
41588
|
-
|
|
41589
|
-
|
|
41590
|
-
|
|
41591
|
-
await initCommand({ targetPath });
|
|
41592
|
-
} catch (error40) {
|
|
41593
|
-
console.error(`Error: ${error40.message}`);
|
|
41594
|
-
process.exit(1);
|
|
41595
|
-
}
|
|
41596
|
-
});
|
|
41597
|
-
return program;
|
|
41598
|
-
}
|
|
41697
|
+
var app = subcommands2({
|
|
41698
|
+
name: "agentv",
|
|
41699
|
+
description: "AgentV CLI",
|
|
41700
|
+
version: packageJson.version,
|
|
41701
|
+
cmds: {
|
|
41702
|
+
eval: evalCommand,
|
|
41703
|
+
validate: validateCommand,
|
|
41704
|
+
generate: generateCommand,
|
|
41705
|
+
init: initCmdTsCommand
|
|
41706
|
+
}
|
|
41707
|
+
});
|
|
41599
41708
|
async function runCli(argv = process.argv) {
|
|
41600
|
-
|
|
41601
|
-
await program.parseAsync(argv);
|
|
41602
|
-
return program;
|
|
41709
|
+
await run(binary(app), argv);
|
|
41603
41710
|
}
|
|
41604
41711
|
|
|
41605
41712
|
export {
|
|
41606
|
-
|
|
41713
|
+
app,
|
|
41607
41714
|
runCli
|
|
41608
41715
|
};
|
|
41609
|
-
//# sourceMappingURL=chunk-
|
|
41716
|
+
//# sourceMappingURL=chunk-WOCXZEH4.js.map
|