agentv 1.3.1 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +439 -441
- package/dist/{chunk-6R2YRXCQ.js → chunk-3RYQPI4H.js} +487 -329
- package/dist/chunk-3RYQPI4H.js.map +1 -0
- package/dist/cli.js +1 -1
- package/dist/index.js +1 -1
- package/dist/templates/.agentv/.env.template +23 -23
- package/dist/templates/.agentv/config.yaml +15 -15
- package/dist/templates/.agentv/targets.yaml +71 -73
- package/dist/templates/.claude/skills/agentv-eval-builder/SKILL.md +212 -211
- package/dist/templates/.claude/skills/agentv-eval-builder/references/batch-cli-evaluator.md +318 -288
- package/dist/templates/.claude/skills/agentv-eval-builder/references/composite-evaluator.md +215 -215
- package/dist/templates/.claude/skills/agentv-eval-builder/references/custom-evaluators.md +216 -213
- package/dist/templates/.claude/skills/agentv-eval-builder/references/example-evals.md +340 -333
- package/dist/templates/.claude/skills/agentv-eval-builder/references/rubric-evaluator.md +139 -139
- package/dist/templates/.claude/skills/agentv-eval-builder/references/tool-trajectory-evaluator.md +198 -179
- package/dist/templates/.claude/skills/agentv-prompt-optimizer/SKILL.md +77 -77
- package/dist/templates/.github/prompts/agentv-eval-build.prompt.md +4 -4
- package/dist/templates/.github/prompts/agentv-optimize.prompt.md +3 -3
- package/package.json +2 -5
- package/dist/chunk-6R2YRXCQ.js.map +0 -1
|
@@ -141,30 +141,14 @@ var require_dist = __commonJS({
|
|
|
141
141
|
});
|
|
142
142
|
|
|
143
143
|
// src/index.ts
|
|
144
|
-
import { readFileSync as
|
|
144
|
+
import { readFileSync as readFileSync3 } from "node:fs";
|
|
145
145
|
import { binary, run, subcommands as subcommands2 } from "cmd-ts";
|
|
146
146
|
|
|
147
|
-
// src/commands/
|
|
148
|
-
import {
|
|
149
|
-
import
|
|
150
|
-
import {
|
|
151
|
-
command,
|
|
152
|
-
flag,
|
|
153
|
-
number as number4,
|
|
154
|
-
option,
|
|
155
|
-
optional as optional2,
|
|
156
|
-
restPositionals,
|
|
157
|
-
string as string4
|
|
158
|
-
} from "cmd-ts";
|
|
159
|
-
import fg from "fast-glob";
|
|
160
|
-
|
|
161
|
-
// src/commands/eval/run-eval.ts
|
|
162
|
-
import { constants as constants6 } from "node:fs";
|
|
163
|
-
import { access as access6, mkdir as mkdir7 } from "node:fs/promises";
|
|
164
|
-
import path19 from "node:path";
|
|
165
|
-
import { pathToFileURL } from "node:url";
|
|
147
|
+
// src/commands/convert/index.ts
|
|
148
|
+
import { readFileSync, writeFileSync } from "node:fs";
|
|
149
|
+
import path14 from "node:path";
|
|
166
150
|
|
|
167
|
-
// ../../packages/core/dist/chunk-
|
|
151
|
+
// ../../packages/core/dist/chunk-KPHTMTZ3.js
|
|
168
152
|
import { constants } from "node:fs";
|
|
169
153
|
import { access, readFile } from "node:fs/promises";
|
|
170
154
|
import path from "node:path";
|
|
@@ -648,8 +632,8 @@ function getErrorMap() {
|
|
|
648
632
|
|
|
649
633
|
// ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/helpers/parseUtil.js
|
|
650
634
|
var makeIssue = (params) => {
|
|
651
|
-
const { data, path:
|
|
652
|
-
const fullPath = [...
|
|
635
|
+
const { data, path: path28, errorMaps, issueData } = params;
|
|
636
|
+
const fullPath = [...path28, ...issueData.path || []];
|
|
653
637
|
const fullIssue = {
|
|
654
638
|
...issueData,
|
|
655
639
|
path: fullPath
|
|
@@ -765,11 +749,11 @@ var errorUtil;
|
|
|
765
749
|
|
|
766
750
|
// ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/types.js
|
|
767
751
|
var ParseInputLazyPath = class {
|
|
768
|
-
constructor(parent, value,
|
|
752
|
+
constructor(parent, value, path28, key2) {
|
|
769
753
|
this._cachedPath = [];
|
|
770
754
|
this.parent = parent;
|
|
771
755
|
this.data = value;
|
|
772
|
-
this._path =
|
|
756
|
+
this._path = path28;
|
|
773
757
|
this._key = key2;
|
|
774
758
|
}
|
|
775
759
|
get path() {
|
|
@@ -1049,8 +1033,8 @@ var ZodType = class {
|
|
|
1049
1033
|
promise() {
|
|
1050
1034
|
return ZodPromise.create(this, this._def);
|
|
1051
1035
|
}
|
|
1052
|
-
or(
|
|
1053
|
-
return ZodUnion.create([this,
|
|
1036
|
+
or(option5) {
|
|
1037
|
+
return ZodUnion.create([this, option5], this._def);
|
|
1054
1038
|
}
|
|
1055
1039
|
and(incoming) {
|
|
1056
1040
|
return ZodIntersection.create(this, incoming, this._def);
|
|
@@ -2900,7 +2884,7 @@ var ZodUnion = class extends ZodType {
|
|
|
2900
2884
|
return INVALID;
|
|
2901
2885
|
}
|
|
2902
2886
|
if (ctx.common.async) {
|
|
2903
|
-
return Promise.all(options.map(async (
|
|
2887
|
+
return Promise.all(options.map(async (option5) => {
|
|
2904
2888
|
const childCtx = {
|
|
2905
2889
|
...ctx,
|
|
2906
2890
|
common: {
|
|
@@ -2910,7 +2894,7 @@ var ZodUnion = class extends ZodType {
|
|
|
2910
2894
|
parent: null
|
|
2911
2895
|
};
|
|
2912
2896
|
return {
|
|
2913
|
-
result: await
|
|
2897
|
+
result: await option5._parseAsync({
|
|
2914
2898
|
data: ctx.data,
|
|
2915
2899
|
path: ctx.path,
|
|
2916
2900
|
parent: childCtx
|
|
@@ -2921,7 +2905,7 @@ var ZodUnion = class extends ZodType {
|
|
|
2921
2905
|
} else {
|
|
2922
2906
|
let dirty = void 0;
|
|
2923
2907
|
const issues = [];
|
|
2924
|
-
for (const
|
|
2908
|
+
for (const option5 of options) {
|
|
2925
2909
|
const childCtx = {
|
|
2926
2910
|
...ctx,
|
|
2927
2911
|
common: {
|
|
@@ -2930,7 +2914,7 @@ var ZodUnion = class extends ZodType {
|
|
|
2930
2914
|
},
|
|
2931
2915
|
parent: null
|
|
2932
2916
|
};
|
|
2933
|
-
const result =
|
|
2917
|
+
const result = option5._parseSync({
|
|
2934
2918
|
data: ctx.data,
|
|
2935
2919
|
path: ctx.path,
|
|
2936
2920
|
parent: childCtx
|
|
@@ -3011,8 +2995,8 @@ var ZodDiscriminatedUnion = class _ZodDiscriminatedUnion extends ZodType {
|
|
|
3011
2995
|
}
|
|
3012
2996
|
const discriminator = this.discriminator;
|
|
3013
2997
|
const discriminatorValue = ctx.data[discriminator];
|
|
3014
|
-
const
|
|
3015
|
-
if (!
|
|
2998
|
+
const option5 = this.optionsMap.get(discriminatorValue);
|
|
2999
|
+
if (!option5) {
|
|
3016
3000
|
addIssueToContext(ctx, {
|
|
3017
3001
|
code: ZodIssueCode.invalid_union_discriminator,
|
|
3018
3002
|
options: Array.from(this.optionsMap.keys()),
|
|
@@ -3021,13 +3005,13 @@ var ZodDiscriminatedUnion = class _ZodDiscriminatedUnion extends ZodType {
|
|
|
3021
3005
|
return INVALID;
|
|
3022
3006
|
}
|
|
3023
3007
|
if (ctx.common.async) {
|
|
3024
|
-
return
|
|
3008
|
+
return option5._parseAsync({
|
|
3025
3009
|
data: ctx.data,
|
|
3026
3010
|
path: ctx.path,
|
|
3027
3011
|
parent: ctx
|
|
3028
3012
|
});
|
|
3029
3013
|
} else {
|
|
3030
|
-
return
|
|
3014
|
+
return option5._parseSync({
|
|
3031
3015
|
data: ctx.data,
|
|
3032
3016
|
path: ctx.path,
|
|
3033
3017
|
parent: ctx
|
|
@@ -4211,7 +4195,7 @@ var coerce = {
|
|
|
4211
4195
|
};
|
|
4212
4196
|
var NEVER = INVALID;
|
|
4213
4197
|
|
|
4214
|
-
// ../../packages/core/dist/chunk-
|
|
4198
|
+
// ../../packages/core/dist/chunk-KPHTMTZ3.js
|
|
4215
4199
|
async function fileExists(filePath) {
|
|
4216
4200
|
try {
|
|
4217
4201
|
await access(filePath, constants.F_OK);
|
|
@@ -4227,10 +4211,6 @@ async function readTextFile(filePath) {
|
|
|
4227
4211
|
const content = await readFile(filePath, "utf8");
|
|
4228
4212
|
return normalizeLineEndings(content);
|
|
4229
4213
|
}
|
|
4230
|
-
async function readJsonFile(filePath) {
|
|
4231
|
-
const content = await readFile(filePath, "utf8");
|
|
4232
|
-
return JSON.parse(content);
|
|
4233
|
-
}
|
|
4234
4214
|
async function findGitRoot(startPath) {
|
|
4235
4215
|
let currentDir = path.dirname(path.resolve(startPath));
|
|
4236
4216
|
const root2 = path.parse(currentDir).root;
|
|
@@ -4574,8 +4554,7 @@ function normalizeCodexLogFormat(value) {
|
|
|
4574
4554
|
}
|
|
4575
4555
|
function resolveMockConfig(target) {
|
|
4576
4556
|
const response = typeof target.response === "string" ? target.response : void 0;
|
|
4577
|
-
|
|
4578
|
-
return { response, trace: trace2 };
|
|
4557
|
+
return { response };
|
|
4579
4558
|
}
|
|
4580
4559
|
function resolveVSCodeConfig(target, env, insiders) {
|
|
4581
4560
|
const workspaceTemplateEnvVar = resolveOptionalLiteralString(
|
|
@@ -4595,9 +4574,9 @@ function resolveVSCodeConfig(target, env, insiders) {
|
|
|
4595
4574
|
const dryRunSource = target.dry_run ?? target.dryRun;
|
|
4596
4575
|
const subagentRootSource = target.subagent_root ?? target.subagentRoot;
|
|
4597
4576
|
const defaultCommand = insiders ? "code-insiders" : "code";
|
|
4598
|
-
const
|
|
4577
|
+
const command6 = resolveOptionalLiteralString(commandSource) ?? defaultCommand;
|
|
4599
4578
|
return {
|
|
4600
|
-
command:
|
|
4579
|
+
command: command6,
|
|
4601
4580
|
waitForResponse: resolveOptionalBoolean(waitSource) ?? true,
|
|
4602
4581
|
dryRun: resolveOptionalBoolean(dryRunSource) ?? false,
|
|
4603
4582
|
subagentRoot: resolveOptionalString(subagentRootSource, env, `${target.name} subagent root`, {
|
|
@@ -4613,6 +4592,9 @@ function resolveCliConfig(target, env, evalFilePath) {
|
|
|
4613
4592
|
target.files_format ?? target.filesFormat ?? target.attachments_format ?? target.attachmentsFormat
|
|
4614
4593
|
);
|
|
4615
4594
|
const verbose = resolveOptionalBoolean(target.verbose ?? target.cli_verbose ?? target.cliVerbose);
|
|
4595
|
+
const keepTempFiles = resolveOptionalBoolean(
|
|
4596
|
+
target.keep_temp_files ?? target.keepTempFiles ?? target.keep_output_files ?? target.keepOutputFiles
|
|
4597
|
+
);
|
|
4616
4598
|
let cwd = resolveOptionalString(target.cwd, env, `${target.name} working directory`, {
|
|
4617
4599
|
allowLiteral: true,
|
|
4618
4600
|
optionalEnv: true
|
|
@@ -4641,7 +4623,8 @@ function resolveCliConfig(target, env, evalFilePath) {
|
|
|
4641
4623
|
cwd,
|
|
4642
4624
|
timeoutMs,
|
|
4643
4625
|
healthcheck,
|
|
4644
|
-
verbose
|
|
4626
|
+
verbose,
|
|
4627
|
+
keepTempFiles
|
|
4645
4628
|
};
|
|
4646
4629
|
}
|
|
4647
4630
|
function resolveTimeoutMs(source2, description) {
|
|
@@ -4891,6 +4874,21 @@ var PROVIDER_ALIASES = [
|
|
|
4891
4874
|
"vertex"
|
|
4892
4875
|
// legacy/future support
|
|
4893
4876
|
];
|
|
4877
|
+
function extractLastAssistantContent(messages) {
|
|
4878
|
+
if (!messages || messages.length === 0) {
|
|
4879
|
+
return "";
|
|
4880
|
+
}
|
|
4881
|
+
for (let i = messages.length - 1; i >= 0; i--) {
|
|
4882
|
+
const msg = messages[i];
|
|
4883
|
+
if (msg.role === "assistant" && msg.content !== void 0) {
|
|
4884
|
+
if (typeof msg.content === "string") {
|
|
4885
|
+
return msg.content;
|
|
4886
|
+
}
|
|
4887
|
+
return JSON.stringify(msg.content);
|
|
4888
|
+
}
|
|
4889
|
+
}
|
|
4890
|
+
return "";
|
|
4891
|
+
}
|
|
4894
4892
|
function isAgentProvider(provider) {
|
|
4895
4893
|
return provider ? AGENT_PROVIDER_KINDS.includes(provider.kind) : false;
|
|
4896
4894
|
}
|
|
@@ -6001,10 +5999,10 @@ function assignProp(target, prop, value) {
|
|
|
6001
5999
|
configurable: true
|
|
6002
6000
|
});
|
|
6003
6001
|
}
|
|
6004
|
-
function getElementAtPath(obj,
|
|
6005
|
-
if (!
|
|
6002
|
+
function getElementAtPath(obj, path28) {
|
|
6003
|
+
if (!path28)
|
|
6006
6004
|
return obj;
|
|
6007
|
-
return
|
|
6005
|
+
return path28.reduce((acc, key2) => acc?.[key2], obj);
|
|
6008
6006
|
}
|
|
6009
6007
|
function promiseAllObject(promisesObj) {
|
|
6010
6008
|
const keys = Object.keys(promisesObj);
|
|
@@ -6324,11 +6322,11 @@ function aborted(x, startIndex = 0) {
|
|
|
6324
6322
|
}
|
|
6325
6323
|
return false;
|
|
6326
6324
|
}
|
|
6327
|
-
function prefixIssues(
|
|
6325
|
+
function prefixIssues(path28, issues) {
|
|
6328
6326
|
return issues.map((iss) => {
|
|
6329
6327
|
var _a17;
|
|
6330
6328
|
(_a17 = iss).path ?? (_a17.path = []);
|
|
6331
|
-
iss.path.unshift(
|
|
6329
|
+
iss.path.unshift(path28);
|
|
6332
6330
|
return iss;
|
|
6333
6331
|
});
|
|
6334
6332
|
}
|
|
@@ -6465,7 +6463,7 @@ function treeifyError(error40, _mapper) {
|
|
|
6465
6463
|
return issue2.message;
|
|
6466
6464
|
};
|
|
6467
6465
|
const result = { errors: [] };
|
|
6468
|
-
const processError = (error41,
|
|
6466
|
+
const processError = (error41, path28 = []) => {
|
|
6469
6467
|
var _a17, _b8;
|
|
6470
6468
|
for (const issue2 of error41.issues) {
|
|
6471
6469
|
if (issue2.code === "invalid_union" && issue2.errors.length) {
|
|
@@ -6475,7 +6473,7 @@ function treeifyError(error40, _mapper) {
|
|
|
6475
6473
|
} else if (issue2.code === "invalid_element") {
|
|
6476
6474
|
processError({ issues: issue2.issues }, issue2.path);
|
|
6477
6475
|
} else {
|
|
6478
|
-
const fullpath = [...
|
|
6476
|
+
const fullpath = [...path28, ...issue2.path];
|
|
6479
6477
|
if (fullpath.length === 0) {
|
|
6480
6478
|
result.errors.push(mapper(issue2));
|
|
6481
6479
|
continue;
|
|
@@ -6505,9 +6503,9 @@ function treeifyError(error40, _mapper) {
|
|
|
6505
6503
|
processError(error40);
|
|
6506
6504
|
return result;
|
|
6507
6505
|
}
|
|
6508
|
-
function toDotPath(
|
|
6506
|
+
function toDotPath(path28) {
|
|
6509
6507
|
const segs = [];
|
|
6510
|
-
for (const seg of
|
|
6508
|
+
for (const seg of path28) {
|
|
6511
6509
|
if (typeof seg === "number")
|
|
6512
6510
|
segs.push(`[${seg}]`);
|
|
6513
6511
|
else if (typeof seg === "symbol")
|
|
@@ -8106,7 +8104,7 @@ var $ZodUnion = /* @__PURE__ */ $constructor("$ZodUnion", (inst, def) => {
|
|
|
8106
8104
|
defineLazy(inst._zod, "optout", () => def.options.some((o) => o._zod.optout === "optional") ? "optional" : void 0);
|
|
8107
8105
|
defineLazy(inst._zod, "values", () => {
|
|
8108
8106
|
if (def.options.every((o) => o._zod.values)) {
|
|
8109
|
-
return new Set(def.options.flatMap((
|
|
8107
|
+
return new Set(def.options.flatMap((option5) => Array.from(option5._zod.values)));
|
|
8110
8108
|
}
|
|
8111
8109
|
return void 0;
|
|
8112
8110
|
});
|
|
@@ -8120,8 +8118,8 @@ var $ZodUnion = /* @__PURE__ */ $constructor("$ZodUnion", (inst, def) => {
|
|
|
8120
8118
|
inst._zod.parse = (payload, ctx) => {
|
|
8121
8119
|
let async = false;
|
|
8122
8120
|
const results = [];
|
|
8123
|
-
for (const
|
|
8124
|
-
const result =
|
|
8121
|
+
for (const option5 of def.options) {
|
|
8122
|
+
const result = option5._zod.run({
|
|
8125
8123
|
value: payload.value,
|
|
8126
8124
|
issues: []
|
|
8127
8125
|
}, ctx);
|
|
@@ -8146,10 +8144,10 @@ var $ZodDiscriminatedUnion = /* @__PURE__ */ $constructor("$ZodDiscriminatedUnio
|
|
|
8146
8144
|
const _super = inst._zod.parse;
|
|
8147
8145
|
defineLazy(inst._zod, "propValues", () => {
|
|
8148
8146
|
const propValues = {};
|
|
8149
|
-
for (const
|
|
8150
|
-
const pv =
|
|
8147
|
+
for (const option5 of def.options) {
|
|
8148
|
+
const pv = option5._zod.propValues;
|
|
8151
8149
|
if (!pv || Object.keys(pv).length === 0)
|
|
8152
|
-
throw new Error(`Invalid discriminated union option at index "${def.options.indexOf(
|
|
8150
|
+
throw new Error(`Invalid discriminated union option at index "${def.options.indexOf(option5)}"`);
|
|
8153
8151
|
for (const [k, v] of Object.entries(pv)) {
|
|
8154
8152
|
if (!propValues[k])
|
|
8155
8153
|
propValues[k] = /* @__PURE__ */ new Set();
|
|
@@ -15353,8 +15351,8 @@ function isTransforming(_schema, _ctx) {
|
|
|
15353
15351
|
return false;
|
|
15354
15352
|
}
|
|
15355
15353
|
case "union": {
|
|
15356
|
-
for (const
|
|
15357
|
-
if (isTransforming(
|
|
15354
|
+
for (const option5 of def.options) {
|
|
15355
|
+
if (isTransforming(option5, ctx))
|
|
15358
15356
|
return true;
|
|
15359
15357
|
}
|
|
15360
15358
|
return false;
|
|
@@ -26060,14 +26058,14 @@ function createAzure(options = {}) {
|
|
|
26060
26058
|
description: "Azure OpenAI resource name"
|
|
26061
26059
|
});
|
|
26062
26060
|
const apiVersion = (_a17 = options.apiVersion) != null ? _a17 : "v1";
|
|
26063
|
-
const url2 = ({ path:
|
|
26061
|
+
const url2 = ({ path: path28, modelId }) => {
|
|
26064
26062
|
var _a24;
|
|
26065
26063
|
const baseUrlPrefix = (_a24 = options.baseURL) != null ? _a24 : `https://${getResourceName()}.openai.azure.com/openai`;
|
|
26066
26064
|
let fullUrl;
|
|
26067
26065
|
if (options.useDeploymentBasedUrls) {
|
|
26068
|
-
fullUrl = new URL(`${baseUrlPrefix}/deployments/${modelId}${
|
|
26066
|
+
fullUrl = new URL(`${baseUrlPrefix}/deployments/${modelId}${path28}`);
|
|
26069
26067
|
} else {
|
|
26070
|
-
fullUrl = new URL(`${baseUrlPrefix}/v1${
|
|
26068
|
+
fullUrl = new URL(`${baseUrlPrefix}/v1${path28}`);
|
|
26071
26069
|
}
|
|
26072
26070
|
fullUrl.searchParams.set("api-version", apiVersion);
|
|
26073
26071
|
return fullUrl.toString();
|
|
@@ -34595,33 +34593,22 @@ var EVALUATOR_KIND_SET = new Set(EVALUATOR_KIND_VALUES);
|
|
|
34595
34593
|
function isEvaluatorKind(value) {
|
|
34596
34594
|
return typeof value === "string" && EVALUATOR_KIND_SET.has(value);
|
|
34597
34595
|
}
|
|
34598
|
-
function
|
|
34599
|
-
return typeof value === "string" && ["model_step", "tool_call", "tool_result", "message", "error"].includes(value);
|
|
34600
|
-
}
|
|
34601
|
-
function isTraceEvent(value) {
|
|
34602
|
-
if (typeof value !== "object" || value === null) {
|
|
34603
|
-
return false;
|
|
34604
|
-
}
|
|
34605
|
-
const candidate = value;
|
|
34606
|
-
return isTraceEventType(candidate.type) && typeof candidate.timestamp === "string";
|
|
34607
|
-
}
|
|
34608
|
-
function computeTraceSummary(trace2) {
|
|
34596
|
+
function computeTraceSummary(messages) {
|
|
34609
34597
|
const toolCallCounts = {};
|
|
34610
|
-
let
|
|
34611
|
-
for (const
|
|
34612
|
-
if (
|
|
34613
|
-
|
|
34614
|
-
|
|
34615
|
-
|
|
34616
|
-
errorCount++;
|
|
34598
|
+
let totalToolCalls = 0;
|
|
34599
|
+
for (const message of messages) {
|
|
34600
|
+
if (!message.toolCalls) continue;
|
|
34601
|
+
for (const toolCall of message.toolCalls) {
|
|
34602
|
+
toolCallCounts[toolCall.tool] = (toolCallCounts[toolCall.tool] ?? 0) + 1;
|
|
34603
|
+
totalToolCalls++;
|
|
34617
34604
|
}
|
|
34618
34605
|
}
|
|
34619
34606
|
const toolNames = Object.keys(toolCallCounts).sort();
|
|
34620
34607
|
return {
|
|
34621
|
-
eventCount:
|
|
34608
|
+
eventCount: totalToolCalls,
|
|
34622
34609
|
toolNames,
|
|
34623
34610
|
toolCallsByName: toolCallCounts,
|
|
34624
|
-
errorCount
|
|
34611
|
+
errorCount: 0
|
|
34625
34612
|
};
|
|
34626
34613
|
}
|
|
34627
34614
|
function extractCodeBlocks(segments) {
|
|
@@ -34869,7 +34856,8 @@ var TEMPLATE_VARIABLES = {
|
|
|
34869
34856
|
QUESTION: "question",
|
|
34870
34857
|
EXPECTED_OUTCOME: "expected_outcome",
|
|
34871
34858
|
REFERENCE_ANSWER: "reference_answer",
|
|
34872
|
-
INPUT_MESSAGES: "input_messages"
|
|
34859
|
+
INPUT_MESSAGES: "input_messages",
|
|
34860
|
+
OUTPUT_MESSAGES: "output_messages"
|
|
34873
34861
|
};
|
|
34874
34862
|
var VALID_TEMPLATE_VARIABLES = new Set(Object.values(TEMPLATE_VARIABLES));
|
|
34875
34863
|
var REQUIRED_TEMPLATE_VARIABLES = /* @__PURE__ */ new Set([
|
|
@@ -35738,16 +35726,16 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
|
|
|
35738
35726
|
}) : [];
|
|
35739
35727
|
const codeSnippets = extractCodeBlocks(inputSegments);
|
|
35740
35728
|
let referenceAnswer = "";
|
|
35741
|
-
if (outputSegments.length >
|
|
35742
|
-
|
|
35743
|
-
|
|
35744
|
-
const
|
|
35745
|
-
if (typeof
|
|
35746
|
-
referenceAnswer =
|
|
35747
|
-
} else if (
|
|
35748
|
-
referenceAnswer = JSON.stringify(
|
|
35749
|
-
} else if (
|
|
35750
|
-
referenceAnswer = JSON.stringify(
|
|
35729
|
+
if (outputSegments.length > 0) {
|
|
35730
|
+
const lastMessage = outputSegments[outputSegments.length - 1];
|
|
35731
|
+
const content = lastMessage.content;
|
|
35732
|
+
const toolCalls = lastMessage.tool_calls;
|
|
35733
|
+
if (typeof content === "string") {
|
|
35734
|
+
referenceAnswer = content;
|
|
35735
|
+
} else if (content !== void 0 && content !== null) {
|
|
35736
|
+
referenceAnswer = JSON.stringify(content, null, 2);
|
|
35737
|
+
} else if (toolCalls !== void 0 && toolCalls !== null) {
|
|
35738
|
+
referenceAnswer = JSON.stringify(toolCalls, null, 2);
|
|
35751
35739
|
}
|
|
35752
35740
|
}
|
|
35753
35741
|
const question = inputTextParts.map((part) => part.trim()).filter((part) => part.length > 0).join(" ");
|
|
@@ -36069,11 +36057,11 @@ async function invokeModel(options) {
|
|
|
36069
36057
|
return mapResponse(result);
|
|
36070
36058
|
}
|
|
36071
36059
|
function mapResponse(result) {
|
|
36060
|
+
const content = result.text ?? "";
|
|
36072
36061
|
return {
|
|
36073
|
-
text: result.text ?? "",
|
|
36074
|
-
reasoning: result.reasoningText ?? void 0,
|
|
36075
36062
|
raw: result,
|
|
36076
|
-
usage: toJsonObject(result.totalUsage ?? result.usage)
|
|
36063
|
+
usage: toJsonObject(result.totalUsage ?? result.usage),
|
|
36064
|
+
outputMessages: [{ role: "assistant", content }]
|
|
36077
36065
|
};
|
|
36078
36066
|
}
|
|
36079
36067
|
function toJsonObject(value) {
|
|
@@ -36180,7 +36168,7 @@ async function withRetry(fn, retryConfig, signal) {
|
|
|
36180
36168
|
}
|
|
36181
36169
|
var execAsync2 = promisify2(execWithCallback);
|
|
36182
36170
|
var DEFAULT_MAX_BUFFER = 10 * 1024 * 1024;
|
|
36183
|
-
async function defaultCommandRunner(
|
|
36171
|
+
async function defaultCommandRunner(command6, options) {
|
|
36184
36172
|
const execOptions = {
|
|
36185
36173
|
cwd: options.cwd,
|
|
36186
36174
|
env: options.env,
|
|
@@ -36190,7 +36178,7 @@ async function defaultCommandRunner(command5, options) {
|
|
|
36190
36178
|
shell: process.platform === "win32" ? "powershell.exe" : void 0
|
|
36191
36179
|
};
|
|
36192
36180
|
try {
|
|
36193
|
-
const { stdout, stderr } = await execAsync2(
|
|
36181
|
+
const { stdout, stderr } = await execAsync2(command6, execOptions);
|
|
36194
36182
|
return {
|
|
36195
36183
|
stdout,
|
|
36196
36184
|
stderr,
|
|
@@ -36219,6 +36207,7 @@ var CliProvider = class {
|
|
|
36219
36207
|
config;
|
|
36220
36208
|
runCommand;
|
|
36221
36209
|
verbose;
|
|
36210
|
+
keepTempFiles;
|
|
36222
36211
|
healthcheckPromise;
|
|
36223
36212
|
constructor(targetName, config2, runner = defaultCommandRunner) {
|
|
36224
36213
|
this.targetName = targetName;
|
|
@@ -36226,6 +36215,7 @@ var CliProvider = class {
|
|
|
36226
36215
|
this.config = config2;
|
|
36227
36216
|
this.runCommand = runner;
|
|
36228
36217
|
this.verbose = config2.verbose ?? false;
|
|
36218
|
+
this.keepTempFiles = config2.keepTempFiles ?? false;
|
|
36229
36219
|
}
|
|
36230
36220
|
async invoke(request) {
|
|
36231
36221
|
if (request.signal?.aborted) {
|
|
@@ -36263,8 +36253,7 @@ var CliProvider = class {
|
|
|
36263
36253
|
const responseContent = await this.readAndCleanupOutputFile(outputFilePath);
|
|
36264
36254
|
const parsed = this.parseOutputContent(responseContent);
|
|
36265
36255
|
return {
|
|
36266
|
-
|
|
36267
|
-
trace: parsed.trace,
|
|
36256
|
+
outputMessages: parsed.outputMessages,
|
|
36268
36257
|
raw: {
|
|
36269
36258
|
command: renderedCommand,
|
|
36270
36259
|
stderr: result.stderr,
|
|
@@ -36343,7 +36332,7 @@ var CliProvider = class {
|
|
|
36343
36332
|
const evalCaseId = request.evalCaseId;
|
|
36344
36333
|
if (!evalCaseId) {
|
|
36345
36334
|
return {
|
|
36346
|
-
|
|
36335
|
+
outputMessages: [],
|
|
36347
36336
|
raw: {
|
|
36348
36337
|
command: renderedCommand,
|
|
36349
36338
|
stderr: result.stderr,
|
|
@@ -36356,7 +36345,7 @@ var CliProvider = class {
|
|
|
36356
36345
|
const parsed = recordsById.get(evalCaseId);
|
|
36357
36346
|
if (!parsed) {
|
|
36358
36347
|
return {
|
|
36359
|
-
|
|
36348
|
+
outputMessages: [],
|
|
36360
36349
|
raw: {
|
|
36361
36350
|
command: renderedCommand,
|
|
36362
36351
|
stderr: result.stderr,
|
|
@@ -36367,9 +36356,7 @@ var CliProvider = class {
|
|
|
36367
36356
|
};
|
|
36368
36357
|
}
|
|
36369
36358
|
return {
|
|
36370
|
-
|
|
36371
|
-
trace: parsed.trace,
|
|
36372
|
-
traceRef: parsed.traceRef,
|
|
36359
|
+
outputMessages: parsed.outputMessages,
|
|
36373
36360
|
raw: {
|
|
36374
36361
|
command: renderedCommand,
|
|
36375
36362
|
stderr: result.stderr,
|
|
@@ -36384,28 +36371,81 @@ var CliProvider = class {
|
|
|
36384
36371
|
}
|
|
36385
36372
|
/**
|
|
36386
36373
|
* Parse output content from CLI.
|
|
36387
|
-
* If the content is valid JSON with
|
|
36388
|
-
*
|
|
36374
|
+
* If the content is valid JSON with 'output_messages' or 'text' field, extract them.
|
|
36375
|
+
* If only 'text' is provided, wrap it in outputMessages.
|
|
36376
|
+
* Otherwise, treat the entire content as plain text wrapped in outputMessages.
|
|
36389
36377
|
*/
|
|
36390
36378
|
parseOutputContent(content) {
|
|
36391
36379
|
try {
|
|
36392
36380
|
const parsed = JSON.parse(content);
|
|
36393
|
-
if (typeof parsed === "object" && parsed !== null
|
|
36381
|
+
if (typeof parsed === "object" && parsed !== null) {
|
|
36394
36382
|
const obj = parsed;
|
|
36395
|
-
const
|
|
36396
|
-
|
|
36397
|
-
|
|
36383
|
+
const outputMessages = this.parseOutputMessages(obj.output_messages);
|
|
36384
|
+
if (outputMessages && outputMessages.length > 0) {
|
|
36385
|
+
return { outputMessages };
|
|
36386
|
+
}
|
|
36387
|
+
if ("text" in obj) {
|
|
36388
|
+
const text2 = typeof obj.text === "string" ? obj.text : String(obj.text);
|
|
36389
|
+
return { outputMessages: [{ role: "assistant", content: text2 }] };
|
|
36390
|
+
}
|
|
36398
36391
|
}
|
|
36399
36392
|
} catch {
|
|
36400
36393
|
}
|
|
36401
|
-
return {
|
|
36394
|
+
return { outputMessages: [{ role: "assistant", content }] };
|
|
36402
36395
|
}
|
|
36403
|
-
|
|
36404
|
-
|
|
36396
|
+
/**
|
|
36397
|
+
* Parse output_messages from JSONL (snake_case) and convert to OutputMessage[] (camelCase).
|
|
36398
|
+
*/
|
|
36399
|
+
parseOutputMessages(outputMessages) {
|
|
36400
|
+
if (!Array.isArray(outputMessages)) {
|
|
36405
36401
|
return void 0;
|
|
36406
36402
|
}
|
|
36407
|
-
const
|
|
36408
|
-
|
|
36403
|
+
const messages = [];
|
|
36404
|
+
for (const msg of outputMessages) {
|
|
36405
|
+
if (typeof msg !== "object" || msg === null) {
|
|
36406
|
+
continue;
|
|
36407
|
+
}
|
|
36408
|
+
const rawMsg = msg;
|
|
36409
|
+
if (typeof rawMsg.role !== "string") {
|
|
36410
|
+
continue;
|
|
36411
|
+
}
|
|
36412
|
+
const message = {
|
|
36413
|
+
role: rawMsg.role,
|
|
36414
|
+
name: typeof rawMsg.name === "string" ? rawMsg.name : void 0,
|
|
36415
|
+
content: rawMsg.content,
|
|
36416
|
+
toolCalls: this.parseToolCalls(rawMsg.tool_calls),
|
|
36417
|
+
timestamp: typeof rawMsg.timestamp === "string" ? rawMsg.timestamp : void 0,
|
|
36418
|
+
metadata: typeof rawMsg.metadata === "object" && rawMsg.metadata !== null ? rawMsg.metadata : void 0
|
|
36419
|
+
};
|
|
36420
|
+
messages.push(message);
|
|
36421
|
+
}
|
|
36422
|
+
return messages.length > 0 ? messages : void 0;
|
|
36423
|
+
}
|
|
36424
|
+
/**
|
|
36425
|
+
* Parse tool_calls from JSONL (snake_case) and convert to ToolCall[] format.
|
|
36426
|
+
*/
|
|
36427
|
+
parseToolCalls(toolCalls) {
|
|
36428
|
+
if (!Array.isArray(toolCalls)) {
|
|
36429
|
+
return void 0;
|
|
36430
|
+
}
|
|
36431
|
+
const calls = [];
|
|
36432
|
+
for (const call of toolCalls) {
|
|
36433
|
+
if (typeof call !== "object" || call === null) {
|
|
36434
|
+
continue;
|
|
36435
|
+
}
|
|
36436
|
+
const rawCall = call;
|
|
36437
|
+
if (typeof rawCall.tool !== "string") {
|
|
36438
|
+
continue;
|
|
36439
|
+
}
|
|
36440
|
+
calls.push({
|
|
36441
|
+
tool: rawCall.tool,
|
|
36442
|
+
input: rawCall.input,
|
|
36443
|
+
output: rawCall.output,
|
|
36444
|
+
id: typeof rawCall.id === "string" ? rawCall.id : void 0,
|
|
36445
|
+
timestamp: typeof rawCall.timestamp === "string" ? rawCall.timestamp : void 0
|
|
36446
|
+
});
|
|
36447
|
+
}
|
|
36448
|
+
return calls.length > 0 ? calls : void 0;
|
|
36409
36449
|
}
|
|
36410
36450
|
parseJsonlBatchOutput(content) {
|
|
36411
36451
|
const records = /* @__PURE__ */ new Map();
|
|
@@ -36429,12 +36469,16 @@ var CliProvider = class {
|
|
|
36429
36469
|
if (records.has(id)) {
|
|
36430
36470
|
throw new Error(`CLI batch output contains duplicate id: ${id}`);
|
|
36431
36471
|
}
|
|
36432
|
-
const
|
|
36433
|
-
|
|
36472
|
+
const parsedOutputMessages = this.parseOutputMessages(obj.output_messages);
|
|
36473
|
+
let outputMessages;
|
|
36474
|
+
if (parsedOutputMessages && parsedOutputMessages.length > 0) {
|
|
36475
|
+
outputMessages = parsedOutputMessages;
|
|
36476
|
+
} else {
|
|
36477
|
+
const text2 = typeof obj.text === "string" ? obj.text : obj.text === void 0 ? "" : JSON.stringify(obj.text);
|
|
36478
|
+
outputMessages = text2 ? [{ role: "assistant", content: text2 }] : [];
|
|
36479
|
+
}
|
|
36434
36480
|
records.set(id, {
|
|
36435
|
-
|
|
36436
|
-
trace: this.parseTrace(obj.trace),
|
|
36437
|
-
traceRef
|
|
36481
|
+
outputMessages
|
|
36438
36482
|
});
|
|
36439
36483
|
}
|
|
36440
36484
|
return records;
|
|
@@ -36447,8 +36491,10 @@ var CliProvider = class {
|
|
|
36447
36491
|
const errorMsg = error40 instanceof Error ? error40.message : String(error40);
|
|
36448
36492
|
throw new Error(`Failed to read output file '${filePath}': ${errorMsg}`);
|
|
36449
36493
|
} finally {
|
|
36450
|
-
|
|
36451
|
-
|
|
36494
|
+
if (!this.keepTempFiles) {
|
|
36495
|
+
await fs.unlink(filePath).catch(() => {
|
|
36496
|
+
});
|
|
36497
|
+
}
|
|
36452
36498
|
}
|
|
36453
36499
|
}
|
|
36454
36500
|
async ensureHealthy(signal) {
|
|
@@ -36768,7 +36814,6 @@ var CodexProvider = class {
|
|
|
36768
36814
|
const parsed = parseCodexJson(result.stdout);
|
|
36769
36815
|
const assistantText = extractAssistantText(parsed);
|
|
36770
36816
|
return {
|
|
36771
|
-
text: assistantText,
|
|
36772
36817
|
raw: {
|
|
36773
36818
|
response: parsed,
|
|
36774
36819
|
stdout: result.stdout,
|
|
@@ -36780,7 +36825,8 @@ var CodexProvider = class {
|
|
|
36780
36825
|
workspace: workspaceRoot,
|
|
36781
36826
|
inputFiles,
|
|
36782
36827
|
logFile: logger?.filePath
|
|
36783
|
-
}
|
|
36828
|
+
},
|
|
36829
|
+
outputMessages: [{ role: "assistant", content: assistantText }]
|
|
36784
36830
|
};
|
|
36785
36831
|
} finally {
|
|
36786
36832
|
await logger?.close();
|
|
@@ -37400,7 +37446,6 @@ var MockProvider = class {
|
|
|
37400
37446
|
delayMs;
|
|
37401
37447
|
delayMinMs;
|
|
37402
37448
|
delayMaxMs;
|
|
37403
|
-
trace;
|
|
37404
37449
|
constructor(targetName, config2) {
|
|
37405
37450
|
this.id = `mock:${targetName}`;
|
|
37406
37451
|
this.targetName = targetName;
|
|
@@ -37408,7 +37453,6 @@ var MockProvider = class {
|
|
|
37408
37453
|
this.delayMs = config2.delayMs ?? 0;
|
|
37409
37454
|
this.delayMinMs = config2.delayMinMs ?? 0;
|
|
37410
37455
|
this.delayMaxMs = config2.delayMaxMs ?? 0;
|
|
37411
|
-
this.trace = config2.trace;
|
|
37412
37456
|
}
|
|
37413
37457
|
async invoke(request) {
|
|
37414
37458
|
const delay2 = this.calculateDelay();
|
|
@@ -37416,12 +37460,11 @@ var MockProvider = class {
|
|
|
37416
37460
|
await new Promise((resolve2) => setTimeout(resolve2, delay2));
|
|
37417
37461
|
}
|
|
37418
37462
|
return {
|
|
37419
|
-
|
|
37463
|
+
outputMessages: [{ role: "assistant", content: this.cannedResponse }],
|
|
37420
37464
|
raw: {
|
|
37421
37465
|
question: request.question,
|
|
37422
37466
|
guidelines: request.guidelines
|
|
37423
|
-
}
|
|
37424
|
-
trace: this.trace
|
|
37467
|
+
}
|
|
37425
37468
|
};
|
|
37426
37469
|
}
|
|
37427
37470
|
calculateDelay() {
|
|
@@ -37501,7 +37544,7 @@ var VSCodeProvider = class {
|
|
|
37501
37544
|
}
|
|
37502
37545
|
if (this.config.dryRun) {
|
|
37503
37546
|
return {
|
|
37504
|
-
|
|
37547
|
+
outputMessages: [],
|
|
37505
37548
|
raw: {
|
|
37506
37549
|
session,
|
|
37507
37550
|
inputFiles
|
|
@@ -37510,7 +37553,7 @@ var VSCodeProvider = class {
|
|
|
37510
37553
|
}
|
|
37511
37554
|
const responseText = await readTextFile(session.responseFile);
|
|
37512
37555
|
return {
|
|
37513
|
-
|
|
37556
|
+
outputMessages: [{ role: "assistant", content: responseText }],
|
|
37514
37557
|
raw: {
|
|
37515
37558
|
session,
|
|
37516
37559
|
inputFiles
|
|
@@ -37548,7 +37591,7 @@ var VSCodeProvider = class {
|
|
|
37548
37591
|
}
|
|
37549
37592
|
if (this.config.dryRun) {
|
|
37550
37593
|
return normalizedRequests.map(({ inputFiles }) => ({
|
|
37551
|
-
|
|
37594
|
+
outputMessages: [],
|
|
37552
37595
|
raw: {
|
|
37553
37596
|
session,
|
|
37554
37597
|
inputFiles,
|
|
@@ -37565,7 +37608,7 @@ var VSCodeProvider = class {
|
|
|
37565
37608
|
for (const [index, responseFile] of session.responseFiles.entries()) {
|
|
37566
37609
|
const responseText = await readTextFile(responseFile);
|
|
37567
37610
|
responses.push({
|
|
37568
|
-
|
|
37611
|
+
outputMessages: [{ role: "assistant", content: responseText }],
|
|
37569
37612
|
raw: {
|
|
37570
37613
|
session,
|
|
37571
37614
|
inputFiles: normalizedRequests[index]?.inputFiles,
|
|
@@ -37853,6 +37896,7 @@ var LlmJudgeEvaluator = class {
|
|
|
37853
37896
|
null,
|
|
37854
37897
|
2
|
|
37855
37898
|
),
|
|
37899
|
+
[TEMPLATE_VARIABLES.OUTPUT_MESSAGES]: JSON.stringify(context.outputMessages ?? [], null, 2),
|
|
37856
37900
|
[TEMPLATE_VARIABLES.CANDIDATE_ANSWER]: context.candidate.trim(),
|
|
37857
37901
|
[TEMPLATE_VARIABLES.REFERENCE_ANSWER]: (context.evalCase.reference_answer ?? "").trim(),
|
|
37858
37902
|
[TEMPLATE_VARIABLES.EXPECTED_OUTCOME]: context.evalCase.expected_outcome.trim(),
|
|
@@ -37877,7 +37921,7 @@ var LlmJudgeEvaluator = class {
|
|
|
37877
37921
|
const score = clampScore(data.score);
|
|
37878
37922
|
const hits = Array.isArray(data.hits) ? data.hits.filter(isNonEmptyString).slice(0, 4) : [];
|
|
37879
37923
|
const misses = Array.isArray(data.misses) ? data.misses.filter(isNonEmptyString).slice(0, 4) : [];
|
|
37880
|
-
const reasoning = data.reasoning
|
|
37924
|
+
const reasoning = data.reasoning;
|
|
37881
37925
|
const expectedAspectCount = Math.max(hits.length + misses.length, 1);
|
|
37882
37926
|
return {
|
|
37883
37927
|
score,
|
|
@@ -37979,7 +38023,9 @@ var LlmJudgeEvaluator = class {
|
|
|
37979
38023
|
maxOutputTokens: this.maxOutputTokens,
|
|
37980
38024
|
temperature: this.temperature
|
|
37981
38025
|
});
|
|
37982
|
-
const data = schema.parse(
|
|
38026
|
+
const data = schema.parse(
|
|
38027
|
+
parseJsonFromText(extractLastAssistantContent(response.outputMessages))
|
|
38028
|
+
);
|
|
37983
38029
|
return { data, providerResponse: response };
|
|
37984
38030
|
} catch (e) {
|
|
37985
38031
|
lastError = e instanceof Error ? e : new Error(String(e));
|
|
@@ -38065,13 +38111,13 @@ var CodeEvaluator = class {
|
|
|
38065
38111
|
expected_messages: context.evalCase.expected_messages,
|
|
38066
38112
|
reference_answer: context.evalCase.reference_answer,
|
|
38067
38113
|
candidate_answer: context.candidate,
|
|
38114
|
+
output_messages: context.outputMessages ?? null,
|
|
38068
38115
|
guideline_files: context.evalCase.guideline_paths,
|
|
38069
38116
|
input_files: context.evalCase.file_paths.filter(
|
|
38070
38117
|
(path132) => !context.evalCase.guideline_paths.includes(path132)
|
|
38071
38118
|
),
|
|
38072
38119
|
input_messages: context.evalCase.input_messages,
|
|
38073
|
-
|
|
38074
|
-
candidate_trace_summary: context.candidateTraceSummary ?? null
|
|
38120
|
+
candidate_trace_summary: context.traceSummary ?? null
|
|
38075
38121
|
},
|
|
38076
38122
|
null,
|
|
38077
38123
|
2
|
|
@@ -38198,8 +38244,19 @@ var ToolTrajectoryEvaluator = class {
|
|
|
38198
38244
|
this.config = options.config;
|
|
38199
38245
|
}
|
|
38200
38246
|
evaluate(context) {
|
|
38201
|
-
const {
|
|
38202
|
-
|
|
38247
|
+
const { outputMessages, traceSummary } = context;
|
|
38248
|
+
const toolCalls = this.extractToolCallsFromMessages(outputMessages);
|
|
38249
|
+
if (toolCalls.length === 0 && !traceSummary) {
|
|
38250
|
+
return {
|
|
38251
|
+
score: 0,
|
|
38252
|
+
verdict: "fail",
|
|
38253
|
+
hits: [],
|
|
38254
|
+
misses: ["No trace available for evaluation"],
|
|
38255
|
+
expectedAspectCount: 1
|
|
38256
|
+
};
|
|
38257
|
+
}
|
|
38258
|
+
const summary = toolCalls.length > 0 ? this.buildSummary(toolCalls) : traceSummary;
|
|
38259
|
+
if (!summary) {
|
|
38203
38260
|
return {
|
|
38204
38261
|
score: 0,
|
|
38205
38262
|
verdict: "fail",
|
|
@@ -38210,11 +38267,11 @@ var ToolTrajectoryEvaluator = class {
|
|
|
38210
38267
|
}
|
|
38211
38268
|
switch (this.config.mode) {
|
|
38212
38269
|
case "any_order":
|
|
38213
|
-
return this.evaluateAnyOrder(
|
|
38270
|
+
return this.evaluateAnyOrder(summary);
|
|
38214
38271
|
case "in_order":
|
|
38215
|
-
return this.evaluateInOrder(
|
|
38272
|
+
return this.evaluateInOrder(toolCalls);
|
|
38216
38273
|
case "exact":
|
|
38217
|
-
return this.evaluateExact(
|
|
38274
|
+
return this.evaluateExact(toolCalls);
|
|
38218
38275
|
default:
|
|
38219
38276
|
return {
|
|
38220
38277
|
score: 0,
|
|
@@ -38225,6 +38282,39 @@ var ToolTrajectoryEvaluator = class {
|
|
|
38225
38282
|
};
|
|
38226
38283
|
}
|
|
38227
38284
|
}
|
|
38285
|
+
/**
|
|
38286
|
+
* Extract tool calls from output messages.
|
|
38287
|
+
*/
|
|
38288
|
+
extractToolCallsFromMessages(messages) {
|
|
38289
|
+
if (!messages) {
|
|
38290
|
+
return [];
|
|
38291
|
+
}
|
|
38292
|
+
const toolCalls = [];
|
|
38293
|
+
for (const message of messages) {
|
|
38294
|
+
if (message.toolCalls) {
|
|
38295
|
+
for (const call of message.toolCalls) {
|
|
38296
|
+
toolCalls.push({ name: call.tool });
|
|
38297
|
+
}
|
|
38298
|
+
}
|
|
38299
|
+
}
|
|
38300
|
+
return toolCalls;
|
|
38301
|
+
}
|
|
38302
|
+
/**
|
|
38303
|
+
* Build a summary from extracted tool calls.
|
|
38304
|
+
*/
|
|
38305
|
+
buildSummary(toolCalls) {
|
|
38306
|
+
const toolCallsByName = {};
|
|
38307
|
+
for (const call of toolCalls) {
|
|
38308
|
+
toolCallsByName[call.name] = (toolCallsByName[call.name] ?? 0) + 1;
|
|
38309
|
+
}
|
|
38310
|
+
const toolNames = Object.keys(toolCallsByName).sort();
|
|
38311
|
+
return {
|
|
38312
|
+
eventCount: toolCalls.length,
|
|
38313
|
+
toolNames,
|
|
38314
|
+
toolCallsByName,
|
|
38315
|
+
errorCount: 0
|
|
38316
|
+
};
|
|
38317
|
+
}
|
|
38228
38318
|
evaluateAnyOrder(summary) {
|
|
38229
38319
|
const minimums = this.config.minimums ?? {};
|
|
38230
38320
|
const toolNames = Object.keys(minimums);
|
|
@@ -38257,7 +38347,7 @@ var ToolTrajectoryEvaluator = class {
|
|
|
38257
38347
|
expectedAspectCount: toolNames.length
|
|
38258
38348
|
};
|
|
38259
38349
|
}
|
|
38260
|
-
evaluateInOrder(
|
|
38350
|
+
evaluateInOrder(toolCalls) {
|
|
38261
38351
|
const expected = this.config.expected ?? [];
|
|
38262
38352
|
if (expected.length === 0) {
|
|
38263
38353
|
return {
|
|
@@ -38268,15 +38358,14 @@ var ToolTrajectoryEvaluator = class {
|
|
|
38268
38358
|
expectedAspectCount: 0
|
|
38269
38359
|
};
|
|
38270
38360
|
}
|
|
38271
|
-
const actualToolCalls = trace2.filter((e) => e.type === "tool_call" && e.name);
|
|
38272
38361
|
const hits = [];
|
|
38273
38362
|
const misses = [];
|
|
38274
38363
|
let actualIndex = 0;
|
|
38275
38364
|
for (let i = 0; i < expected.length; i++) {
|
|
38276
38365
|
const expectedTool = expected[i].tool;
|
|
38277
38366
|
let found = false;
|
|
38278
|
-
while (actualIndex <
|
|
38279
|
-
if (
|
|
38367
|
+
while (actualIndex < toolCalls.length) {
|
|
38368
|
+
if (toolCalls[actualIndex].name === expectedTool) {
|
|
38280
38369
|
hits.push(`Found ${expectedTool} at position ${actualIndex}`);
|
|
38281
38370
|
actualIndex++;
|
|
38282
38371
|
found = true;
|
|
@@ -38297,7 +38386,7 @@ var ToolTrajectoryEvaluator = class {
|
|
|
38297
38386
|
expectedAspectCount: expected.length
|
|
38298
38387
|
};
|
|
38299
38388
|
}
|
|
38300
|
-
evaluateExact(
|
|
38389
|
+
evaluateExact(toolCalls) {
|
|
38301
38390
|
const expected = this.config.expected ?? [];
|
|
38302
38391
|
if (expected.length === 0) {
|
|
38303
38392
|
return {
|
|
@@ -38308,16 +38397,15 @@ var ToolTrajectoryEvaluator = class {
|
|
|
38308
38397
|
expectedAspectCount: 0
|
|
38309
38398
|
};
|
|
38310
38399
|
}
|
|
38311
|
-
const actualToolCalls = trace2.filter((e) => e.type === "tool_call" && e.name);
|
|
38312
38400
|
const hits = [];
|
|
38313
38401
|
const misses = [];
|
|
38314
|
-
if (
|
|
38315
|
-
misses.push(`Expected ${expected.length} tool calls, got ${
|
|
38402
|
+
if (toolCalls.length !== expected.length) {
|
|
38403
|
+
misses.push(`Expected ${expected.length} tool calls, got ${toolCalls.length}`);
|
|
38316
38404
|
}
|
|
38317
|
-
const checkLength = Math.min(expected.length,
|
|
38405
|
+
const checkLength = Math.min(expected.length, toolCalls.length);
|
|
38318
38406
|
for (let i = 0; i < checkLength; i++) {
|
|
38319
38407
|
const expectedTool = expected[i].tool;
|
|
38320
|
-
const actualTool =
|
|
38408
|
+
const actualTool = toolCalls[i].name;
|
|
38321
38409
|
if (actualTool === expectedTool) {
|
|
38322
38410
|
hits.push(`Position ${i}: ${expectedTool} \u2713`);
|
|
38323
38411
|
} else {
|
|
@@ -38531,11 +38619,13 @@ var CompositeEvaluator = class {
|
|
|
38531
38619
|
evalCaseId: context.evalCase.id,
|
|
38532
38620
|
attempt: context.attempt
|
|
38533
38621
|
});
|
|
38534
|
-
const data = freeformEvaluationSchema.parse(
|
|
38622
|
+
const data = freeformEvaluationSchema.parse(
|
|
38623
|
+
parseJsonFromText(extractLastAssistantContent(response.outputMessages))
|
|
38624
|
+
);
|
|
38535
38625
|
const score = clampScore(data.score);
|
|
38536
38626
|
const hits = Array.isArray(data.hits) ? data.hits.filter(isNonEmptyString).slice(0, 4) : [];
|
|
38537
38627
|
const misses = Array.isArray(data.misses) ? data.misses.filter(isNonEmptyString).slice(0, 4) : [];
|
|
38538
|
-
const reasoning = data.reasoning
|
|
38628
|
+
const reasoning = data.reasoning;
|
|
38539
38629
|
return {
|
|
38540
38630
|
score,
|
|
38541
38631
|
verdict: scoreToVerdict(score),
|
|
@@ -38947,11 +39037,14 @@ async function runBatchEvaluation(options) {
|
|
|
38947
39037
|
const evalCase = evalCases[i];
|
|
38948
39038
|
const promptInputs = promptInputsList[i];
|
|
38949
39039
|
const providerResponse = batchResponse[i];
|
|
39040
|
+
const outputMessages = providerResponse.outputMessages;
|
|
39041
|
+
const traceSummary = outputMessages ? computeTraceSummary(outputMessages) : void 0;
|
|
39042
|
+
const candidate = extractLastAssistantContent(outputMessages);
|
|
38950
39043
|
let result;
|
|
38951
39044
|
try {
|
|
38952
39045
|
result = await evaluateCandidate({
|
|
38953
39046
|
evalCase,
|
|
38954
|
-
candidate
|
|
39047
|
+
candidate,
|
|
38955
39048
|
target,
|
|
38956
39049
|
provider,
|
|
38957
39050
|
evaluators: evaluatorRegistry,
|
|
@@ -38959,7 +39052,9 @@ async function runBatchEvaluation(options) {
|
|
|
38959
39052
|
nowFn,
|
|
38960
39053
|
attempt: 0,
|
|
38961
39054
|
judgeProvider: await resolveJudgeProvider(target),
|
|
38962
|
-
agentTimeoutMs
|
|
39055
|
+
agentTimeoutMs,
|
|
39056
|
+
outputMessages,
|
|
39057
|
+
traceSummary
|
|
38963
39058
|
});
|
|
38964
39059
|
} catch (error40) {
|
|
38965
39060
|
const errorResult = buildErrorResult(
|
|
@@ -39063,21 +39158,13 @@ async function runEvalCase(options) {
|
|
|
39063
39158
|
if (cacheKey && cache && !cachedResponse) {
|
|
39064
39159
|
await cache.set(cacheKey, providerResponse);
|
|
39065
39160
|
}
|
|
39066
|
-
|
|
39067
|
-
|
|
39068
|
-
|
|
39069
|
-
const rawTrace = await readJsonFile(providerResponse.traceRef);
|
|
39070
|
-
if (Array.isArray(rawTrace) && rawTrace.every(isTraceEvent)) {
|
|
39071
|
-
candidateTrace = rawTrace;
|
|
39072
|
-
}
|
|
39073
|
-
} catch {
|
|
39074
|
-
}
|
|
39075
|
-
}
|
|
39076
|
-
const candidateTraceSummary = candidateTrace ? computeTraceSummary(candidateTrace) : void 0;
|
|
39161
|
+
const outputMessages = providerResponse.outputMessages;
|
|
39162
|
+
const traceSummary = outputMessages ? computeTraceSummary(outputMessages) : void 0;
|
|
39163
|
+
const candidate = extractLastAssistantContent(outputMessages);
|
|
39077
39164
|
try {
|
|
39078
39165
|
return await evaluateCandidate({
|
|
39079
39166
|
evalCase,
|
|
39080
|
-
candidate
|
|
39167
|
+
candidate,
|
|
39081
39168
|
target,
|
|
39082
39169
|
provider,
|
|
39083
39170
|
evaluators,
|
|
@@ -39086,9 +39173,8 @@ async function runEvalCase(options) {
|
|
|
39086
39173
|
attempt,
|
|
39087
39174
|
judgeProvider,
|
|
39088
39175
|
agentTimeoutMs,
|
|
39089
|
-
|
|
39090
|
-
|
|
39091
|
-
candidateTraceSummary
|
|
39176
|
+
outputMessages,
|
|
39177
|
+
traceSummary
|
|
39092
39178
|
});
|
|
39093
39179
|
} catch (error40) {
|
|
39094
39180
|
return buildErrorResult(evalCase, target.name, nowFn(), error40, promptInputs, provider);
|
|
@@ -39106,9 +39192,8 @@ async function evaluateCandidate(options) {
|
|
|
39106
39192
|
attempt,
|
|
39107
39193
|
judgeProvider,
|
|
39108
39194
|
agentTimeoutMs,
|
|
39109
|
-
|
|
39110
|
-
|
|
39111
|
-
candidateTraceSummary
|
|
39195
|
+
outputMessages,
|
|
39196
|
+
traceSummary
|
|
39112
39197
|
} = options;
|
|
39113
39198
|
const gradeTimestamp = nowFn();
|
|
39114
39199
|
const { score, evaluatorResults } = await runEvaluatorsForCase({
|
|
@@ -39122,9 +39207,8 @@ async function evaluateCandidate(options) {
|
|
|
39122
39207
|
now: gradeTimestamp,
|
|
39123
39208
|
judgeProvider,
|
|
39124
39209
|
agentTimeoutMs,
|
|
39125
|
-
|
|
39126
|
-
|
|
39127
|
-
candidateTraceSummary
|
|
39210
|
+
outputMessages,
|
|
39211
|
+
traceSummary
|
|
39128
39212
|
});
|
|
39129
39213
|
const completedAt = nowFn();
|
|
39130
39214
|
let agentProviderRequest;
|
|
@@ -39162,7 +39246,7 @@ async function evaluateCandidate(options) {
|
|
|
39162
39246
|
lm_provider_request: lmProviderRequest,
|
|
39163
39247
|
evaluator_provider_request: evaluatorResults ? void 0 : score.evaluatorRawRequest,
|
|
39164
39248
|
evaluator_results: evaluatorResults,
|
|
39165
|
-
trace_summary:
|
|
39249
|
+
trace_summary: traceSummary
|
|
39166
39250
|
};
|
|
39167
39251
|
}
|
|
39168
39252
|
async function runEvaluatorsForCase(options) {
|
|
@@ -39177,9 +39261,8 @@ async function runEvaluatorsForCase(options) {
|
|
|
39177
39261
|
now,
|
|
39178
39262
|
judgeProvider,
|
|
39179
39263
|
agentTimeoutMs,
|
|
39180
|
-
|
|
39181
|
-
|
|
39182
|
-
candidateTraceSummary
|
|
39264
|
+
outputMessages,
|
|
39265
|
+
traceSummary
|
|
39183
39266
|
} = options;
|
|
39184
39267
|
if (evalCase.evaluators && evalCase.evaluators.length > 0) {
|
|
39185
39268
|
return runEvaluatorList({
|
|
@@ -39194,9 +39277,8 @@ async function runEvaluatorsForCase(options) {
|
|
|
39194
39277
|
now,
|
|
39195
39278
|
judgeProvider,
|
|
39196
39279
|
agentTimeoutMs,
|
|
39197
|
-
|
|
39198
|
-
|
|
39199
|
-
candidateTraceSummary
|
|
39280
|
+
outputMessages,
|
|
39281
|
+
traceSummary
|
|
39200
39282
|
});
|
|
39201
39283
|
}
|
|
39202
39284
|
const evaluatorKind = evalCase.evaluator ?? "llm_judge";
|
|
@@ -39213,9 +39295,8 @@ async function runEvaluatorsForCase(options) {
|
|
|
39213
39295
|
promptInputs,
|
|
39214
39296
|
now,
|
|
39215
39297
|
judgeProvider,
|
|
39216
|
-
|
|
39217
|
-
|
|
39218
|
-
candidateTraceSummary
|
|
39298
|
+
outputMessages,
|
|
39299
|
+
traceSummary
|
|
39219
39300
|
});
|
|
39220
39301
|
return { score };
|
|
39221
39302
|
}
|
|
@@ -39232,9 +39313,8 @@ async function runEvaluatorList(options) {
|
|
|
39232
39313
|
now,
|
|
39233
39314
|
judgeProvider,
|
|
39234
39315
|
agentTimeoutMs,
|
|
39235
|
-
|
|
39236
|
-
|
|
39237
|
-
candidateTraceSummary
|
|
39316
|
+
outputMessages,
|
|
39317
|
+
traceSummary
|
|
39238
39318
|
} = options;
|
|
39239
39319
|
const scored = [];
|
|
39240
39320
|
const evaluatorResults = [];
|
|
@@ -39281,8 +39361,8 @@ async function runEvaluatorList(options) {
|
|
|
39281
39361
|
attempt,
|
|
39282
39362
|
promptInputs,
|
|
39283
39363
|
now,
|
|
39284
|
-
|
|
39285
|
-
|
|
39364
|
+
outputMessages,
|
|
39365
|
+
traceSummary
|
|
39286
39366
|
});
|
|
39287
39367
|
const weight = evaluator.weight ?? 1;
|
|
39288
39368
|
scored.push({ score: score2, name: evaluator.name, type: "code_judge", weight });
|
|
@@ -39368,9 +39448,8 @@ async function runEvaluatorList(options) {
|
|
|
39368
39448
|
attempt,
|
|
39369
39449
|
promptInputs,
|
|
39370
39450
|
now,
|
|
39371
|
-
|
|
39372
|
-
|
|
39373
|
-
candidateTraceSummary
|
|
39451
|
+
outputMessages,
|
|
39452
|
+
traceSummary
|
|
39374
39453
|
});
|
|
39375
39454
|
const weight = evaluator.weight ?? 1;
|
|
39376
39455
|
scored.push({ score: score2, name: evaluator.name, type: evaluator.type, weight });
|
|
@@ -39730,16 +39809,90 @@ function buildPrompt(expectedOutcome, question, referenceAnswer) {
|
|
|
39730
39809
|
return parts.join("\n");
|
|
39731
39810
|
}
|
|
39732
39811
|
|
|
39812
|
+
// src/commands/convert/index.ts
|
|
39813
|
+
import { command, option, optional as optional2, positional, string as string4 } from "cmd-ts";
|
|
39814
|
+
import { stringify as stringifyYaml } from "yaml";
|
|
39815
|
+
function convertJsonlToYaml(inputPath, outputPath) {
|
|
39816
|
+
const content = readFileSync(inputPath, "utf8");
|
|
39817
|
+
const lines = content.trim().split("\n").filter((line2) => line2.trim());
|
|
39818
|
+
let yamlOutput = "";
|
|
39819
|
+
let isFirst = true;
|
|
39820
|
+
for (const line2 of lines) {
|
|
39821
|
+
const record2 = JSON.parse(line2);
|
|
39822
|
+
const yamlDoc = stringifyYaml(record2, {
|
|
39823
|
+
indent: 2,
|
|
39824
|
+
lineWidth: 0
|
|
39825
|
+
});
|
|
39826
|
+
const normalizedYaml = normalizeLineEndings(yamlDoc);
|
|
39827
|
+
const separator = isFirst ? "---\n" : "\n---\n";
|
|
39828
|
+
isFirst = false;
|
|
39829
|
+
yamlOutput += separator + normalizedYaml;
|
|
39830
|
+
}
|
|
39831
|
+
writeFileSync(outputPath, yamlOutput);
|
|
39832
|
+
return lines.length;
|
|
39833
|
+
}
|
|
39834
|
+
var convertCommand = command({
|
|
39835
|
+
name: "convert",
|
|
39836
|
+
description: "Convert evaluation results from JSONL to YAML format",
|
|
39837
|
+
args: {
|
|
39838
|
+
input: positional({
|
|
39839
|
+
type: string4,
|
|
39840
|
+
displayName: "input",
|
|
39841
|
+
description: "Path to input JSONL file"
|
|
39842
|
+
}),
|
|
39843
|
+
out: option({
|
|
39844
|
+
type: optional2(string4),
|
|
39845
|
+
long: "out",
|
|
39846
|
+
short: "o",
|
|
39847
|
+
description: "Output file path (defaults to input path with .yaml extension)"
|
|
39848
|
+
})
|
|
39849
|
+
},
|
|
39850
|
+
handler: async ({ input, out }) => {
|
|
39851
|
+
if (!input.endsWith(".jsonl")) {
|
|
39852
|
+
console.error("Error: Input file must be a .jsonl file");
|
|
39853
|
+
process.exit(1);
|
|
39854
|
+
}
|
|
39855
|
+
const outputPath = out ?? input.replace(/\.jsonl$/, ".yaml");
|
|
39856
|
+
try {
|
|
39857
|
+
const count = convertJsonlToYaml(input, outputPath);
|
|
39858
|
+
console.log(`Converted ${count} records to ${path14.resolve(outputPath)}`);
|
|
39859
|
+
} catch (error40) {
|
|
39860
|
+
console.error(`Error: ${error40.message}`);
|
|
39861
|
+
process.exit(1);
|
|
39862
|
+
}
|
|
39863
|
+
}
|
|
39864
|
+
});
|
|
39865
|
+
|
|
39866
|
+
// src/commands/eval/index.ts
|
|
39867
|
+
import { stat as stat4 } from "node:fs/promises";
|
|
39868
|
+
import path21 from "node:path";
|
|
39869
|
+
import {
|
|
39870
|
+
command as command2,
|
|
39871
|
+
flag,
|
|
39872
|
+
number as number4,
|
|
39873
|
+
option as option2,
|
|
39874
|
+
optional as optional3,
|
|
39875
|
+
restPositionals,
|
|
39876
|
+
string as string5
|
|
39877
|
+
} from "cmd-ts";
|
|
39878
|
+
import fg from "fast-glob";
|
|
39879
|
+
|
|
39880
|
+
// src/commands/eval/run-eval.ts
|
|
39881
|
+
import { constants as constants6 } from "node:fs";
|
|
39882
|
+
import { access as access6, mkdir as mkdir7 } from "node:fs/promises";
|
|
39883
|
+
import path20 from "node:path";
|
|
39884
|
+
import { pathToFileURL } from "node:url";
|
|
39885
|
+
|
|
39733
39886
|
// src/commands/eval/env.ts
|
|
39734
39887
|
import { constants as constants4 } from "node:fs";
|
|
39735
39888
|
import { access as access4 } from "node:fs/promises";
|
|
39736
|
-
import
|
|
39889
|
+
import path15 from "node:path";
|
|
39737
39890
|
import { config as loadDotenv } from "dotenv";
|
|
39738
39891
|
function uniqueDirs(directories) {
|
|
39739
39892
|
const seen = /* @__PURE__ */ new Set();
|
|
39740
39893
|
const result = [];
|
|
39741
39894
|
for (const dir of directories) {
|
|
39742
|
-
const absolute =
|
|
39895
|
+
const absolute = path15.resolve(dir);
|
|
39743
39896
|
if (seen.has(absolute)) {
|
|
39744
39897
|
continue;
|
|
39745
39898
|
}
|
|
@@ -39758,14 +39911,14 @@ async function fileExists4(filePath) {
|
|
|
39758
39911
|
}
|
|
39759
39912
|
function collectAncestorDirectories(start, boundary) {
|
|
39760
39913
|
const directories = [];
|
|
39761
|
-
const boundaryDir =
|
|
39762
|
-
let current =
|
|
39914
|
+
const boundaryDir = path15.resolve(boundary);
|
|
39915
|
+
let current = path15.resolve(start);
|
|
39763
39916
|
while (current !== void 0) {
|
|
39764
39917
|
directories.push(current);
|
|
39765
39918
|
if (current === boundaryDir) {
|
|
39766
39919
|
break;
|
|
39767
39920
|
}
|
|
39768
|
-
const parent =
|
|
39921
|
+
const parent = path15.dirname(current);
|
|
39769
39922
|
if (parent === current) {
|
|
39770
39923
|
break;
|
|
39771
39924
|
}
|
|
@@ -39775,12 +39928,12 @@ function collectAncestorDirectories(start, boundary) {
|
|
|
39775
39928
|
}
|
|
39776
39929
|
async function loadEnvFromHierarchy(options) {
|
|
39777
39930
|
const { testFilePath, repoRoot, verbose } = options;
|
|
39778
|
-
const testDir =
|
|
39931
|
+
const testDir = path15.dirname(path15.resolve(testFilePath));
|
|
39779
39932
|
const cwd = process.cwd();
|
|
39780
39933
|
const searchDirs = uniqueDirs([...collectAncestorDirectories(testDir, repoRoot), repoRoot, cwd]);
|
|
39781
39934
|
const envFiles = [];
|
|
39782
39935
|
for (const dir of searchDirs) {
|
|
39783
|
-
const candidate =
|
|
39936
|
+
const candidate = path15.join(dir, ".env");
|
|
39784
39937
|
if (await fileExists4(candidate)) {
|
|
39785
39938
|
envFiles.push(candidate);
|
|
39786
39939
|
}
|
|
@@ -39804,7 +39957,7 @@ async function loadEnvFromHierarchy(options) {
|
|
|
39804
39957
|
// src/commands/eval/jsonl-writer.ts
|
|
39805
39958
|
import { createWriteStream as createWriteStream2 } from "node:fs";
|
|
39806
39959
|
import { mkdir as mkdir5 } from "node:fs/promises";
|
|
39807
|
-
import
|
|
39960
|
+
import path16 from "node:path";
|
|
39808
39961
|
import { finished } from "node:stream/promises";
|
|
39809
39962
|
|
|
39810
39963
|
// ../../node_modules/.bun/async-mutex@0.5.0/node_modules/async-mutex/index.mjs
|
|
@@ -40022,7 +40175,7 @@ var JsonlWriter = class _JsonlWriter {
|
|
|
40022
40175
|
this.stream = stream;
|
|
40023
40176
|
}
|
|
40024
40177
|
static async open(filePath) {
|
|
40025
|
-
await mkdir5(
|
|
40178
|
+
await mkdir5(path16.dirname(filePath), { recursive: true });
|
|
40026
40179
|
const stream = createWriteStream2(filePath, { flags: "w", encoding: "utf8" });
|
|
40027
40180
|
return new _JsonlWriter(stream);
|
|
40028
40181
|
}
|
|
@@ -40054,9 +40207,9 @@ var JsonlWriter = class _JsonlWriter {
|
|
|
40054
40207
|
// src/commands/eval/yaml-writer.ts
|
|
40055
40208
|
import { createWriteStream as createWriteStream3 } from "node:fs";
|
|
40056
40209
|
import { mkdir as mkdir6 } from "node:fs/promises";
|
|
40057
|
-
import
|
|
40210
|
+
import path17 from "node:path";
|
|
40058
40211
|
import { finished as finished2 } from "node:stream/promises";
|
|
40059
|
-
import { stringify as
|
|
40212
|
+
import { stringify as stringifyYaml2 } from "yaml";
|
|
40060
40213
|
var YamlWriter = class _YamlWriter {
|
|
40061
40214
|
stream;
|
|
40062
40215
|
mutex = new Mutex();
|
|
@@ -40066,7 +40219,7 @@ var YamlWriter = class _YamlWriter {
|
|
|
40066
40219
|
this.stream = stream;
|
|
40067
40220
|
}
|
|
40068
40221
|
static async open(filePath) {
|
|
40069
|
-
await mkdir6(
|
|
40222
|
+
await mkdir6(path17.dirname(filePath), { recursive: true });
|
|
40070
40223
|
const stream = createWriteStream3(filePath, { flags: "w", encoding: "utf8" });
|
|
40071
40224
|
return new _YamlWriter(stream);
|
|
40072
40225
|
}
|
|
@@ -40075,7 +40228,7 @@ var YamlWriter = class _YamlWriter {
|
|
|
40075
40228
|
if (this.closed) {
|
|
40076
40229
|
throw new Error("Cannot write to closed YAML writer");
|
|
40077
40230
|
}
|
|
40078
|
-
const yamlDoc =
|
|
40231
|
+
const yamlDoc = stringifyYaml2(record2, {
|
|
40079
40232
|
indent: 2,
|
|
40080
40233
|
lineWidth: 0
|
|
40081
40234
|
// Disable line wrapping
|
|
@@ -40185,12 +40338,12 @@ var ProgressDisplay = class {
|
|
|
40185
40338
|
}
|
|
40186
40339
|
addLogPaths(paths) {
|
|
40187
40340
|
const newPaths = [];
|
|
40188
|
-
for (const
|
|
40189
|
-
if (this.logPathSet.has(
|
|
40341
|
+
for (const path28 of paths) {
|
|
40342
|
+
if (this.logPathSet.has(path28)) {
|
|
40190
40343
|
continue;
|
|
40191
40344
|
}
|
|
40192
|
-
this.logPathSet.add(
|
|
40193
|
-
newPaths.push(
|
|
40345
|
+
this.logPathSet.add(path28);
|
|
40346
|
+
newPaths.push(path28);
|
|
40194
40347
|
}
|
|
40195
40348
|
if (newPaths.length === 0) {
|
|
40196
40349
|
return;
|
|
@@ -40202,8 +40355,8 @@ var ProgressDisplay = class {
|
|
|
40202
40355
|
this.hasPrintedLogHeader = true;
|
|
40203
40356
|
}
|
|
40204
40357
|
const startIndex = this.logPaths.length - newPaths.length;
|
|
40205
|
-
newPaths.forEach((
|
|
40206
|
-
console.log(`${startIndex + offset + 1}. ${
|
|
40358
|
+
newPaths.forEach((path28, offset) => {
|
|
40359
|
+
console.log(`${startIndex + offset + 1}. ${path28}`);
|
|
40207
40360
|
});
|
|
40208
40361
|
}
|
|
40209
40362
|
finish() {
|
|
@@ -40358,7 +40511,7 @@ function formatEvaluationSummary(summary) {
|
|
|
40358
40511
|
|
|
40359
40512
|
// ../../packages/core/dist/evaluation/validation/index.js
|
|
40360
40513
|
import { readFile as readFile7 } from "node:fs/promises";
|
|
40361
|
-
import
|
|
40514
|
+
import path18 from "node:path";
|
|
40362
40515
|
import { parse as parse6 } from "yaml";
|
|
40363
40516
|
import { readFile as readFile23 } from "node:fs/promises";
|
|
40364
40517
|
import path23 from "node:path";
|
|
@@ -40401,8 +40554,8 @@ async function detectFileType(filePath) {
|
|
|
40401
40554
|
}
|
|
40402
40555
|
}
|
|
40403
40556
|
function inferFileTypeFromPath(filePath) {
|
|
40404
|
-
const normalized =
|
|
40405
|
-
const basename =
|
|
40557
|
+
const normalized = path18.normalize(filePath).replace(/\\/g, "/");
|
|
40558
|
+
const basename = path18.basename(filePath);
|
|
40406
40559
|
if (normalized.includes("/.agentv/")) {
|
|
40407
40560
|
if (basename === "config.yaml" || basename === "config.yml") {
|
|
40408
40561
|
return "config";
|
|
@@ -40725,7 +40878,11 @@ var CLI_SETTINGS = /* @__PURE__ */ new Set([
|
|
|
40725
40878
|
"env",
|
|
40726
40879
|
"timeout_seconds",
|
|
40727
40880
|
"timeoutSeconds",
|
|
40728
|
-
"healthcheck"
|
|
40881
|
+
"healthcheck",
|
|
40882
|
+
"keep_temp_files",
|
|
40883
|
+
"keepTempFiles",
|
|
40884
|
+
"keep_output_files",
|
|
40885
|
+
"keepOutputFiles"
|
|
40729
40886
|
]);
|
|
40730
40887
|
function getKnownSettings(provider) {
|
|
40731
40888
|
const normalizedProvider = provider.toLowerCase();
|
|
@@ -41243,12 +41400,12 @@ async function validateMessagesFileRefs(messages, location, searchRoots, filePat
|
|
|
41243
41400
|
// src/utils/targets.ts
|
|
41244
41401
|
import { constants as constants5 } from "node:fs";
|
|
41245
41402
|
import { access as access5 } from "node:fs/promises";
|
|
41246
|
-
import
|
|
41403
|
+
import path19 from "node:path";
|
|
41247
41404
|
var TARGET_FILE_CANDIDATES = [
|
|
41248
41405
|
"targets.yaml",
|
|
41249
41406
|
"targets.yml",
|
|
41250
|
-
|
|
41251
|
-
|
|
41407
|
+
path19.join(".agentv", "targets.yaml"),
|
|
41408
|
+
path19.join(".agentv", "targets.yml")
|
|
41252
41409
|
];
|
|
41253
41410
|
async function fileExists5(filePath) {
|
|
41254
41411
|
try {
|
|
@@ -41261,12 +41418,12 @@ async function fileExists5(filePath) {
|
|
|
41261
41418
|
async function discoverTargetsFile(options) {
|
|
41262
41419
|
const { explicitPath, testFilePath, repoRoot, cwd } = options;
|
|
41263
41420
|
if (explicitPath) {
|
|
41264
|
-
const resolvedExplicit =
|
|
41421
|
+
const resolvedExplicit = path19.resolve(explicitPath);
|
|
41265
41422
|
if (await fileExists5(resolvedExplicit)) {
|
|
41266
41423
|
return resolvedExplicit;
|
|
41267
41424
|
}
|
|
41268
41425
|
for (const candidate of TARGET_FILE_CANDIDATES) {
|
|
41269
|
-
const nested =
|
|
41426
|
+
const nested = path19.join(resolvedExplicit, candidate);
|
|
41270
41427
|
if (await fileExists5(nested)) {
|
|
41271
41428
|
return nested;
|
|
41272
41429
|
}
|
|
@@ -41274,13 +41431,13 @@ async function discoverTargetsFile(options) {
|
|
|
41274
41431
|
throw new Error(`targets.yaml not found at provided path: ${resolvedExplicit}`);
|
|
41275
41432
|
}
|
|
41276
41433
|
const directories = [...buildDirectoryChain(testFilePath, repoRoot)];
|
|
41277
|
-
const resolvedCwd =
|
|
41434
|
+
const resolvedCwd = path19.resolve(cwd);
|
|
41278
41435
|
if (!directories.includes(resolvedCwd)) {
|
|
41279
41436
|
directories.push(resolvedCwd);
|
|
41280
41437
|
}
|
|
41281
41438
|
for (const directory of directories) {
|
|
41282
41439
|
for (const candidate of TARGET_FILE_CANDIDATES) {
|
|
41283
|
-
const fullPath =
|
|
41440
|
+
const fullPath = path19.join(directory, candidate);
|
|
41284
41441
|
if (await fileExists5(fullPath)) {
|
|
41285
41442
|
return fullPath;
|
|
41286
41443
|
}
|
|
@@ -41459,15 +41616,15 @@ async function ensureFileExists(filePath, description) {
|
|
|
41459
41616
|
}
|
|
41460
41617
|
}
|
|
41461
41618
|
async function findRepoRoot(start) {
|
|
41462
|
-
const fallback =
|
|
41619
|
+
const fallback = path20.resolve(start);
|
|
41463
41620
|
let current = fallback;
|
|
41464
41621
|
while (current !== void 0) {
|
|
41465
|
-
const candidate =
|
|
41622
|
+
const candidate = path20.join(current, ".git");
|
|
41466
41623
|
try {
|
|
41467
41624
|
await access6(candidate, constants6.F_OK);
|
|
41468
41625
|
return current;
|
|
41469
41626
|
} catch {
|
|
41470
|
-
const parent =
|
|
41627
|
+
const parent = path20.dirname(current);
|
|
41471
41628
|
if (parent === current) {
|
|
41472
41629
|
break;
|
|
41473
41630
|
}
|
|
@@ -41480,16 +41637,16 @@ function buildDefaultOutputPath(cwd, format) {
|
|
|
41480
41637
|
const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
|
|
41481
41638
|
const baseName = "eval";
|
|
41482
41639
|
const extension = getDefaultExtension(format);
|
|
41483
|
-
return
|
|
41640
|
+
return path20.join(cwd, ".agentv", "results", `${baseName}_${timestamp}${extension}`);
|
|
41484
41641
|
}
|
|
41485
|
-
function resolvePromptDirectory(
|
|
41486
|
-
if (
|
|
41642
|
+
function resolvePromptDirectory(option5, cwd) {
|
|
41643
|
+
if (option5 === void 0) {
|
|
41487
41644
|
return void 0;
|
|
41488
41645
|
}
|
|
41489
|
-
if (typeof
|
|
41490
|
-
return
|
|
41646
|
+
if (typeof option5 === "string" && option5.trim().length > 0) {
|
|
41647
|
+
return path20.resolve(cwd, option5);
|
|
41491
41648
|
}
|
|
41492
|
-
return
|
|
41649
|
+
return path20.join(cwd, ".agentv", "prompts");
|
|
41493
41650
|
}
|
|
41494
41651
|
function createEvaluationCache() {
|
|
41495
41652
|
const store = /* @__PURE__ */ new Map();
|
|
@@ -41514,7 +41671,7 @@ function createProgressReporter(maxWorkers, options) {
|
|
|
41514
41671
|
};
|
|
41515
41672
|
}
|
|
41516
41673
|
function makeEvalKey(testFilePath, evalId) {
|
|
41517
|
-
return `${
|
|
41674
|
+
return `${path20.resolve(testFilePath)}::${evalId}`;
|
|
41518
41675
|
}
|
|
41519
41676
|
function createDisplayIdTracker() {
|
|
41520
41677
|
const map2 = /* @__PURE__ */ new Map();
|
|
@@ -41686,7 +41843,7 @@ async function runEvalCommand(input) {
|
|
|
41686
41843
|
if (options.verbose) {
|
|
41687
41844
|
console.log(`Repository root: ${repoRoot}`);
|
|
41688
41845
|
}
|
|
41689
|
-
const outputPath = options.outPath ?
|
|
41846
|
+
const outputPath = options.outPath ? path20.resolve(options.outPath) : buildDefaultOutputPath(cwd, options.format);
|
|
41690
41847
|
console.log(`Output path: ${outputPath}`);
|
|
41691
41848
|
const outputWriter = await createOutputWriter(outputPath, options.format);
|
|
41692
41849
|
const cache = options.cache ? createEvaluationCache() : void 0;
|
|
@@ -41694,7 +41851,7 @@ async function runEvalCommand(input) {
|
|
|
41694
41851
|
const allResults = [];
|
|
41695
41852
|
let lastPromptDumpDir;
|
|
41696
41853
|
const seenEvalCases = /* @__PURE__ */ new Set();
|
|
41697
|
-
const resolvedTestFiles = input.testFiles.map((file2) =>
|
|
41854
|
+
const resolvedTestFiles = input.testFiles.map((file2) => path20.resolve(file2));
|
|
41698
41855
|
const displayIdTracker = createDisplayIdTracker();
|
|
41699
41856
|
const totalWorkers = options.workers ?? DEFAULT_WORKERS;
|
|
41700
41857
|
const fileConcurrency = Math.min(
|
|
@@ -41790,7 +41947,7 @@ async function resolveEvaluationRunner() {
|
|
|
41790
41947
|
if (!overridePath) {
|
|
41791
41948
|
return runEvaluation;
|
|
41792
41949
|
}
|
|
41793
|
-
const resolved =
|
|
41950
|
+
const resolved = path20.isAbsolute(overridePath) ? overridePath : path20.resolve(process.cwd(), overridePath);
|
|
41794
41951
|
const moduleUrl = pathToFileURL(resolved).href;
|
|
41795
41952
|
const mod = await import(moduleUrl);
|
|
41796
41953
|
const candidate = mod.runEvaluation;
|
|
@@ -41803,44 +41960,44 @@ async function resolveEvaluationRunner() {
|
|
|
41803
41960
|
}
|
|
41804
41961
|
|
|
41805
41962
|
// src/commands/eval/index.ts
|
|
41806
|
-
var evalCommand =
|
|
41963
|
+
var evalCommand = command2({
|
|
41807
41964
|
name: "eval",
|
|
41808
41965
|
description: "Run eval suites and report results",
|
|
41809
41966
|
args: {
|
|
41810
41967
|
evalPaths: restPositionals({
|
|
41811
|
-
type:
|
|
41968
|
+
type: string5,
|
|
41812
41969
|
displayName: "eval-paths",
|
|
41813
41970
|
description: "Path(s) or glob(s) to evaluation .yaml file(s)"
|
|
41814
41971
|
}),
|
|
41815
|
-
target:
|
|
41816
|
-
type:
|
|
41972
|
+
target: option2({
|
|
41973
|
+
type: string5,
|
|
41817
41974
|
long: "target",
|
|
41818
41975
|
description: "Override target name from targets.yaml",
|
|
41819
41976
|
defaultValue: () => "default"
|
|
41820
41977
|
}),
|
|
41821
|
-
targets:
|
|
41822
|
-
type:
|
|
41978
|
+
targets: option2({
|
|
41979
|
+
type: optional3(string5),
|
|
41823
41980
|
long: "targets",
|
|
41824
41981
|
description: "Path to targets.yaml (overrides discovery)"
|
|
41825
41982
|
}),
|
|
41826
|
-
evalId:
|
|
41827
|
-
type:
|
|
41983
|
+
evalId: option2({
|
|
41984
|
+
type: optional3(string5),
|
|
41828
41985
|
long: "eval-id",
|
|
41829
41986
|
description: "Run only the eval case with this identifier"
|
|
41830
41987
|
}),
|
|
41831
|
-
workers:
|
|
41988
|
+
workers: option2({
|
|
41832
41989
|
type: number4,
|
|
41833
41990
|
long: "workers",
|
|
41834
41991
|
description: "Number of parallel workers (default: 3, max: 50). Can also be set per-target in targets.yaml",
|
|
41835
41992
|
defaultValue: () => 3
|
|
41836
41993
|
}),
|
|
41837
|
-
out:
|
|
41838
|
-
type:
|
|
41994
|
+
out: option2({
|
|
41995
|
+
type: optional3(string5),
|
|
41839
41996
|
long: "out",
|
|
41840
41997
|
description: "Write results to the specified path"
|
|
41841
41998
|
}),
|
|
41842
|
-
outputFormat:
|
|
41843
|
-
type:
|
|
41999
|
+
outputFormat: option2({
|
|
42000
|
+
type: string5,
|
|
41844
42001
|
long: "output-format",
|
|
41845
42002
|
description: "Output format: 'jsonl' or 'yaml' (default: jsonl)",
|
|
41846
42003
|
defaultValue: () => "jsonl"
|
|
@@ -41849,31 +42006,31 @@ var evalCommand = command({
|
|
|
41849
42006
|
long: "dry-run",
|
|
41850
42007
|
description: "Use mock provider responses instead of real LLM calls"
|
|
41851
42008
|
}),
|
|
41852
|
-
dryRunDelay:
|
|
42009
|
+
dryRunDelay: option2({
|
|
41853
42010
|
type: number4,
|
|
41854
42011
|
long: "dry-run-delay",
|
|
41855
42012
|
description: "Fixed delay in milliseconds for dry-run mode (overridden by delay range if specified)",
|
|
41856
42013
|
defaultValue: () => 0
|
|
41857
42014
|
}),
|
|
41858
|
-
dryRunDelayMin:
|
|
42015
|
+
dryRunDelayMin: option2({
|
|
41859
42016
|
type: number4,
|
|
41860
42017
|
long: "dry-run-delay-min",
|
|
41861
42018
|
description: "Minimum delay in milliseconds for dry-run mode (requires --dry-run-delay-max)",
|
|
41862
42019
|
defaultValue: () => 0
|
|
41863
42020
|
}),
|
|
41864
|
-
dryRunDelayMax:
|
|
42021
|
+
dryRunDelayMax: option2({
|
|
41865
42022
|
type: number4,
|
|
41866
42023
|
long: "dry-run-delay-max",
|
|
41867
42024
|
description: "Maximum delay in milliseconds for dry-run mode (requires --dry-run-delay-min)",
|
|
41868
42025
|
defaultValue: () => 0
|
|
41869
42026
|
}),
|
|
41870
|
-
agentTimeout:
|
|
42027
|
+
agentTimeout: option2({
|
|
41871
42028
|
type: number4,
|
|
41872
42029
|
long: "agent-timeout",
|
|
41873
42030
|
description: "Timeout in seconds for provider responses (default: 120)",
|
|
41874
42031
|
defaultValue: () => 120
|
|
41875
42032
|
}),
|
|
41876
|
-
maxRetries:
|
|
42033
|
+
maxRetries: option2({
|
|
41877
42034
|
type: number4,
|
|
41878
42035
|
long: "max-retries",
|
|
41879
42036
|
description: "Retry count for timeout recoveries (default: 2)",
|
|
@@ -41887,8 +42044,8 @@ var evalCommand = command({
|
|
|
41887
42044
|
long: "verbose",
|
|
41888
42045
|
description: "Enable verbose logging"
|
|
41889
42046
|
}),
|
|
41890
|
-
dumpPrompts:
|
|
41891
|
-
type:
|
|
42047
|
+
dumpPrompts: option2({
|
|
42048
|
+
type: optional3(string5),
|
|
41892
42049
|
long: "dump-prompts",
|
|
41893
42050
|
description: "Directory path for persisting prompt payloads for debugging"
|
|
41894
42051
|
}),
|
|
@@ -41934,7 +42091,7 @@ async function resolveEvalPaths(evalPaths, cwd) {
|
|
|
41934
42091
|
const unmatched = [];
|
|
41935
42092
|
const results = /* @__PURE__ */ new Set();
|
|
41936
42093
|
for (const pattern of normalizedInputs) {
|
|
41937
|
-
const candidatePath =
|
|
42094
|
+
const candidatePath = path21.isAbsolute(pattern) ? path21.normalize(pattern) : path21.resolve(cwd, pattern);
|
|
41938
42095
|
try {
|
|
41939
42096
|
const stats = await stat4(candidatePath);
|
|
41940
42097
|
if (stats.isFile() && /\.ya?ml$/i.test(candidatePath)) {
|
|
@@ -41958,7 +42115,7 @@ async function resolveEvalPaths(evalPaths, cwd) {
|
|
|
41958
42115
|
continue;
|
|
41959
42116
|
}
|
|
41960
42117
|
for (const filePath of yamlMatches) {
|
|
41961
|
-
results.add(
|
|
42118
|
+
results.add(path21.normalize(filePath));
|
|
41962
42119
|
}
|
|
41963
42120
|
}
|
|
41964
42121
|
if (unmatched.length > 0) {
|
|
@@ -41974,11 +42131,11 @@ async function resolveEvalPaths(evalPaths, cwd) {
|
|
|
41974
42131
|
}
|
|
41975
42132
|
|
|
41976
42133
|
// src/commands/generate/index.ts
|
|
41977
|
-
import { command as
|
|
42134
|
+
import { command as command3, flag as flag2, option as option3, optional as optional4, positional as positional3, string as string6, subcommands } from "cmd-ts";
|
|
41978
42135
|
|
|
41979
42136
|
// src/commands/generate/rubrics.ts
|
|
41980
42137
|
import { readFile as readFile8, writeFile as writeFile6 } from "node:fs/promises";
|
|
41981
|
-
import
|
|
42138
|
+
import path24 from "node:path";
|
|
41982
42139
|
import { pathToFileURL as pathToFileURL2 } from "node:url";
|
|
41983
42140
|
import { isMap, isSeq, parseDocument } from "yaml";
|
|
41984
42141
|
function isJsonObject3(value) {
|
|
@@ -41990,7 +42147,7 @@ function asString6(value) {
|
|
|
41990
42147
|
async function loadRubricGenerator() {
|
|
41991
42148
|
const customGenerator = process.env.AGENTEVO_CLI_RUBRIC_GENERATOR;
|
|
41992
42149
|
if (customGenerator) {
|
|
41993
|
-
const generatorPath =
|
|
42150
|
+
const generatorPath = path24.resolve(customGenerator);
|
|
41994
42151
|
const generatorUrl = pathToFileURL2(generatorPath).href;
|
|
41995
42152
|
const module = await import(generatorUrl);
|
|
41996
42153
|
return module.generateRubrics;
|
|
@@ -42000,7 +42157,7 @@ async function loadRubricGenerator() {
|
|
|
42000
42157
|
async function generateRubricsCommand(options) {
|
|
42001
42158
|
const { file: file2, target: targetOverride, verbose } = options;
|
|
42002
42159
|
console.log(`Generating rubrics for: ${file2}`);
|
|
42003
|
-
const absolutePath =
|
|
42160
|
+
const absolutePath = path24.resolve(file2);
|
|
42004
42161
|
const content = await readFile8(absolutePath, "utf8");
|
|
42005
42162
|
const doc = parseDocument(content);
|
|
42006
42163
|
const parsed = doc.toJSON();
|
|
@@ -42117,17 +42274,17 @@ function extractQuestion(evalCase) {
|
|
|
42117
42274
|
}
|
|
42118
42275
|
|
|
42119
42276
|
// src/commands/generate/index.ts
|
|
42120
|
-
var rubricsCommand =
|
|
42277
|
+
var rubricsCommand = command3({
|
|
42121
42278
|
name: "rubrics",
|
|
42122
42279
|
description: "Generate rubrics from expected_outcome in YAML eval file",
|
|
42123
42280
|
args: {
|
|
42124
|
-
file:
|
|
42125
|
-
type:
|
|
42281
|
+
file: positional3({
|
|
42282
|
+
type: string6,
|
|
42126
42283
|
displayName: "file",
|
|
42127
42284
|
description: "Path to YAML eval file"
|
|
42128
42285
|
}),
|
|
42129
|
-
target:
|
|
42130
|
-
type:
|
|
42286
|
+
target: option3({
|
|
42287
|
+
type: optional4(string6),
|
|
42131
42288
|
long: "target",
|
|
42132
42289
|
short: "t",
|
|
42133
42290
|
description: "Override target for rubric generation (default: file target or openai:gpt-4o)"
|
|
@@ -42160,14 +42317,14 @@ var generateCommand = subcommands({
|
|
|
42160
42317
|
});
|
|
42161
42318
|
|
|
42162
42319
|
// src/commands/init/index.ts
|
|
42163
|
-
import { existsSync, mkdirSync, writeFileSync } from "node:fs";
|
|
42164
|
-
import
|
|
42320
|
+
import { existsSync, mkdirSync, writeFileSync as writeFileSync2 } from "node:fs";
|
|
42321
|
+
import path26 from "node:path";
|
|
42165
42322
|
import * as readline from "node:readline/promises";
|
|
42166
|
-
import { command as
|
|
42323
|
+
import { command as command4, option as option4, optional as optional5, string as string7 } from "cmd-ts";
|
|
42167
42324
|
|
|
42168
42325
|
// src/templates/index.ts
|
|
42169
|
-
import { readFileSync, readdirSync, statSync } from "node:fs";
|
|
42170
|
-
import
|
|
42326
|
+
import { readFileSync as readFileSync2, readdirSync, statSync } from "node:fs";
|
|
42327
|
+
import path25 from "node:path";
|
|
42171
42328
|
import { fileURLToPath } from "node:url";
|
|
42172
42329
|
function getGithubTemplates() {
|
|
42173
42330
|
return getTemplatesFromDir(".github");
|
|
@@ -42179,12 +42336,12 @@ function getClaudeTemplates() {
|
|
|
42179
42336
|
return getTemplatesFromDir(".claude");
|
|
42180
42337
|
}
|
|
42181
42338
|
function getTemplatesFromDir(subdir) {
|
|
42182
|
-
const currentDir =
|
|
42339
|
+
const currentDir = path25.dirname(fileURLToPath(import.meta.url));
|
|
42183
42340
|
let templatesDir;
|
|
42184
|
-
if (currentDir.includes(`${
|
|
42185
|
-
templatesDir =
|
|
42341
|
+
if (currentDir.includes(`${path25.sep}dist`)) {
|
|
42342
|
+
templatesDir = path25.join(currentDir, "templates", subdir);
|
|
42186
42343
|
} else {
|
|
42187
|
-
templatesDir =
|
|
42344
|
+
templatesDir = path25.join(currentDir, subdir);
|
|
42188
42345
|
}
|
|
42189
42346
|
return readTemplatesRecursively(templatesDir, "");
|
|
42190
42347
|
}
|
|
@@ -42192,15 +42349,15 @@ function readTemplatesRecursively(dir, relativePath) {
|
|
|
42192
42349
|
const templates = [];
|
|
42193
42350
|
const entries = readdirSync(dir);
|
|
42194
42351
|
for (const entry of entries) {
|
|
42195
|
-
const fullPath =
|
|
42352
|
+
const fullPath = path25.join(dir, entry);
|
|
42196
42353
|
const stat6 = statSync(fullPath);
|
|
42197
|
-
const entryRelativePath = relativePath ?
|
|
42354
|
+
const entryRelativePath = relativePath ? path25.join(relativePath, entry) : entry;
|
|
42198
42355
|
if (stat6.isDirectory()) {
|
|
42199
42356
|
templates.push(...readTemplatesRecursively(fullPath, entryRelativePath));
|
|
42200
42357
|
} else {
|
|
42201
|
-
const content =
|
|
42358
|
+
const content = readFileSync2(fullPath, "utf-8");
|
|
42202
42359
|
templates.push({
|
|
42203
|
-
path: entryRelativePath.split(
|
|
42360
|
+
path: entryRelativePath.split(path25.sep).join("/"),
|
|
42204
42361
|
// Normalize to forward slashes
|
|
42205
42362
|
content
|
|
42206
42363
|
});
|
|
@@ -42223,10 +42380,10 @@ async function promptYesNo(message) {
|
|
|
42223
42380
|
}
|
|
42224
42381
|
}
|
|
42225
42382
|
async function initCommand(options = {}) {
|
|
42226
|
-
const targetPath =
|
|
42227
|
-
const githubDir =
|
|
42228
|
-
const agentvDir =
|
|
42229
|
-
const claudeDir =
|
|
42383
|
+
const targetPath = path26.resolve(options.targetPath ?? ".");
|
|
42384
|
+
const githubDir = path26.join(targetPath, ".github");
|
|
42385
|
+
const agentvDir = path26.join(targetPath, ".agentv");
|
|
42386
|
+
const claudeDir = path26.join(targetPath, ".claude");
|
|
42230
42387
|
const githubTemplates = getGithubTemplates();
|
|
42231
42388
|
const agentvTemplates = getAgentvTemplates();
|
|
42232
42389
|
const claudeTemplates = getClaudeTemplates();
|
|
@@ -42234,32 +42391,32 @@ async function initCommand(options = {}) {
|
|
|
42234
42391
|
const otherAgentvTemplates = agentvTemplates.filter((t) => t.path !== ".env.template");
|
|
42235
42392
|
const existingFiles = [];
|
|
42236
42393
|
if (envTemplate) {
|
|
42237
|
-
const envFilePath =
|
|
42394
|
+
const envFilePath = path26.join(targetPath, ".env.template");
|
|
42238
42395
|
if (existsSync(envFilePath)) {
|
|
42239
42396
|
existingFiles.push(".env.template");
|
|
42240
42397
|
}
|
|
42241
42398
|
}
|
|
42242
42399
|
if (existsSync(githubDir)) {
|
|
42243
42400
|
for (const template of githubTemplates) {
|
|
42244
|
-
const targetFilePath =
|
|
42401
|
+
const targetFilePath = path26.join(githubDir, template.path);
|
|
42245
42402
|
if (existsSync(targetFilePath)) {
|
|
42246
|
-
existingFiles.push(
|
|
42403
|
+
existingFiles.push(path26.relative(targetPath, targetFilePath));
|
|
42247
42404
|
}
|
|
42248
42405
|
}
|
|
42249
42406
|
}
|
|
42250
42407
|
if (existsSync(agentvDir)) {
|
|
42251
42408
|
for (const template of otherAgentvTemplates) {
|
|
42252
|
-
const targetFilePath =
|
|
42409
|
+
const targetFilePath = path26.join(agentvDir, template.path);
|
|
42253
42410
|
if (existsSync(targetFilePath)) {
|
|
42254
|
-
existingFiles.push(
|
|
42411
|
+
existingFiles.push(path26.relative(targetPath, targetFilePath));
|
|
42255
42412
|
}
|
|
42256
42413
|
}
|
|
42257
42414
|
}
|
|
42258
42415
|
if (existsSync(claudeDir)) {
|
|
42259
42416
|
for (const template of claudeTemplates) {
|
|
42260
|
-
const targetFilePath =
|
|
42417
|
+
const targetFilePath = path26.join(claudeDir, template.path);
|
|
42261
42418
|
if (existsSync(targetFilePath)) {
|
|
42262
|
-
existingFiles.push(
|
|
42419
|
+
existingFiles.push(path26.relative(targetPath, targetFilePath));
|
|
42263
42420
|
}
|
|
42264
42421
|
}
|
|
42265
42422
|
}
|
|
@@ -42286,36 +42443,36 @@ async function initCommand(options = {}) {
|
|
|
42286
42443
|
mkdirSync(claudeDir, { recursive: true });
|
|
42287
42444
|
}
|
|
42288
42445
|
if (envTemplate) {
|
|
42289
|
-
const envFilePath =
|
|
42290
|
-
|
|
42446
|
+
const envFilePath = path26.join(targetPath, ".env.template");
|
|
42447
|
+
writeFileSync2(envFilePath, envTemplate.content, "utf-8");
|
|
42291
42448
|
console.log("Created .env.template");
|
|
42292
42449
|
}
|
|
42293
42450
|
for (const template of githubTemplates) {
|
|
42294
|
-
const targetFilePath =
|
|
42295
|
-
const targetDirPath =
|
|
42451
|
+
const targetFilePath = path26.join(githubDir, template.path);
|
|
42452
|
+
const targetDirPath = path26.dirname(targetFilePath);
|
|
42296
42453
|
if (!existsSync(targetDirPath)) {
|
|
42297
42454
|
mkdirSync(targetDirPath, { recursive: true });
|
|
42298
42455
|
}
|
|
42299
|
-
|
|
42300
|
-
console.log(`Created ${
|
|
42456
|
+
writeFileSync2(targetFilePath, template.content, "utf-8");
|
|
42457
|
+
console.log(`Created ${path26.relative(targetPath, targetFilePath)}`);
|
|
42301
42458
|
}
|
|
42302
42459
|
for (const template of otherAgentvTemplates) {
|
|
42303
|
-
const targetFilePath =
|
|
42304
|
-
const targetDirPath =
|
|
42460
|
+
const targetFilePath = path26.join(agentvDir, template.path);
|
|
42461
|
+
const targetDirPath = path26.dirname(targetFilePath);
|
|
42305
42462
|
if (!existsSync(targetDirPath)) {
|
|
42306
42463
|
mkdirSync(targetDirPath, { recursive: true });
|
|
42307
42464
|
}
|
|
42308
|
-
|
|
42309
|
-
console.log(`Created ${
|
|
42465
|
+
writeFileSync2(targetFilePath, template.content, "utf-8");
|
|
42466
|
+
console.log(`Created ${path26.relative(targetPath, targetFilePath)}`);
|
|
42310
42467
|
}
|
|
42311
42468
|
for (const template of claudeTemplates) {
|
|
42312
|
-
const targetFilePath =
|
|
42313
|
-
const targetDirPath =
|
|
42469
|
+
const targetFilePath = path26.join(claudeDir, template.path);
|
|
42470
|
+
const targetDirPath = path26.dirname(targetFilePath);
|
|
42314
42471
|
if (!existsSync(targetDirPath)) {
|
|
42315
42472
|
mkdirSync(targetDirPath, { recursive: true });
|
|
42316
42473
|
}
|
|
42317
|
-
|
|
42318
|
-
console.log(`Created ${
|
|
42474
|
+
writeFileSync2(targetFilePath, template.content, "utf-8");
|
|
42475
|
+
console.log(`Created ${path26.relative(targetPath, targetFilePath)}`);
|
|
42319
42476
|
}
|
|
42320
42477
|
console.log("\nAgentV initialized successfully!");
|
|
42321
42478
|
console.log("\nFiles installed to root:");
|
|
@@ -42323,17 +42480,17 @@ async function initCommand(options = {}) {
|
|
|
42323
42480
|
console.log(" - .env.template");
|
|
42324
42481
|
}
|
|
42325
42482
|
console.log(`
|
|
42326
|
-
Files installed to ${
|
|
42483
|
+
Files installed to ${path26.relative(targetPath, githubDir)}:`);
|
|
42327
42484
|
for (const t of githubTemplates) {
|
|
42328
42485
|
console.log(` - ${t.path}`);
|
|
42329
42486
|
}
|
|
42330
42487
|
console.log(`
|
|
42331
|
-
Files installed to ${
|
|
42488
|
+
Files installed to ${path26.relative(targetPath, agentvDir)}:`);
|
|
42332
42489
|
for (const t of otherAgentvTemplates) {
|
|
42333
42490
|
console.log(` - ${t.path}`);
|
|
42334
42491
|
}
|
|
42335
42492
|
console.log(`
|
|
42336
|
-
Files installed to ${
|
|
42493
|
+
Files installed to ${path26.relative(targetPath, claudeDir)}:`);
|
|
42337
42494
|
for (const t of claudeTemplates) {
|
|
42338
42495
|
console.log(` - ${t.path}`);
|
|
42339
42496
|
}
|
|
@@ -42342,12 +42499,12 @@ Files installed to ${path25.relative(targetPath, claudeDir)}:`);
|
|
|
42342
42499
|
console.log(" 2. Configure targets in .agentv/targets.yaml");
|
|
42343
42500
|
console.log(" 3. Create eval files using the schema and prompt templates");
|
|
42344
42501
|
}
|
|
42345
|
-
var initCmdTsCommand =
|
|
42502
|
+
var initCmdTsCommand = command4({
|
|
42346
42503
|
name: "init",
|
|
42347
42504
|
description: "Initialize AgentV in your project (installs prompt templates and schema to .github)",
|
|
42348
42505
|
args: {
|
|
42349
|
-
path:
|
|
42350
|
-
type:
|
|
42506
|
+
path: option4({
|
|
42507
|
+
type: optional5(string7),
|
|
42351
42508
|
long: "path",
|
|
42352
42509
|
description: "Target directory for initialization (default: current directory)"
|
|
42353
42510
|
})
|
|
@@ -42363,7 +42520,7 @@ var initCmdTsCommand = command3({
|
|
|
42363
42520
|
});
|
|
42364
42521
|
|
|
42365
42522
|
// src/commands/validate/index.ts
|
|
42366
|
-
import { command as
|
|
42523
|
+
import { command as command5, restPositionals as restPositionals2, string as string8 } from "cmd-ts";
|
|
42367
42524
|
|
|
42368
42525
|
// src/commands/validate/format-output.ts
|
|
42369
42526
|
var ANSI_RED3 = "\x1B[31m";
|
|
@@ -42448,7 +42605,7 @@ function isTTY2() {
|
|
|
42448
42605
|
// src/commands/validate/validate-files.ts
|
|
42449
42606
|
import { constants as constants7 } from "node:fs";
|
|
42450
42607
|
import { access as access7, readdir as readdir3, stat as stat5 } from "node:fs/promises";
|
|
42451
|
-
import
|
|
42608
|
+
import path27 from "node:path";
|
|
42452
42609
|
async function validateFiles(paths) {
|
|
42453
42610
|
const filePaths = await expandPaths(paths);
|
|
42454
42611
|
const results = [];
|
|
@@ -42466,7 +42623,7 @@ async function validateFiles(paths) {
|
|
|
42466
42623
|
};
|
|
42467
42624
|
}
|
|
42468
42625
|
async function validateSingleFile(filePath) {
|
|
42469
|
-
const absolutePath =
|
|
42626
|
+
const absolutePath = path27.resolve(filePath);
|
|
42470
42627
|
const fileType = await detectFileType(absolutePath);
|
|
42471
42628
|
let result;
|
|
42472
42629
|
if (fileType === "eval") {
|
|
@@ -42491,7 +42648,7 @@ async function validateSingleFile(filePath) {
|
|
|
42491
42648
|
async function expandPaths(paths) {
|
|
42492
42649
|
const expanded = [];
|
|
42493
42650
|
for (const inputPath of paths) {
|
|
42494
|
-
const absolutePath =
|
|
42651
|
+
const absolutePath = path27.resolve(inputPath);
|
|
42495
42652
|
try {
|
|
42496
42653
|
await access7(absolutePath, constants7.F_OK);
|
|
42497
42654
|
} catch {
|
|
@@ -42515,7 +42672,7 @@ async function findYamlFiles(dirPath) {
|
|
|
42515
42672
|
try {
|
|
42516
42673
|
const entries = await readdir3(dirPath, { withFileTypes: true });
|
|
42517
42674
|
for (const entry of entries) {
|
|
42518
|
-
const fullPath =
|
|
42675
|
+
const fullPath = path27.join(dirPath, entry.name);
|
|
42519
42676
|
if (entry.isDirectory()) {
|
|
42520
42677
|
if (entry.name === "node_modules" || entry.name.startsWith(".")) {
|
|
42521
42678
|
continue;
|
|
@@ -42532,7 +42689,7 @@ async function findYamlFiles(dirPath) {
|
|
|
42532
42689
|
return results;
|
|
42533
42690
|
}
|
|
42534
42691
|
function isYamlFile(filePath) {
|
|
42535
|
-
const ext =
|
|
42692
|
+
const ext = path27.extname(filePath).toLowerCase();
|
|
42536
42693
|
return ext === ".yaml" || ext === ".yml";
|
|
42537
42694
|
}
|
|
42538
42695
|
|
|
@@ -42549,12 +42706,12 @@ async function runValidateCommand(paths) {
|
|
|
42549
42706
|
process.exit(1);
|
|
42550
42707
|
}
|
|
42551
42708
|
}
|
|
42552
|
-
var validateCommand =
|
|
42709
|
+
var validateCommand = command5({
|
|
42553
42710
|
name: "validate",
|
|
42554
42711
|
description: "Validate AgentV eval and targets YAML files",
|
|
42555
42712
|
args: {
|
|
42556
42713
|
paths: restPositionals2({
|
|
42557
|
-
type:
|
|
42714
|
+
type: string8,
|
|
42558
42715
|
displayName: "paths",
|
|
42559
42716
|
description: "Files or directories to validate"
|
|
42560
42717
|
})
|
|
@@ -42570,16 +42727,17 @@ var validateCommand = command4({
|
|
|
42570
42727
|
});
|
|
42571
42728
|
|
|
42572
42729
|
// src/index.ts
|
|
42573
|
-
var packageJson = JSON.parse(
|
|
42730
|
+
var packageJson = JSON.parse(readFileSync3(new URL("../package.json", import.meta.url), "utf8"));
|
|
42574
42731
|
var app = subcommands2({
|
|
42575
42732
|
name: "agentv",
|
|
42576
42733
|
description: "AgentV CLI",
|
|
42577
42734
|
version: packageJson.version,
|
|
42578
42735
|
cmds: {
|
|
42736
|
+
convert: convertCommand,
|
|
42579
42737
|
eval: evalCommand,
|
|
42580
|
-
validate: validateCommand,
|
|
42581
42738
|
generate: generateCommand,
|
|
42582
|
-
init: initCmdTsCommand
|
|
42739
|
+
init: initCmdTsCommand,
|
|
42740
|
+
validate: validateCommand
|
|
42583
42741
|
}
|
|
42584
42742
|
});
|
|
42585
42743
|
async function runCli(argv = process.argv) {
|
|
@@ -42590,4 +42748,4 @@ export {
|
|
|
42590
42748
|
app,
|
|
42591
42749
|
runCli
|
|
42592
42750
|
};
|
|
42593
|
-
//# sourceMappingURL=chunk-
|
|
42751
|
+
//# sourceMappingURL=chunk-3RYQPI4H.js.map
|