agentv 0.23.0 → 0.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -10
- package/dist/{chunk-4T62HFF4.js → chunk-ZVSFP6NK.js} +822 -233
- package/dist/chunk-ZVSFP6NK.js.map +1 -0
- package/dist/cli.js +1 -1
- package/dist/cli.js.map +1 -1
- package/dist/index.js +1 -1
- package/dist/templates/.agentv/.env.template +10 -10
- package/dist/templates/.agentv/targets.yaml +8 -1
- package/dist/templates/.claude/skills/agentv-eval-builder/SKILL.md +75 -6
- package/dist/templates/.claude/skills/agentv-eval-builder/references/composite-evaluator.md +215 -0
- package/dist/templates/.claude/skills/agentv-eval-builder/references/eval-schema.json +217 -217
- package/dist/templates/.claude/skills/agentv-eval-builder/references/rubric-evaluator.md +139 -0
- package/dist/templates/.claude/skills/agentv-eval-builder/references/tool-trajectory-evaluator.md +237 -0
- package/package.json +1 -1
- package/dist/chunk-4T62HFF4.js.map +0 -1
- package/dist/templates/agentv/.env.template +0 -23
|
@@ -146,7 +146,7 @@ import { binary, run, subcommands as subcommands2 } from "cmd-ts";
|
|
|
146
146
|
|
|
147
147
|
// src/commands/eval/index.ts
|
|
148
148
|
import { stat as stat4 } from "node:fs/promises";
|
|
149
|
-
import
|
|
149
|
+
import path20 from "node:path";
|
|
150
150
|
import {
|
|
151
151
|
command,
|
|
152
152
|
flag,
|
|
@@ -161,13 +161,14 @@ import fg from "fast-glob";
|
|
|
161
161
|
// src/commands/eval/run-eval.ts
|
|
162
162
|
import { constants as constants6 } from "node:fs";
|
|
163
163
|
import { access as access6, mkdir as mkdir7 } from "node:fs/promises";
|
|
164
|
-
import
|
|
164
|
+
import path19 from "node:path";
|
|
165
165
|
import { pathToFileURL } from "node:url";
|
|
166
166
|
|
|
167
|
-
// ../../packages/core/dist/chunk-
|
|
167
|
+
// ../../packages/core/dist/chunk-OYTL3LNN.js
|
|
168
168
|
import { constants } from "node:fs";
|
|
169
169
|
import { access, readFile } from "node:fs/promises";
|
|
170
170
|
import path from "node:path";
|
|
171
|
+
import path2 from "node:path";
|
|
171
172
|
|
|
172
173
|
// ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/external.js
|
|
173
174
|
var external_exports = {};
|
|
@@ -647,8 +648,8 @@ function getErrorMap() {
|
|
|
647
648
|
|
|
648
649
|
// ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/helpers/parseUtil.js
|
|
649
650
|
var makeIssue = (params) => {
|
|
650
|
-
const { data, path:
|
|
651
|
-
const fullPath = [...
|
|
651
|
+
const { data, path: path27, errorMaps, issueData } = params;
|
|
652
|
+
const fullPath = [...path27, ...issueData.path || []];
|
|
652
653
|
const fullIssue = {
|
|
653
654
|
...issueData,
|
|
654
655
|
path: fullPath
|
|
@@ -764,11 +765,11 @@ var errorUtil;
|
|
|
764
765
|
|
|
765
766
|
// ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/types.js
|
|
766
767
|
var ParseInputLazyPath = class {
|
|
767
|
-
constructor(parent, value,
|
|
768
|
+
constructor(parent, value, path27, key2) {
|
|
768
769
|
this._cachedPath = [];
|
|
769
770
|
this.parent = parent;
|
|
770
771
|
this.data = value;
|
|
771
|
-
this._path =
|
|
772
|
+
this._path = path27;
|
|
772
773
|
this._key = key2;
|
|
773
774
|
}
|
|
774
775
|
get path() {
|
|
@@ -4210,7 +4211,7 @@ var coerce = {
|
|
|
4210
4211
|
};
|
|
4211
4212
|
var NEVER = INVALID;
|
|
4212
4213
|
|
|
4213
|
-
// ../../packages/core/dist/chunk-
|
|
4214
|
+
// ../../packages/core/dist/chunk-OYTL3LNN.js
|
|
4214
4215
|
async function fileExists(filePath) {
|
|
4215
4216
|
try {
|
|
4216
4217
|
await access(filePath, constants.F_OK);
|
|
@@ -4226,6 +4227,10 @@ async function readTextFile(filePath) {
|
|
|
4226
4227
|
const content = await readFile(filePath, "utf8");
|
|
4227
4228
|
return normalizeLineEndings(content);
|
|
4228
4229
|
}
|
|
4230
|
+
async function readJsonFile(filePath) {
|
|
4231
|
+
const content = await readFile(filePath, "utf8");
|
|
4232
|
+
return JSON.parse(content);
|
|
4233
|
+
}
|
|
4229
4234
|
async function findGitRoot(startPath) {
|
|
4230
4235
|
let currentDir = path.dirname(path.resolve(startPath));
|
|
4231
4236
|
const root2 = path.parse(currentDir).root;
|
|
@@ -4331,7 +4336,7 @@ var BASE_TARGET_SCHEMA = external_exports.object({
|
|
|
4331
4336
|
judge_target: external_exports.string().optional(),
|
|
4332
4337
|
workers: external_exports.number().int().min(1).optional()
|
|
4333
4338
|
}).passthrough();
|
|
4334
|
-
var DEFAULT_AZURE_API_VERSION = "2024-
|
|
4339
|
+
var DEFAULT_AZURE_API_VERSION = "2024-12-01-preview";
|
|
4335
4340
|
function normalizeAzureApiVersion(value) {
|
|
4336
4341
|
if (!value) {
|
|
4337
4342
|
return DEFAULT_AZURE_API_VERSION;
|
|
@@ -4375,7 +4380,7 @@ function resolveRetryConfig(target) {
|
|
|
4375
4380
|
retryableStatusCodes
|
|
4376
4381
|
};
|
|
4377
4382
|
}
|
|
4378
|
-
function resolveTargetDefinition(definition, env = process.env) {
|
|
4383
|
+
function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
|
|
4379
4384
|
const parsed = BASE_TARGET_SCHEMA.parse(definition);
|
|
4380
4385
|
const provider = parsed.provider.toLowerCase();
|
|
4381
4386
|
const providerBatching = resolveOptionalBoolean(
|
|
@@ -4448,7 +4453,7 @@ function resolveTargetDefinition(definition, env = process.env) {
|
|
|
4448
4453
|
judgeTarget: parsed.judge_target,
|
|
4449
4454
|
workers: parsed.workers,
|
|
4450
4455
|
providerBatching,
|
|
4451
|
-
config: resolveCliConfig(parsed, env)
|
|
4456
|
+
config: resolveCliConfig(parsed, env, evalFilePath)
|
|
4452
4457
|
};
|
|
4453
4458
|
default:
|
|
4454
4459
|
throw new Error(`Unsupported provider '${parsed.provider}' in target '${parsed.name}'`);
|
|
@@ -4566,7 +4571,8 @@ function normalizeCodexLogFormat(value) {
|
|
|
4566
4571
|
}
|
|
4567
4572
|
function resolveMockConfig(target) {
|
|
4568
4573
|
const response = typeof target.response === "string" ? target.response : void 0;
|
|
4569
|
-
|
|
4574
|
+
const trace2 = Array.isArray(target.trace) ? target.trace : void 0;
|
|
4575
|
+
return { response, trace: trace2 };
|
|
4570
4576
|
}
|
|
4571
4577
|
function resolveVSCodeConfig(target, env, insiders) {
|
|
4572
4578
|
const workspaceTemplateEnvVar = resolveOptionalLiteralString(
|
|
@@ -4598,15 +4604,18 @@ function resolveVSCodeConfig(target, env, insiders) {
|
|
|
4598
4604
|
workspaceTemplate
|
|
4599
4605
|
};
|
|
4600
4606
|
}
|
|
4601
|
-
function resolveCliConfig(target, env) {
|
|
4607
|
+
function resolveCliConfig(target, env, evalFilePath) {
|
|
4602
4608
|
const commandTemplateSource = target.command_template ?? target.commandTemplate;
|
|
4603
4609
|
const filesFormat = resolveOptionalLiteralString(
|
|
4604
4610
|
target.files_format ?? target.filesFormat ?? target.attachments_format ?? target.attachmentsFormat
|
|
4605
4611
|
);
|
|
4606
|
-
|
|
4612
|
+
let cwd = resolveOptionalString(target.cwd, env, `${target.name} working directory`, {
|
|
4607
4613
|
allowLiteral: true,
|
|
4608
4614
|
optionalEnv: true
|
|
4609
4615
|
});
|
|
4616
|
+
if (!cwd && evalFilePath) {
|
|
4617
|
+
cwd = path2.dirname(path2.resolve(evalFilePath));
|
|
4618
|
+
}
|
|
4610
4619
|
const timeoutMs = resolveTimeoutMs(
|
|
4611
4620
|
target.timeout_seconds ?? target.timeoutSeconds,
|
|
4612
4621
|
`${target.name} timeout`
|
|
@@ -4724,17 +4733,15 @@ function resolveOptionalString(source2, env, description, options) {
|
|
|
4724
4733
|
if (envVarMatch) {
|
|
4725
4734
|
const varName = envVarMatch[1];
|
|
4726
4735
|
const envValue = env[varName];
|
|
4727
|
-
if (envValue !== void 0) {
|
|
4728
|
-
if (envValue.trim().length === 0) {
|
|
4729
|
-
throw new Error(`Environment variable '${varName}' for ${description} is empty`);
|
|
4730
|
-
}
|
|
4731
|
-
return envValue;
|
|
4732
|
-
}
|
|
4733
4736
|
const optionalEnv = options?.optionalEnv ?? false;
|
|
4734
|
-
if (
|
|
4735
|
-
|
|
4737
|
+
if (envValue === void 0 || envValue.trim().length === 0) {
|
|
4738
|
+
if (optionalEnv) {
|
|
4739
|
+
return void 0;
|
|
4740
|
+
}
|
|
4741
|
+
const status = envValue === void 0 ? "is not set" : "is empty";
|
|
4742
|
+
throw new Error(`Environment variable '${varName}' required for ${description} ${status}`);
|
|
4736
4743
|
}
|
|
4737
|
-
|
|
4744
|
+
return envValue;
|
|
4738
4745
|
}
|
|
4739
4746
|
const allowLiteral = options?.allowLiteral ?? false;
|
|
4740
4747
|
if (!allowLiteral) {
|
|
@@ -4889,7 +4896,7 @@ import micromatch from "micromatch";
|
|
|
4889
4896
|
import { parse as parse5 } from "yaml";
|
|
4890
4897
|
import { constants as constants3 } from "node:fs";
|
|
4891
4898
|
import { access as access3 } from "node:fs/promises";
|
|
4892
|
-
import
|
|
4899
|
+
import path13 from "node:path";
|
|
4893
4900
|
import path32 from "node:path";
|
|
4894
4901
|
import { readFile as readFile22 } from "node:fs/promises";
|
|
4895
4902
|
import { readFile as readFile32 } from "node:fs/promises";
|
|
@@ -5985,10 +5992,10 @@ function assignProp(target, prop, value) {
|
|
|
5985
5992
|
configurable: true
|
|
5986
5993
|
});
|
|
5987
5994
|
}
|
|
5988
|
-
function getElementAtPath(obj,
|
|
5989
|
-
if (!
|
|
5995
|
+
function getElementAtPath(obj, path27) {
|
|
5996
|
+
if (!path27)
|
|
5990
5997
|
return obj;
|
|
5991
|
-
return
|
|
5998
|
+
return path27.reduce((acc, key2) => acc?.[key2], obj);
|
|
5992
5999
|
}
|
|
5993
6000
|
function promiseAllObject(promisesObj) {
|
|
5994
6001
|
const keys = Object.keys(promisesObj);
|
|
@@ -6308,11 +6315,11 @@ function aborted(x, startIndex = 0) {
|
|
|
6308
6315
|
}
|
|
6309
6316
|
return false;
|
|
6310
6317
|
}
|
|
6311
|
-
function prefixIssues(
|
|
6318
|
+
function prefixIssues(path27, issues) {
|
|
6312
6319
|
return issues.map((iss) => {
|
|
6313
6320
|
var _a17;
|
|
6314
6321
|
(_a17 = iss).path ?? (_a17.path = []);
|
|
6315
|
-
iss.path.unshift(
|
|
6322
|
+
iss.path.unshift(path27);
|
|
6316
6323
|
return iss;
|
|
6317
6324
|
});
|
|
6318
6325
|
}
|
|
@@ -6449,7 +6456,7 @@ function treeifyError(error40, _mapper) {
|
|
|
6449
6456
|
return issue2.message;
|
|
6450
6457
|
};
|
|
6451
6458
|
const result = { errors: [] };
|
|
6452
|
-
const processError = (error41,
|
|
6459
|
+
const processError = (error41, path27 = []) => {
|
|
6453
6460
|
var _a17, _b8;
|
|
6454
6461
|
for (const issue2 of error41.issues) {
|
|
6455
6462
|
if (issue2.code === "invalid_union" && issue2.errors.length) {
|
|
@@ -6459,7 +6466,7 @@ function treeifyError(error40, _mapper) {
|
|
|
6459
6466
|
} else if (issue2.code === "invalid_element") {
|
|
6460
6467
|
processError({ issues: issue2.issues }, issue2.path);
|
|
6461
6468
|
} else {
|
|
6462
|
-
const fullpath = [...
|
|
6469
|
+
const fullpath = [...path27, ...issue2.path];
|
|
6463
6470
|
if (fullpath.length === 0) {
|
|
6464
6471
|
result.errors.push(mapper(issue2));
|
|
6465
6472
|
continue;
|
|
@@ -6489,9 +6496,9 @@ function treeifyError(error40, _mapper) {
|
|
|
6489
6496
|
processError(error40);
|
|
6490
6497
|
return result;
|
|
6491
6498
|
}
|
|
6492
|
-
function toDotPath(
|
|
6499
|
+
function toDotPath(path27) {
|
|
6493
6500
|
const segs = [];
|
|
6494
|
-
for (const seg of
|
|
6501
|
+
for (const seg of path27) {
|
|
6495
6502
|
if (typeof seg === "number")
|
|
6496
6503
|
segs.push(`[${seg}]`);
|
|
6497
6504
|
else if (typeof seg === "symbol")
|
|
@@ -26044,14 +26051,14 @@ function createAzure(options = {}) {
|
|
|
26044
26051
|
description: "Azure OpenAI resource name"
|
|
26045
26052
|
});
|
|
26046
26053
|
const apiVersion = (_a17 = options.apiVersion) != null ? _a17 : "v1";
|
|
26047
|
-
const url2 = ({ path:
|
|
26054
|
+
const url2 = ({ path: path27, modelId }) => {
|
|
26048
26055
|
var _a24;
|
|
26049
26056
|
const baseUrlPrefix = (_a24 = options.baseURL) != null ? _a24 : `https://${getResourceName()}.openai.azure.com/openai`;
|
|
26050
26057
|
let fullUrl;
|
|
26051
26058
|
if (options.useDeploymentBasedUrls) {
|
|
26052
|
-
fullUrl = new URL(`${baseUrlPrefix}/deployments/${modelId}${
|
|
26059
|
+
fullUrl = new URL(`${baseUrlPrefix}/deployments/${modelId}${path27}`);
|
|
26053
26060
|
} else {
|
|
26054
|
-
fullUrl = new URL(`${baseUrlPrefix}/v1${
|
|
26061
|
+
fullUrl = new URL(`${baseUrlPrefix}/v1${path27}`);
|
|
26055
26062
|
}
|
|
26056
26063
|
fullUrl.searchParams.set("api-version", apiVersion);
|
|
26057
26064
|
return fullUrl.toString();
|
|
@@ -32499,12 +32506,12 @@ import path102 from "node:path";
|
|
|
32499
32506
|
|
|
32500
32507
|
// ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/vscode/agentDispatch.js
|
|
32501
32508
|
import { stat as stat3, writeFile as writeFile3 } from "node:fs/promises";
|
|
32502
|
-
import
|
|
32509
|
+
import path11 from "node:path";
|
|
32503
32510
|
|
|
32504
32511
|
// ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/utils/fs.js
|
|
32505
32512
|
import { constants as constants2 } from "node:fs";
|
|
32506
32513
|
import { access as access2, mkdir, readdir, rm, stat } from "node:fs/promises";
|
|
32507
|
-
import
|
|
32514
|
+
import path3 from "node:path";
|
|
32508
32515
|
async function pathExists(target) {
|
|
32509
32516
|
try {
|
|
32510
32517
|
await access2(target, constants2.F_OK);
|
|
@@ -32520,7 +32527,7 @@ async function readDirEntries(target) {
|
|
|
32520
32527
|
const entries = await readdir(target, { withFileTypes: true });
|
|
32521
32528
|
return entries.map((entry) => ({
|
|
32522
32529
|
name: entry.name,
|
|
32523
|
-
absolutePath:
|
|
32530
|
+
absolutePath: path3.join(target, entry.name),
|
|
32524
32531
|
isDirectory: entry.isDirectory()
|
|
32525
32532
|
}));
|
|
32526
32533
|
}
|
|
@@ -32535,9 +32542,9 @@ async function removeIfExists(target) {
|
|
|
32535
32542
|
}
|
|
32536
32543
|
|
|
32537
32544
|
// ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/utils/path.js
|
|
32538
|
-
import
|
|
32545
|
+
import path4 from "node:path";
|
|
32539
32546
|
function pathToFileUri(filePath) {
|
|
32540
|
-
const absolutePath =
|
|
32547
|
+
const absolutePath = path4.isAbsolute(filePath) ? filePath : path4.resolve(filePath);
|
|
32541
32548
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
32542
32549
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
32543
32550
|
return `file:///${normalizedPath}`;
|
|
@@ -32546,7 +32553,7 @@ function pathToFileUri(filePath) {
|
|
|
32546
32553
|
}
|
|
32547
32554
|
|
|
32548
32555
|
// ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/vscode/promptBuilder.js
|
|
32549
|
-
import
|
|
32556
|
+
import path5 from "node:path";
|
|
32550
32557
|
|
|
32551
32558
|
// ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/utils/template.js
|
|
32552
32559
|
function renderTemplate(content, variables) {
|
|
@@ -32636,8 +32643,8 @@ function createBatchRequestPrompt(userQuery, responseFileTmp, responseFileFinal,
|
|
|
32636
32643
|
});
|
|
32637
32644
|
}
|
|
32638
32645
|
function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateContent) {
|
|
32639
|
-
const requestLines = requestFiles.map((file2, index) => `${index + 1}. messages/${
|
|
32640
|
-
const responseList = responseFiles.map((file2) => `"${
|
|
32646
|
+
const requestLines = requestFiles.map((file2, index) => `${index + 1}. messages/${path5.basename(file2)}`).join("\n");
|
|
32647
|
+
const responseList = responseFiles.map((file2) => `"${path5.basename(file2)}"`).join(", ");
|
|
32641
32648
|
return renderTemplate(templateContent, {
|
|
32642
32649
|
requestFiles: requestLines,
|
|
32643
32650
|
responseList
|
|
@@ -32646,7 +32653,7 @@ function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateCont
|
|
|
32646
32653
|
|
|
32647
32654
|
// ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/vscode/responseWaiter.js
|
|
32648
32655
|
import { readFile as readFile2 } from "node:fs/promises";
|
|
32649
|
-
import
|
|
32656
|
+
import path6 from "node:path";
|
|
32650
32657
|
|
|
32651
32658
|
// ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/utils/time.js
|
|
32652
32659
|
function sleep(ms) {
|
|
@@ -32695,7 +32702,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
|
|
|
32695
32702
|
}
|
|
32696
32703
|
async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, silent = false) {
|
|
32697
32704
|
if (!silent) {
|
|
32698
|
-
const fileList = responseFilesFinal.map((file2) =>
|
|
32705
|
+
const fileList = responseFilesFinal.map((file2) => path6.basename(file2)).join(", ");
|
|
32699
32706
|
console.error(`waiting for ${responseFilesFinal.length} batch response(s): ${fileList}`);
|
|
32700
32707
|
}
|
|
32701
32708
|
try {
|
|
@@ -32745,17 +32752,17 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
32745
32752
|
// ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/vscode/vscodeProcess.js
|
|
32746
32753
|
import { exec, spawn } from "node:child_process";
|
|
32747
32754
|
import { mkdir as mkdir2, writeFile } from "node:fs/promises";
|
|
32748
|
-
import
|
|
32755
|
+
import path8 from "node:path";
|
|
32749
32756
|
import { promisify } from "node:util";
|
|
32750
32757
|
|
|
32751
32758
|
// ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/vscode/constants.js
|
|
32752
32759
|
import os from "node:os";
|
|
32753
|
-
import
|
|
32760
|
+
import path7 from "node:path";
|
|
32754
32761
|
var DEFAULT_LOCK_NAME = "subagent.lock";
|
|
32755
32762
|
var DEFAULT_ALIVE_FILENAME = ".alive";
|
|
32756
32763
|
function getDefaultSubagentRoot(vscodeCmd = "code") {
|
|
32757
32764
|
const folder = vscodeCmd === "code-insiders" ? "vscode-insiders-agents" : "vscode-agents";
|
|
32758
|
-
return
|
|
32765
|
+
return path7.join(os.homedir(), ".subagent", folder);
|
|
32759
32766
|
}
|
|
32760
32767
|
var DEFAULT_SUBAGENT_ROOT = getDefaultSubagentRoot();
|
|
32761
32768
|
|
|
@@ -32782,11 +32789,11 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
32782
32789
|
spawn(vscodeCmd, [workspacePath], { windowsHide: true, shell: true, detached: false });
|
|
32783
32790
|
return true;
|
|
32784
32791
|
}
|
|
32785
|
-
const aliveFile =
|
|
32792
|
+
const aliveFile = path8.join(subagentDir, DEFAULT_ALIVE_FILENAME);
|
|
32786
32793
|
await removeIfExists(aliveFile);
|
|
32787
|
-
const githubAgentsDir =
|
|
32794
|
+
const githubAgentsDir = path8.join(subagentDir, ".github", "agents");
|
|
32788
32795
|
await mkdir2(githubAgentsDir, { recursive: true });
|
|
32789
|
-
const wakeupDst =
|
|
32796
|
+
const wakeupDst = path8.join(githubAgentsDir, "wakeup.md");
|
|
32790
32797
|
await writeFile(wakeupDst, DEFAULT_WAKEUP_CONTENT, "utf8");
|
|
32791
32798
|
spawn(vscodeCmd, [workspacePath], { windowsHide: true, shell: true, detached: false });
|
|
32792
32799
|
await sleep(100);
|
|
@@ -32796,7 +32803,7 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
32796
32803
|
"chat",
|
|
32797
32804
|
"-m",
|
|
32798
32805
|
wakeupChatId,
|
|
32799
|
-
`create a file named .alive in the ${
|
|
32806
|
+
`create a file named .alive in the ${path8.basename(subagentDir)} folder`
|
|
32800
32807
|
];
|
|
32801
32808
|
spawn(vscodeCmd, chatArgs, { windowsHide: true, shell: true, detached: false });
|
|
32802
32809
|
const start = Date.now();
|
|
@@ -32811,10 +32818,10 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
32811
32818
|
}
|
|
32812
32819
|
async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, requestInstructions, timestamp, vscodeCmd) {
|
|
32813
32820
|
try {
|
|
32814
|
-
const workspacePath =
|
|
32815
|
-
const messagesDir =
|
|
32821
|
+
const workspacePath = path8.join(subagentDir, `${path8.basename(subagentDir)}.code-workspace`);
|
|
32822
|
+
const messagesDir = path8.join(subagentDir, "messages");
|
|
32816
32823
|
await mkdir2(messagesDir, { recursive: true });
|
|
32817
|
-
const reqFile =
|
|
32824
|
+
const reqFile = path8.join(messagesDir, `${timestamp}_req.md`);
|
|
32818
32825
|
await writeFile(reqFile, requestInstructions, { encoding: "utf8" });
|
|
32819
32826
|
const reqUri = pathToFileUri(reqFile);
|
|
32820
32827
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
@@ -32822,8 +32829,8 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
|
|
|
32822
32829
|
chatArgs.push("-a", attachment);
|
|
32823
32830
|
}
|
|
32824
32831
|
chatArgs.push("-a", reqFile);
|
|
32825
|
-
chatArgs.push(`Follow instructions in [${
|
|
32826
|
-
const workspaceReady = await ensureWorkspaceFocused(workspacePath,
|
|
32832
|
+
chatArgs.push(`Follow instructions in [${path8.basename(reqFile)}](${reqUri})`);
|
|
32833
|
+
const workspaceReady = await ensureWorkspaceFocused(workspacePath, path8.basename(subagentDir), subagentDir, vscodeCmd);
|
|
32827
32834
|
if (!workspaceReady) {
|
|
32828
32835
|
console.error("warning: Workspace may not be fully ready");
|
|
32829
32836
|
}
|
|
@@ -32837,15 +32844,15 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
|
|
|
32837
32844
|
}
|
|
32838
32845
|
async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, chatInstruction, vscodeCmd) {
|
|
32839
32846
|
try {
|
|
32840
|
-
const workspacePath =
|
|
32841
|
-
const messagesDir =
|
|
32847
|
+
const workspacePath = path8.join(subagentDir, `${path8.basename(subagentDir)}.code-workspace`);
|
|
32848
|
+
const messagesDir = path8.join(subagentDir, "messages");
|
|
32842
32849
|
await mkdir2(messagesDir, { recursive: true });
|
|
32843
32850
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
32844
32851
|
for (const attachment of attachmentPaths) {
|
|
32845
32852
|
chatArgs.push("-a", attachment);
|
|
32846
32853
|
}
|
|
32847
32854
|
chatArgs.push(chatInstruction);
|
|
32848
|
-
const workspaceReady = await ensureWorkspaceFocused(workspacePath,
|
|
32855
|
+
const workspaceReady = await ensureWorkspaceFocused(workspacePath, path8.basename(subagentDir), subagentDir, vscodeCmd);
|
|
32849
32856
|
if (!workspaceReady) {
|
|
32850
32857
|
console.error("warning: Workspace may not be fully ready");
|
|
32851
32858
|
}
|
|
@@ -32860,10 +32867,10 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
|
|
|
32860
32867
|
|
|
32861
32868
|
// ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/vscode/workspaceManager.js
|
|
32862
32869
|
import { copyFile, mkdir as mkdir3, readFile as readFile3, readdir as readdir2, stat as stat2, writeFile as writeFile2 } from "node:fs/promises";
|
|
32863
|
-
import
|
|
32870
|
+
import path10 from "node:path";
|
|
32864
32871
|
|
|
32865
32872
|
// ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/utils/workspace.js
|
|
32866
|
-
import
|
|
32873
|
+
import path9 from "node:path";
|
|
32867
32874
|
|
|
32868
32875
|
// ../../node_modules/.bun/json5@2.2.3/node_modules/json5/dist/index.mjs
|
|
32869
32876
|
var Space_Separator = /[\u1680\u2000-\u200A\u202F\u205F\u3000]/;
|
|
@@ -33966,10 +33973,10 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
33966
33973
|
}
|
|
33967
33974
|
const transformedFolders = workspace.folders.map((folder) => {
|
|
33968
33975
|
const folderPath = folder.path;
|
|
33969
|
-
if (
|
|
33976
|
+
if (path9.isAbsolute(folderPath)) {
|
|
33970
33977
|
return folder;
|
|
33971
33978
|
}
|
|
33972
|
-
const absolutePath =
|
|
33979
|
+
const absolutePath = path9.resolve(templateDir, folderPath);
|
|
33973
33980
|
return {
|
|
33974
33981
|
...folder,
|
|
33975
33982
|
path: absolutePath
|
|
@@ -33991,19 +33998,19 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
33991
33998
|
if (locationMap && typeof locationMap === "object") {
|
|
33992
33999
|
const transformedMap = {};
|
|
33993
34000
|
for (const [locationPath, value] of Object.entries(locationMap)) {
|
|
33994
|
-
const isAbsolute =
|
|
34001
|
+
const isAbsolute = path9.isAbsolute(locationPath);
|
|
33995
34002
|
if (isAbsolute) {
|
|
33996
34003
|
transformedMap[locationPath] = value;
|
|
33997
34004
|
} else {
|
|
33998
34005
|
const firstGlobIndex = locationPath.search(/[*]/);
|
|
33999
34006
|
if (firstGlobIndex === -1) {
|
|
34000
|
-
const resolvedPath =
|
|
34007
|
+
const resolvedPath = path9.resolve(templateDir, locationPath).replace(/\\/g, "/");
|
|
34001
34008
|
transformedMap[resolvedPath] = value;
|
|
34002
34009
|
} else {
|
|
34003
34010
|
const basePathEnd = locationPath.lastIndexOf("/", firstGlobIndex);
|
|
34004
34011
|
const basePath = basePathEnd !== -1 ? locationPath.substring(0, basePathEnd) : ".";
|
|
34005
34012
|
const patternPath = locationPath.substring(basePathEnd !== -1 ? basePathEnd : 0);
|
|
34006
|
-
const resolvedPath = (
|
|
34013
|
+
const resolvedPath = (path9.resolve(templateDir, basePath) + patternPath).replace(/\\/g, "/");
|
|
34007
34014
|
transformedMap[resolvedPath] = value;
|
|
34008
34015
|
}
|
|
34009
34016
|
}
|
|
@@ -34041,7 +34048,7 @@ async function findUnlockedSubagent(subagentRoot) {
|
|
|
34041
34048
|
number: Number.parseInt(entry.name.split("-")[1] ?? "", 10)
|
|
34042
34049
|
})).filter((entry) => Number.isInteger(entry.number)).sort((a, b) => a.number - b.number);
|
|
34043
34050
|
for (const subagent of subagents) {
|
|
34044
|
-
const lockFile =
|
|
34051
|
+
const lockFile = path10.join(subagent.absolutePath, DEFAULT_LOCK_NAME);
|
|
34045
34052
|
if (!await pathExists(lockFile)) {
|
|
34046
34053
|
return subagent.absolutePath;
|
|
34047
34054
|
}
|
|
@@ -34051,7 +34058,7 @@ async function findUnlockedSubagent(subagentRoot) {
|
|
|
34051
34058
|
async function copyAgentConfig(subagentDir, workspaceTemplate) {
|
|
34052
34059
|
let workspaceContent;
|
|
34053
34060
|
if (workspaceTemplate) {
|
|
34054
|
-
const workspaceSrc =
|
|
34061
|
+
const workspaceSrc = path10.resolve(workspaceTemplate);
|
|
34055
34062
|
if (!await pathExists(workspaceSrc)) {
|
|
34056
34063
|
throw new Error(`workspace template not found: ${workspaceSrc}`);
|
|
34057
34064
|
}
|
|
@@ -34064,37 +34071,37 @@ async function copyAgentConfig(subagentDir, workspaceTemplate) {
|
|
|
34064
34071
|
} else {
|
|
34065
34072
|
workspaceContent = DEFAULT_WORKSPACE_TEMPLATE;
|
|
34066
34073
|
}
|
|
34067
|
-
const workspaceName = `${
|
|
34068
|
-
const workspaceDst =
|
|
34069
|
-
const templateDir = workspaceTemplate ?
|
|
34074
|
+
const workspaceName = `${path10.basename(subagentDir)}.code-workspace`;
|
|
34075
|
+
const workspaceDst = path10.join(subagentDir, workspaceName);
|
|
34076
|
+
const templateDir = workspaceTemplate ? path10.dirname(path10.resolve(workspaceTemplate)) : subagentDir;
|
|
34070
34077
|
const workspaceJson = JSON.stringify(workspaceContent, null, 2);
|
|
34071
34078
|
const transformedContent = transformWorkspacePaths(workspaceJson, templateDir);
|
|
34072
34079
|
await writeFile2(workspaceDst, transformedContent, "utf8");
|
|
34073
|
-
const messagesDir =
|
|
34080
|
+
const messagesDir = path10.join(subagentDir, "messages");
|
|
34074
34081
|
await mkdir3(messagesDir, { recursive: true });
|
|
34075
34082
|
return { workspace: workspaceDst, messagesDir };
|
|
34076
34083
|
}
|
|
34077
34084
|
async function createSubagentLock(subagentDir) {
|
|
34078
|
-
const messagesDir =
|
|
34085
|
+
const messagesDir = path10.join(subagentDir, "messages");
|
|
34079
34086
|
if (await pathExists(messagesDir)) {
|
|
34080
34087
|
const files = await readdir2(messagesDir);
|
|
34081
34088
|
await Promise.all(files.map(async (file2) => {
|
|
34082
|
-
const target =
|
|
34089
|
+
const target = path10.join(messagesDir, file2);
|
|
34083
34090
|
await removeIfExists(target);
|
|
34084
34091
|
}));
|
|
34085
34092
|
}
|
|
34086
|
-
const githubAgentsDir =
|
|
34093
|
+
const githubAgentsDir = path10.join(subagentDir, ".github", "agents");
|
|
34087
34094
|
if (await pathExists(githubAgentsDir)) {
|
|
34088
34095
|
const agentFiles = await readdir2(githubAgentsDir);
|
|
34089
34096
|
const preservedFiles = /* @__PURE__ */ new Set(["wakeup.md", "subagent.md"]);
|
|
34090
|
-
await Promise.all(agentFiles.filter((file2) => file2.endsWith(".md") && !preservedFiles.has(file2)).map((file2) => removeIfExists(
|
|
34097
|
+
await Promise.all(agentFiles.filter((file2) => file2.endsWith(".md") && !preservedFiles.has(file2)).map((file2) => removeIfExists(path10.join(githubAgentsDir, file2))));
|
|
34091
34098
|
}
|
|
34092
|
-
const lockFile =
|
|
34099
|
+
const lockFile = path10.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
34093
34100
|
await writeFile2(lockFile, "", { encoding: "utf8" });
|
|
34094
34101
|
return lockFile;
|
|
34095
34102
|
}
|
|
34096
34103
|
async function removeSubagentLock(subagentDir) {
|
|
34097
|
-
const lockFile =
|
|
34104
|
+
const lockFile = path10.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
34098
34105
|
await removeIfExists(lockFile);
|
|
34099
34106
|
}
|
|
34100
34107
|
async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspaceTemplate, dryRun) {
|
|
@@ -34114,9 +34121,9 @@ async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspa
|
|
|
34114
34121
|
return 1;
|
|
34115
34122
|
}
|
|
34116
34123
|
if (promptFile) {
|
|
34117
|
-
const githubAgentsDir =
|
|
34124
|
+
const githubAgentsDir = path10.join(subagentDir, ".github", "agents");
|
|
34118
34125
|
await mkdir3(githubAgentsDir, { recursive: true });
|
|
34119
|
-
const agentFile =
|
|
34126
|
+
const agentFile = path10.join(githubAgentsDir, `${chatId}.md`);
|
|
34120
34127
|
try {
|
|
34121
34128
|
await copyFile(promptFile, agentFile);
|
|
34122
34129
|
} catch (error40) {
|
|
@@ -34135,7 +34142,7 @@ async function resolvePromptFile(promptFile) {
|
|
|
34135
34142
|
if (!promptFile) {
|
|
34136
34143
|
return void 0;
|
|
34137
34144
|
}
|
|
34138
|
-
const resolvedPrompt =
|
|
34145
|
+
const resolvedPrompt = path11.resolve(promptFile);
|
|
34139
34146
|
if (!await pathExists(resolvedPrompt)) {
|
|
34140
34147
|
throw new Error(`Prompt file not found: ${resolvedPrompt}`);
|
|
34141
34148
|
}
|
|
@@ -34151,7 +34158,7 @@ async function resolveAttachments(extraAttachments) {
|
|
|
34151
34158
|
}
|
|
34152
34159
|
const resolved = [];
|
|
34153
34160
|
for (const attachment of extraAttachments) {
|
|
34154
|
-
const resolvedPath =
|
|
34161
|
+
const resolvedPath = path11.resolve(attachment);
|
|
34155
34162
|
if (!await pathExists(resolvedPath)) {
|
|
34156
34163
|
throw new Error(`Attachment not found: ${resolvedPath}`);
|
|
34157
34164
|
}
|
|
@@ -34180,7 +34187,7 @@ async function dispatchAgentSession(options) {
|
|
|
34180
34187
|
error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
|
|
34181
34188
|
};
|
|
34182
34189
|
}
|
|
34183
|
-
const subagentName =
|
|
34190
|
+
const subagentName = path11.basename(subagentDir);
|
|
34184
34191
|
const chatId = Math.random().toString(16).slice(2, 10);
|
|
34185
34192
|
const preparationResult = await prepareSubagentDirectory(subagentDir, resolvedPrompt, chatId, workspaceTemplate, dryRun);
|
|
34186
34193
|
if (preparationResult !== 0) {
|
|
@@ -34201,9 +34208,9 @@ async function dispatchAgentSession(options) {
|
|
|
34201
34208
|
};
|
|
34202
34209
|
}
|
|
34203
34210
|
const timestamp = generateTimestamp();
|
|
34204
|
-
const messagesDir =
|
|
34205
|
-
const responseFileTmp =
|
|
34206
|
-
const responseFileFinal =
|
|
34211
|
+
const messagesDir = path11.join(subagentDir, "messages");
|
|
34212
|
+
const responseFileTmp = path11.join(messagesDir, `${timestamp}_res.tmp.md`);
|
|
34213
|
+
const responseFileFinal = path11.join(messagesDir, `${timestamp}_res.md`);
|
|
34207
34214
|
const requestInstructions = createRequestPrompt(userQuery, responseFileTmp, responseFileFinal, templateContent);
|
|
34208
34215
|
if (dryRun) {
|
|
34209
34216
|
return {
|
|
@@ -34293,7 +34300,7 @@ async function dispatchBatchAgent(options) {
|
|
|
34293
34300
|
error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
|
|
34294
34301
|
};
|
|
34295
34302
|
}
|
|
34296
|
-
subagentName =
|
|
34303
|
+
subagentName = path11.basename(subagentDir);
|
|
34297
34304
|
const chatId = Math.random().toString(16).slice(2, 10);
|
|
34298
34305
|
const preparationResult = await prepareSubagentDirectory(subagentDir, resolvedPrompt, chatId, workspaceTemplate, dryRun);
|
|
34299
34306
|
if (preparationResult !== 0) {
|
|
@@ -34318,11 +34325,11 @@ async function dispatchBatchAgent(options) {
|
|
|
34318
34325
|
};
|
|
34319
34326
|
}
|
|
34320
34327
|
const timestamp = generateTimestamp();
|
|
34321
|
-
const messagesDir =
|
|
34322
|
-
requestFiles = userQueries.map((_, index) =>
|
|
34323
|
-
const responseTmpFiles = userQueries.map((_, index) =>
|
|
34324
|
-
responseFilesFinal = userQueries.map((_, index) =>
|
|
34325
|
-
const orchestratorFile =
|
|
34328
|
+
const messagesDir = path11.join(subagentDir, "messages");
|
|
34329
|
+
requestFiles = userQueries.map((_, index) => path11.join(messagesDir, `${timestamp}_${index}_req.md`));
|
|
34330
|
+
const responseTmpFiles = userQueries.map((_, index) => path11.join(messagesDir, `${timestamp}_${index}_res.tmp.md`));
|
|
34331
|
+
responseFilesFinal = userQueries.map((_, index) => path11.join(messagesDir, `${timestamp}_${index}_res.md`));
|
|
34332
|
+
const orchestratorFile = path11.join(messagesDir, `${timestamp}_orchestrator.md`);
|
|
34326
34333
|
if (!dryRun) {
|
|
34327
34334
|
await Promise.all(userQueries.map((query, index) => writeFile3(requestFiles[index], createBatchRequestPrompt(query, responseTmpFiles[index], responseFilesFinal[index], batchRequestTemplateContent), { encoding: "utf8" })));
|
|
34328
34335
|
const orchestratorContent = createBatchOrchestratorPrompt(requestFiles, responseFilesFinal, orchestratorTemplateContent);
|
|
@@ -34391,7 +34398,7 @@ async function dispatchBatchAgent(options) {
|
|
|
34391
34398
|
|
|
34392
34399
|
// ../../node_modules/.bun/subagent@0.5.6/node_modules/subagent/dist/vscode/provision.js
|
|
34393
34400
|
import { writeFile as writeFile4 } from "node:fs/promises";
|
|
34394
|
-
import
|
|
34401
|
+
import path12 from "node:path";
|
|
34395
34402
|
var DEFAULT_WORKSPACE_TEMPLATE2 = {
|
|
34396
34403
|
folders: [
|
|
34397
34404
|
{
|
|
@@ -34414,7 +34421,7 @@ async function provisionSubagents(options) {
|
|
|
34414
34421
|
if (!Number.isInteger(subagents) || subagents < 1) {
|
|
34415
34422
|
throw new Error("subagents must be a positive integer");
|
|
34416
34423
|
}
|
|
34417
|
-
const targetPath =
|
|
34424
|
+
const targetPath = path12.resolve(targetRoot);
|
|
34418
34425
|
if (!dryRun) {
|
|
34419
34426
|
await ensureDir(targetPath);
|
|
34420
34427
|
}
|
|
@@ -34435,7 +34442,7 @@ async function provisionSubagents(options) {
|
|
|
34435
34442
|
continue;
|
|
34436
34443
|
}
|
|
34437
34444
|
highestNumber = Math.max(highestNumber, parsed);
|
|
34438
|
-
const lockFile =
|
|
34445
|
+
const lockFile = path12.join(entry.absolutePath, lockName);
|
|
34439
34446
|
const locked = await pathExists(lockFile);
|
|
34440
34447
|
if (locked) {
|
|
34441
34448
|
lockedSubagents.add(entry.absolutePath);
|
|
@@ -34452,11 +34459,11 @@ async function provisionSubagents(options) {
|
|
|
34452
34459
|
break;
|
|
34453
34460
|
}
|
|
34454
34461
|
const subagentDir = subagent.absolutePath;
|
|
34455
|
-
const githubAgentsDir =
|
|
34456
|
-
const lockFile =
|
|
34457
|
-
const workspaceDst =
|
|
34458
|
-
const wakeupDst =
|
|
34459
|
-
const subagentDst =
|
|
34462
|
+
const githubAgentsDir = path12.join(subagentDir, ".github", "agents");
|
|
34463
|
+
const lockFile = path12.join(subagentDir, lockName);
|
|
34464
|
+
const workspaceDst = path12.join(subagentDir, `${path12.basename(subagentDir)}.code-workspace`);
|
|
34465
|
+
const wakeupDst = path12.join(githubAgentsDir, "wakeup.md");
|
|
34466
|
+
const subagentDst = path12.join(githubAgentsDir, "subagent.md");
|
|
34460
34467
|
const isLocked = await pathExists(lockFile);
|
|
34461
34468
|
if (isLocked && !force) {
|
|
34462
34469
|
continue;
|
|
@@ -34494,11 +34501,11 @@ async function provisionSubagents(options) {
|
|
|
34494
34501
|
let nextIndex = highestNumber;
|
|
34495
34502
|
while (subagentsProvisioned < subagents) {
|
|
34496
34503
|
nextIndex += 1;
|
|
34497
|
-
const subagentDir =
|
|
34498
|
-
const githubAgentsDir =
|
|
34499
|
-
const workspaceDst =
|
|
34500
|
-
const wakeupDst =
|
|
34501
|
-
const subagentDst =
|
|
34504
|
+
const subagentDir = path12.join(targetPath, `subagent-${nextIndex}`);
|
|
34505
|
+
const githubAgentsDir = path12.join(subagentDir, ".github", "agents");
|
|
34506
|
+
const workspaceDst = path12.join(subagentDir, `${path12.basename(subagentDir)}.code-workspace`);
|
|
34507
|
+
const wakeupDst = path12.join(githubAgentsDir, "wakeup.md");
|
|
34508
|
+
const subagentDst = path12.join(githubAgentsDir, "subagent.md");
|
|
34502
34509
|
if (!dryRun) {
|
|
34503
34510
|
await ensureDir(subagentDir);
|
|
34504
34511
|
await ensureDir(githubAgentsDir);
|
|
@@ -34562,11 +34569,47 @@ function isTestMessage(value) {
|
|
|
34562
34569
|
}
|
|
34563
34570
|
return candidate.content.every(isJsonObject);
|
|
34564
34571
|
}
|
|
34565
|
-
var EVALUATOR_KIND_VALUES = [
|
|
34572
|
+
// Every evaluator kind that may appear as an evaluator `type`.
var EVALUATOR_KIND_VALUES = ["code_judge", "llm_judge", "rubric", "composite", "tool_trajectory", "expected_messages"];
// Set view of the list above, used for membership checks.
var EVALUATOR_KIND_SET = new Set(EVALUATOR_KIND_VALUES);
/**
 * Tells whether `value` is one of the known evaluator kinds.
 *
 * @param {unknown} value - Candidate value.
 * @returns {boolean} True only for strings listed in EVALUATOR_KIND_VALUES.
 */
function isEvaluatorKind(value) {
  if (typeof value !== "string") {
    return false;
  }
  return EVALUATOR_KIND_SET.has(value);
}
|
|
34584
|
+
/**
 * Tells whether `value` names a recognised trace event type.
 *
 * @param {unknown} value - Candidate value.
 * @returns {boolean} True for the strings "model_step", "tool_call",
 *   "tool_result", "message" and "error"; false for anything else.
 */
function isTraceEventType(value) {
  if (typeof value !== "string") {
    return false;
  }
  switch (value) {
    case "model_step":
    case "tool_call":
    case "tool_result":
    case "message":
    case "error":
      return true;
    default:
      return false;
  }
}
|
|
34587
|
+
/**
 * Tells whether `value` has the minimal shape of a trace event: a non-null
 * object whose `type` passes isTraceEventType and whose `timestamp` is a
 * string. No other fields are inspected.
 *
 * @param {unknown} value - Candidate value.
 * @returns {boolean} True when both checks pass.
 */
function isTraceEvent(value) {
  if (value === null || typeof value !== "object") {
    return false;
  }
  if (!isTraceEventType(value.type)) {
    return false;
  }
  return typeof value.timestamp === "string";
}
|
|
34594
|
+
/**
 * Builds an aggregate summary of a trace.
 *
 * @param {Array<{type: string, name?: string}>} trace2 - Trace events to summarise.
 * @returns {{eventCount: number, toolNames: string[], toolCallsByName: Object<string, number>, errorCount: number}}
 *   `eventCount` is the total number of events, `toolCallsByName` counts
 *   "tool_call" events that carry a truthy `name`, `toolNames` is the sorted
 *   list of those names, and `errorCount` counts "error" events.
 */
function computeTraceSummary(trace2) {
  // Count in a Map rather than a plain object: with `{}` a tool named
  // "constructor" or "toString" would read the inherited Object.prototype
  // member instead of `undefined`, turning the count into a string, and
  // "__proto__" would be lost altogether.
  const counts = new Map();
  let errorCount = 0;
  for (const event of trace2) {
    if (event.type === "tool_call" && event.name) {
      // Keys are stringified to match plain-object key semantics.
      const key = String(event.name);
      counts.set(key, (counts.get(key) ?? 0) + 1);
    }
    if (event.type === "error") {
      errorCount++;
    }
  }
  // Object.fromEntries defines own data properties, so every name
  // (including "__proto__") ends up as an ordinary counted key.
  const toolCallsByName = Object.fromEntries(counts);
  const toolNames = Object.keys(toolCallsByName).sort();
  return {
    eventCount: trace2.length,
    toolNames,
    toolCallsByName,
    errorCount
  };
}
|
|
34570
34613
|
function extractCodeBlocks(segments) {
|
|
34571
34614
|
const CODE_BLOCK_PATTERN = /```[\s\S]*?```/g;
|
|
34572
34615
|
const codeBlocks = [];
|
|
@@ -34660,15 +34703,15 @@ function resolveToAbsolutePath(candidate) {
|
|
|
34660
34703
|
if (candidate.startsWith("file://")) {
|
|
34661
34704
|
return new URL(candidate).pathname;
|
|
34662
34705
|
}
|
|
34663
|
-
return
|
|
34706
|
+
return path13.resolve(candidate);
|
|
34664
34707
|
}
|
|
34665
34708
|
throw new TypeError("Unsupported repoRoot value. Expected string or URL.");
|
|
34666
34709
|
}
|
|
34667
34710
|
function buildDirectoryChain2(filePath, repoRoot) {
|
|
34668
34711
|
const directories = [];
|
|
34669
34712
|
const seen = /* @__PURE__ */ new Set();
|
|
34670
|
-
const boundary =
|
|
34671
|
-
let current =
|
|
34713
|
+
const boundary = path13.resolve(repoRoot);
|
|
34714
|
+
let current = path13.resolve(path13.dirname(filePath));
|
|
34672
34715
|
while (current !== void 0) {
|
|
34673
34716
|
if (!seen.has(current)) {
|
|
34674
34717
|
directories.push(current);
|
|
@@ -34677,7 +34720,7 @@ function buildDirectoryChain2(filePath, repoRoot) {
|
|
|
34677
34720
|
if (current === boundary) {
|
|
34678
34721
|
break;
|
|
34679
34722
|
}
|
|
34680
|
-
const parent =
|
|
34723
|
+
const parent = path13.dirname(current);
|
|
34681
34724
|
if (parent === current) {
|
|
34682
34725
|
break;
|
|
34683
34726
|
}
|
|
@@ -34691,16 +34734,16 @@ function buildDirectoryChain2(filePath, repoRoot) {
|
|
|
34691
34734
|
function buildSearchRoots2(evalPath, repoRoot) {
|
|
34692
34735
|
const uniqueRoots = [];
|
|
34693
34736
|
const addRoot = (root2) => {
|
|
34694
|
-
const normalized =
|
|
34737
|
+
const normalized = path13.resolve(root2);
|
|
34695
34738
|
if (!uniqueRoots.includes(normalized)) {
|
|
34696
34739
|
uniqueRoots.push(normalized);
|
|
34697
34740
|
}
|
|
34698
34741
|
};
|
|
34699
|
-
let currentDir =
|
|
34742
|
+
let currentDir = path13.dirname(evalPath);
|
|
34700
34743
|
let reachedBoundary = false;
|
|
34701
34744
|
while (!reachedBoundary) {
|
|
34702
34745
|
addRoot(currentDir);
|
|
34703
|
-
const parentDir =
|
|
34746
|
+
const parentDir = path13.dirname(currentDir);
|
|
34704
34747
|
if (currentDir === repoRoot || parentDir === currentDir) {
|
|
34705
34748
|
reachedBoundary = true;
|
|
34706
34749
|
} else {
|
|
@@ -34718,16 +34761,16 @@ function trimLeadingSeparators2(value) {
|
|
|
34718
34761
|
async function resolveFileReference2(rawValue, searchRoots) {
|
|
34719
34762
|
const displayPath = trimLeadingSeparators2(rawValue);
|
|
34720
34763
|
const potentialPaths = [];
|
|
34721
|
-
if (
|
|
34722
|
-
potentialPaths.push(
|
|
34764
|
+
if (path13.isAbsolute(rawValue)) {
|
|
34765
|
+
potentialPaths.push(path13.normalize(rawValue));
|
|
34723
34766
|
}
|
|
34724
34767
|
for (const base of searchRoots) {
|
|
34725
|
-
potentialPaths.push(
|
|
34768
|
+
potentialPaths.push(path13.resolve(base, displayPath));
|
|
34726
34769
|
}
|
|
34727
34770
|
const attempted = [];
|
|
34728
34771
|
const seen = /* @__PURE__ */ new Set();
|
|
34729
34772
|
for (const candidate of potentialPaths) {
|
|
34730
|
-
const absoluteCandidate =
|
|
34773
|
+
const absoluteCandidate = path13.resolve(candidate);
|
|
34731
34774
|
if (seen.has(absoluteCandidate)) {
|
|
34732
34775
|
continue;
|
|
34733
34776
|
}
|
|
@@ -35008,6 +35051,75 @@ async function parseEvaluators(rawEvalCase, globalExecution, searchRoots, evalId
|
|
|
35008
35051
|
});
|
|
35009
35052
|
continue;
|
|
35010
35053
|
}
|
|
35054
|
+
if (typeValue === "expected_messages") {
|
|
35055
|
+
evaluators.push({
|
|
35056
|
+
name: name16,
|
|
35057
|
+
type: "expected_messages"
|
|
35058
|
+
});
|
|
35059
|
+
continue;
|
|
35060
|
+
}
|
|
35061
|
+
if (typeValue === "tool_trajectory") {
|
|
35062
|
+
const mode = asString2(rawEvaluator.mode);
|
|
35063
|
+
if (mode !== "any_order" && mode !== "in_order" && mode !== "exact") {
|
|
35064
|
+
logWarning2(
|
|
35065
|
+
`Skipping tool_trajectory evaluator '${name16}' in '${evalId}': invalid mode '${mode}' (must be any_order, in_order, or exact)`
|
|
35066
|
+
);
|
|
35067
|
+
continue;
|
|
35068
|
+
}
|
|
35069
|
+
const rawMinimums = rawEvaluator.minimums;
|
|
35070
|
+
let minimums;
|
|
35071
|
+
if (rawMinimums !== void 0) {
|
|
35072
|
+
if (!isJsonObject2(rawMinimums)) {
|
|
35073
|
+
logWarning2(
|
|
35074
|
+
`Skipping tool_trajectory evaluator '${name16}' in '${evalId}': minimums must be an object`
|
|
35075
|
+
);
|
|
35076
|
+
continue;
|
|
35077
|
+
}
|
|
35078
|
+
minimums = {};
|
|
35079
|
+
for (const [toolName, count] of Object.entries(rawMinimums)) {
|
|
35080
|
+
if (typeof count === "number" && count >= 0) {
|
|
35081
|
+
minimums[toolName] = count;
|
|
35082
|
+
}
|
|
35083
|
+
}
|
|
35084
|
+
}
|
|
35085
|
+
const rawExpected = rawEvaluator.expected;
|
|
35086
|
+
let expected;
|
|
35087
|
+
if (rawExpected !== void 0) {
|
|
35088
|
+
if (!Array.isArray(rawExpected)) {
|
|
35089
|
+
logWarning2(
|
|
35090
|
+
`Skipping tool_trajectory evaluator '${name16}' in '${evalId}': expected must be an array`
|
|
35091
|
+
);
|
|
35092
|
+
continue;
|
|
35093
|
+
}
|
|
35094
|
+
expected = [];
|
|
35095
|
+
for (const item of rawExpected) {
|
|
35096
|
+
if (isJsonObject2(item) && typeof item.tool === "string") {
|
|
35097
|
+
expected.push({ tool: item.tool });
|
|
35098
|
+
}
|
|
35099
|
+
}
|
|
35100
|
+
}
|
|
35101
|
+
if (mode === "any_order" && !minimums) {
|
|
35102
|
+
logWarning2(
|
|
35103
|
+
`Skipping tool_trajectory evaluator '${name16}' in '${evalId}': any_order mode requires minimums`
|
|
35104
|
+
);
|
|
35105
|
+
continue;
|
|
35106
|
+
}
|
|
35107
|
+
if ((mode === "in_order" || mode === "exact") && !expected) {
|
|
35108
|
+
logWarning2(
|
|
35109
|
+
`Skipping tool_trajectory evaluator '${name16}' in '${evalId}': ${mode} mode requires expected`
|
|
35110
|
+
);
|
|
35111
|
+
continue;
|
|
35112
|
+
}
|
|
35113
|
+
const config2 = {
|
|
35114
|
+
name: name16,
|
|
35115
|
+
type: "tool_trajectory",
|
|
35116
|
+
mode,
|
|
35117
|
+
...minimums ? { minimums } : {},
|
|
35118
|
+
...expected ? { expected } : {}
|
|
35119
|
+
};
|
|
35120
|
+
evaluators.push(config2);
|
|
35121
|
+
continue;
|
|
35122
|
+
}
|
|
35011
35123
|
const prompt = asString2(rawEvaluator.prompt);
|
|
35012
35124
|
let promptPath;
|
|
35013
35125
|
if (prompt) {
|
|
@@ -35257,6 +35369,67 @@ ${detailBlock}${ANSI_RESET4}`);
|
|
|
35257
35369
|
console.warn(`${ANSI_YELLOW4}Warning: ${message}${ANSI_RESET4}`);
|
|
35258
35370
|
}
|
|
35259
35371
|
}
|
|
35372
|
+
/**
 * Turns the raw `expected_messages` of an eval case into output segments.
 *
 * Each message yields one segment carrying its `role`, plus `tool_calls`
 * for assistant messages that define them. String content is copied as is.
 * Array content is processed part by part: non-object parts are dropped,
 * "file" parts are resolved against `searchRoots` and replaced with the
 * file text (CRLF normalised to LF), and all other parts are cloned.
 * File parts that cannot be resolved or read are skipped with a warning.
 *
 * @param {object} options
 * @param {Array<object>} options.messages - Expected messages to convert.
 * @param {string[]} options.searchRoots - Directories tried when resolving file references.
 * @param {boolean} [options.verbose] - When truthy, logs each resolved file.
 * @returns {Promise<Array<object>>} One segment per input message, in order.
 */
async function processExpectedMessages(options) {
  const { messages, searchRoots, verbose } = options;
  const outputSegments = [];
  for (const message of messages) {
    const entry = { role: message.role };
    if (message.role === "assistant" && message.tool_calls !== void 0) {
      entry.tool_calls = message.tool_calls;
    }
    const rawContent = message.content;
    if (typeof rawContent === "string") {
      entry.content = rawContent;
      outputSegments.push(entry);
      continue;
    }
    if (!Array.isArray(rawContent)) {
      // Content that is neither string nor array leaves the segment without a `content` field.
      outputSegments.push(entry);
      continue;
    }
    const parts = [];
    for (const part of rawContent) {
      if (!isJsonObject(part)) {
        continue;
      }
      if (asString3(part.type) !== "file") {
        parts.push(cloneJsonObject(part));
        continue;
      }
      const reference = asString3(part.value);
      if (!reference) {
        continue;
      }
      const lookup = await resolveFileReference2(reference, searchRoots);
      const shownPath = lookup.displayPath;
      const locatedPath = lookup.resolvedPath;
      if (!locatedPath) {
        const tried = lookup.attempted;
        const details = tried.length ? [" Tried:", ...tried.map((candidate) => ` ${candidate}`)] : void 0;
        logWarning3(`File not found in expected_messages: ${shownPath}`, details);
        continue;
      }
      try {
        const rawText = await readFile32(locatedPath, "utf8");
        const normalizedText = rawText.replace(/\r\n/g, "\n");
        parts.push({
          type: "file",
          path: shownPath,
          text: normalizedText,
          resolvedPath: path42.resolve(locatedPath)
        });
        if (verbose) {
          console.log(` [Expected Output File] Found: ${shownPath}`);
          console.log(` Resolved to: ${locatedPath}`);
        }
      } catch (error40) {
        logWarning3(
          `Could not read expected output file ${locatedPath}: ${error40.message}`
        );
      }
    }
    entry.content = parts;
    outputSegments.push(entry);
  }
  return outputSegments;
}
|
|
35260
35433
|
var ANSI_YELLOW5 = "\x1B[33m";
|
|
35261
35434
|
var ANSI_RESET5 = "\x1B[0m";
|
|
35262
35435
|
async function buildPromptInputs(testCase, mode = "lm") {
|
|
@@ -35555,12 +35728,10 @@ async function loadEvalCases(evalFilePath, repoRoot, options) {
|
|
|
35555
35728
|
messageType: "input",
|
|
35556
35729
|
verbose
|
|
35557
35730
|
});
|
|
35558
|
-
const outputSegments = hasExpectedMessages ? await
|
|
35731
|
+
const outputSegments = hasExpectedMessages ? await processExpectedMessages({
|
|
35559
35732
|
messages: expectedMessages,
|
|
35560
35733
|
searchRoots,
|
|
35561
35734
|
repoRootPath,
|
|
35562
|
-
guidelinePatterns,
|
|
35563
|
-
messageType: "output",
|
|
35564
35735
|
verbose
|
|
35565
35736
|
}) : [];
|
|
35566
35737
|
const codeSnippets = extractCodeBlocks(inputSegments);
|
|
@@ -36071,9 +36242,11 @@ var CliProvider = class {
|
|
|
36071
36242
|
const message = detail ? `${detail} (exit code ${codeText})` : `CLI exited with code ${codeText}`;
|
|
36072
36243
|
throw new Error(message);
|
|
36073
36244
|
}
|
|
36074
|
-
const
|
|
36245
|
+
const responseContent = await this.readAndCleanupOutputFile(outputFilePath);
|
|
36246
|
+
const parsed = this.parseOutputContent(responseContent);
|
|
36075
36247
|
return {
|
|
36076
|
-
text:
|
|
36248
|
+
text: parsed.text,
|
|
36249
|
+
trace: parsed.trace,
|
|
36077
36250
|
raw: {
|
|
36078
36251
|
command: renderedCommand,
|
|
36079
36252
|
stderr: result.stderr,
|
|
@@ -36083,6 +36256,31 @@ var CliProvider = class {
|
|
|
36083
36256
|
}
|
|
36084
36257
|
};
|
|
36085
36258
|
}
|
|
36259
|
+
/**
|
|
36260
|
+
* Parse output content from CLI.
|
|
36261
|
+
* If the content is valid JSON with a 'text' field, extract text and optional trace.
|
|
36262
|
+
* Otherwise, treat the entire content as plain text.
|
|
36263
|
+
*/
|
|
36264
|
+
parseOutputContent(content) {
|
|
36265
|
+
try {
|
|
36266
|
+
const parsed = JSON.parse(content);
|
|
36267
|
+
if (typeof parsed === "object" && parsed !== null && "text" in parsed) {
|
|
36268
|
+
const obj = parsed;
|
|
36269
|
+
const text2 = typeof obj.text === "string" ? obj.text : String(obj.text);
|
|
36270
|
+
const trace2 = this.parseTrace(obj.trace);
|
|
36271
|
+
return { text: text2, trace: trace2 };
|
|
36272
|
+
}
|
|
36273
|
+
} catch {
|
|
36274
|
+
}
|
|
36275
|
+
return { text: content };
|
|
36276
|
+
}
|
|
36277
|
+
parseTrace(trace2) {
|
|
36278
|
+
if (!Array.isArray(trace2)) {
|
|
36279
|
+
return void 0;
|
|
36280
|
+
}
|
|
36281
|
+
const validEvents = trace2.filter(isTraceEvent);
|
|
36282
|
+
return validEvents.length > 0 ? validEvents : void 0;
|
|
36283
|
+
}
|
|
36086
36284
|
async readAndCleanupOutputFile(filePath) {
|
|
36087
36285
|
try {
|
|
36088
36286
|
const content = await readTextFile(filePath);
|
|
@@ -37044,6 +37242,7 @@ var MockProvider = class {
|
|
|
37044
37242
|
delayMs;
|
|
37045
37243
|
delayMinMs;
|
|
37046
37244
|
delayMaxMs;
|
|
37245
|
+
trace;
|
|
37047
37246
|
constructor(targetName, config2) {
|
|
37048
37247
|
this.id = `mock:${targetName}`;
|
|
37049
37248
|
this.targetName = targetName;
|
|
@@ -37051,6 +37250,7 @@ var MockProvider = class {
|
|
|
37051
37250
|
this.delayMs = config2.delayMs ?? 0;
|
|
37052
37251
|
this.delayMinMs = config2.delayMinMs ?? 0;
|
|
37053
37252
|
this.delayMaxMs = config2.delayMaxMs ?? 0;
|
|
37253
|
+
this.trace = config2.trace;
|
|
37054
37254
|
}
|
|
37055
37255
|
async invoke(request) {
|
|
37056
37256
|
const delay2 = this.calculateDelay();
|
|
@@ -37062,7 +37262,8 @@ var MockProvider = class {
|
|
|
37062
37262
|
raw: {
|
|
37063
37263
|
question: request.question,
|
|
37064
37264
|
guidelines: request.guidelines
|
|
37065
|
-
}
|
|
37265
|
+
},
|
|
37266
|
+
trace: this.trace
|
|
37066
37267
|
};
|
|
37067
37268
|
}
|
|
37068
37269
|
calculateDelay() {
|
|
@@ -37827,6 +38028,251 @@ function substituteVariables(template, variables) {
|
|
|
37827
38028
|
return variables[varName] ?? match;
|
|
37828
38029
|
});
|
|
37829
38030
|
}
|
|
38031
|
+
/**
 * Evaluator that scores the tool calls recorded in a candidate trace against
 * the expectations in its config. `config.mode` selects the check:
 *  - "any_order": every tool in `config.minimums` must be called at least the
 *    given number of times, in any order;
 *  - "in_order": the tools in `config.expected` must appear in the trace in
 *    that order, other calls may be interleaved;
 *  - "exact": the trace's tool calls must equal `config.expected` position by
 *    position, with no missing and no extra calls.
 *
 * Every result has the shape
 * `{ score, verdict, hits, misses, expectedAspectCount }`.
 */
var ToolTrajectoryEvaluator = class {
  kind = "tool_trajectory";
  config;
  /**
   * @param {{config: {mode: string, minimums?: Object<string, number>, expected?: Array<{tool: string}>}}} options
   */
  constructor(options) {
    this.config = options.config;
  }
  /**
   * Runs the check selected by `config.mode`.
   *
   * @param {{candidateTrace?: Array<object>, candidateTraceSummary?: object}} context
   * @returns {object} Failing result when the trace or its summary is missing
   *   or the mode is unknown; otherwise the result of the selected check.
   */
  evaluate(context) {
    const { candidateTrace, candidateTraceSummary } = context;
    if (!candidateTrace || !candidateTraceSummary) {
      return {
        score: 0,
        verdict: "fail",
        hits: [],
        misses: ["No trace available for evaluation"],
        expectedAspectCount: 1
      };
    }
    switch (this.config.mode) {
      case "any_order":
        return this.evaluateAnyOrder(candidateTraceSummary);
      case "in_order":
        return this.evaluateInOrder(candidateTrace);
      case "exact":
        return this.evaluateExact(candidateTrace);
      default:
        return {
          score: 0,
          verdict: "fail",
          hits: [],
          misses: [`Unknown mode: ${this.config.mode}`],
          expectedAspectCount: 1
        };
    }
  }
  /**
   * Checks per-tool minimum call counts against `summary.toolCallsByName`.
   * The score is the fraction of listed tools that meet their minimum.
   */
  evaluateAnyOrder(summary) {
    const minimums = this.config.minimums ?? {};
    const toolNames = Object.keys(minimums);
    if (toolNames.length === 0) {
      return {
        score: 1,
        verdict: "pass",
        hits: ["No tool requirements specified"],
        misses: [],
        expectedAspectCount: 0
      };
    }
    const hits = [];
    const misses = [];
    for (const toolName of toolNames) {
      const required2 = minimums[toolName];
      const actual = summary.toolCallsByName[toolName] ?? 0;
      if (actual >= required2) {
        hits.push(`${toolName}: called ${actual} times (required \u2265${required2})`);
      } else {
        misses.push(`${toolName}: called ${actual} times (required \u2265${required2})`);
      }
    }
    const score = hits.length / toolNames.length;
    return {
      score,
      verdict: scoreToVerdict(score),
      hits,
      misses,
      expectedAspectCount: toolNames.length
    };
  }
  /**
   * Checks that the expected tools occur in order within the trace's named
   * tool calls. The score is the fraction of expected tools found.
   */
  evaluateInOrder(trace2) {
    const expected = this.config.expected ?? [];
    if (expected.length === 0) {
      return {
        score: 1,
        verdict: "pass",
        hits: ["No tool sequence specified"],
        misses: [],
        expectedAspectCount: 0
      };
    }
    const actualToolCalls = trace2.filter((e) => e.type === "tool_call" && e.name);
    const hits = [];
    const misses = [];
    // The cursor only moves forward, so once one expected tool is not found
    // the trace is exhausted and all later expected tools miss as well.
    let actualIndex = 0;
    for (let i = 0; i < expected.length; i++) {
      const expectedTool = expected[i].tool;
      let found = false;
      while (actualIndex < actualToolCalls.length) {
        if (actualToolCalls[actualIndex].name === expectedTool) {
          hits.push(`Found ${expectedTool} at position ${actualIndex}`);
          actualIndex++;
          found = true;
          break;
        }
        actualIndex++;
      }
      if (!found) {
        misses.push(`Expected ${expectedTool} at position ${i}, not found in remaining trace`);
      }
    }
    const score = hits.length / expected.length;
    return {
      score,
      verdict: scoreToVerdict(score),
      hits,
      misses,
      expectedAspectCount: expected.length
    };
  }
  /**
   * Checks that the trace's named tool calls match the expected sequence
   * position by position. The score is the number of matching positions
   * divided by the longer of the two sequences.
   */
  evaluateExact(trace2) {
    const expected = this.config.expected ?? [];
    if (expected.length === 0) {
      return {
        score: 1,
        verdict: "pass",
        hits: ["No tool sequence specified"],
        misses: [],
        expectedAspectCount: 0
      };
    }
    const actualToolCalls = trace2.filter((e) => e.type === "tool_call" && e.name);
    const hits = [];
    const misses = [];
    if (actualToolCalls.length !== expected.length) {
      misses.push(`Expected ${expected.length} tool calls, got ${actualToolCalls.length}`);
    }
    const checkLength = Math.min(expected.length, actualToolCalls.length);
    for (let i = 0; i < checkLength; i++) {
      const expectedTool = expected[i].tool;
      const actualTool = actualToolCalls[i].name;
      if (actualTool === expectedTool) {
        hits.push(`Position ${i}: ${expectedTool} \u2713`);
      } else {
        misses.push(`Position ${i}: expected ${expectedTool}, got ${actualTool}`);
      }
    }
    for (let i = checkLength; i < expected.length; i++) {
      misses.push(`Position ${i}: expected ${expected[i].tool}, got nothing`);
    }
    // Divide by the longer sequence so surplus tool calls lower the score.
    // Dividing by expected.length alone let a trace with extra calls score a
    // perfect 1 even though the length mismatch was recorded as a miss.
    const totalPositions = Math.max(expected.length, actualToolCalls.length);
    const score = hits.length / totalPositions;
    return {
      score,
      verdict: scoreToVerdict(score),
      hits,
      misses,
      expectedAspectCount: expected.length
    };
  }
};
|
|
38177
|
+
var ExpectedMessagesEvaluator = class {
|
|
38178
|
+
kind = "expected_messages";
|
|
38179
|
+
evaluate(context) {
|
|
38180
|
+
const { candidateTrace, evalCase } = context;
|
|
38181
|
+
const expectedSegments = evalCase.expected_segments;
|
|
38182
|
+
const expectedToolCalls = this.extractExpectedToolCalls(expectedSegments);
|
|
38183
|
+
if (expectedToolCalls.length === 0) {
|
|
38184
|
+
return {
|
|
38185
|
+
score: 1,
|
|
38186
|
+
verdict: "pass",
|
|
38187
|
+
hits: ["No tool_calls specified in expected_messages"],
|
|
38188
|
+
misses: [],
|
|
38189
|
+
expectedAspectCount: 1
|
|
38190
|
+
};
|
|
38191
|
+
}
|
|
38192
|
+
if (!candidateTrace || candidateTrace.length === 0) {
|
|
38193
|
+
return {
|
|
38194
|
+
score: 0,
|
|
38195
|
+
verdict: "fail",
|
|
38196
|
+
hits: [],
|
|
38197
|
+
misses: ["No trace available to validate tool_calls"],
|
|
38198
|
+
expectedAspectCount: expectedToolCalls.length
|
|
38199
|
+
};
|
|
38200
|
+
}
|
|
38201
|
+
const actualToolCalls = candidateTrace.filter((e) => e.type === "tool_call");
|
|
38202
|
+
return this.validateToolCalls(expectedToolCalls, actualToolCalls);
|
|
38203
|
+
}
|
|
38204
|
+
extractExpectedToolCalls(segments) {
|
|
38205
|
+
if (!segments) {
|
|
38206
|
+
return [];
|
|
38207
|
+
}
|
|
38208
|
+
const toolCalls = [];
|
|
38209
|
+
for (const segment of segments) {
|
|
38210
|
+
const role = segment.role;
|
|
38211
|
+
const segmentToolCalls = segment.tool_calls;
|
|
38212
|
+
if (role === "assistant" && Array.isArray(segmentToolCalls)) {
|
|
38213
|
+
for (const tc of segmentToolCalls) {
|
|
38214
|
+
if (typeof tc === "object" && tc !== null && typeof tc.tool === "string") {
|
|
38215
|
+
const toolCall = tc;
|
|
38216
|
+
toolCalls.push({ tool: toolCall.tool, input: toolCall.input });
|
|
38217
|
+
}
|
|
38218
|
+
}
|
|
38219
|
+
}
|
|
38220
|
+
}
|
|
38221
|
+
return toolCalls;
|
|
38222
|
+
}
|
|
38223
|
+
validateToolCalls(expected, actual) {
|
|
38224
|
+
const hits = [];
|
|
38225
|
+
const misses = [];
|
|
38226
|
+
for (let i = 0; i < expected.length; i++) {
|
|
38227
|
+
const expectedCall = expected[i];
|
|
38228
|
+
const actualCall = actual[i];
|
|
38229
|
+
if (!actualCall) {
|
|
38230
|
+
misses.push(
|
|
38231
|
+
`tool_calls[${i}]: expected ${expectedCall.tool}, but no more tool calls in trace`
|
|
38232
|
+
);
|
|
38233
|
+
continue;
|
|
38234
|
+
}
|
|
38235
|
+
if (actualCall.name !== expectedCall.tool) {
|
|
38236
|
+
misses.push(
|
|
38237
|
+
`tool_calls[${i}]: expected ${expectedCall.tool}, got ${actualCall.name ?? "unknown"}`
|
|
38238
|
+
);
|
|
38239
|
+
continue;
|
|
38240
|
+
}
|
|
38241
|
+
if (expectedCall.input !== void 0) {
|
|
38242
|
+
if (!this.deepEquals(expectedCall.input, actualCall.input)) {
|
|
38243
|
+
misses.push(`tool_calls[${i}]: ${expectedCall.tool} input mismatch`);
|
|
38244
|
+
continue;
|
|
38245
|
+
}
|
|
38246
|
+
}
|
|
38247
|
+
hits.push(`tool_calls[${i}]: ${expectedCall.tool} matched`);
|
|
38248
|
+
}
|
|
38249
|
+
const totalChecks = expected.length || 1;
|
|
38250
|
+
const score = hits.length / totalChecks;
|
|
38251
|
+
return {
|
|
38252
|
+
score,
|
|
38253
|
+
verdict: score >= 0.8 ? "pass" : score >= 0.6 ? "borderline" : "fail",
|
|
38254
|
+
hits,
|
|
38255
|
+
misses,
|
|
38256
|
+
expectedAspectCount: totalChecks
|
|
38257
|
+
};
|
|
38258
|
+
}
|
|
38259
|
+
deepEquals(a, b) {
|
|
38260
|
+
if (a === b) return true;
|
|
38261
|
+
if (typeof a !== typeof b) return false;
|
|
38262
|
+
if (typeof a !== "object" || a === null || b === null) return false;
|
|
38263
|
+
if (Array.isArray(a) && Array.isArray(b)) {
|
|
38264
|
+
if (a.length !== b.length) return false;
|
|
38265
|
+
return a.every((val, i) => this.deepEquals(val, b[i]));
|
|
38266
|
+
}
|
|
38267
|
+
if (Array.isArray(a) || Array.isArray(b)) return false;
|
|
38268
|
+
const aObj = a;
|
|
38269
|
+
const bObj = b;
|
|
38270
|
+
const aKeys = Object.keys(aObj);
|
|
38271
|
+
const bKeys = Object.keys(bObj);
|
|
38272
|
+
if (aKeys.length !== bKeys.length) return false;
|
|
38273
|
+
return aKeys.every((key2) => this.deepEquals(aObj[key2], bObj[key2]));
|
|
38274
|
+
}
|
|
38275
|
+
};
|
|
37830
38276
|
var DEFAULT_COMPOSITE_AGGREGATOR_PROMPT = `Review the following evaluation results:
|
|
37831
38277
|
{{EVALUATOR_RESULTS_JSON}}
|
|
37832
38278
|
|
|
@@ -38239,7 +38685,7 @@ async function runEvaluation(options) {
|
|
|
38239
38685
|
if (!definition) {
|
|
38240
38686
|
return void 0;
|
|
38241
38687
|
}
|
|
38242
|
-
const resolved = resolveTargetDefinition(definition, envLookup);
|
|
38688
|
+
const resolved = resolveTargetDefinition(definition, envLookup, evalFilePath);
|
|
38243
38689
|
resolvedTargetsByName.set(name16, resolved);
|
|
38244
38690
|
return resolved;
|
|
38245
38691
|
};
|
|
@@ -38553,6 +38999,17 @@ async function runEvalCase(options) {
|
|
|
38553
38999
|
if (cacheKey && cache && !cachedResponse) {
|
|
38554
39000
|
await cache.set(cacheKey, providerResponse);
|
|
38555
39001
|
}
|
|
39002
|
+
let candidateTrace = providerResponse.trace;
|
|
39003
|
+
if (!candidateTrace && providerResponse.traceRef) {
|
|
39004
|
+
try {
|
|
39005
|
+
const rawTrace = await readJsonFile(providerResponse.traceRef);
|
|
39006
|
+
if (Array.isArray(rawTrace) && rawTrace.every(isTraceEvent)) {
|
|
39007
|
+
candidateTrace = rawTrace;
|
|
39008
|
+
}
|
|
39009
|
+
} catch {
|
|
39010
|
+
}
|
|
39011
|
+
}
|
|
39012
|
+
const candidateTraceSummary = candidateTrace ? computeTraceSummary(candidateTrace) : void 0;
|
|
38556
39013
|
try {
|
|
38557
39014
|
return await evaluateCandidate({
|
|
38558
39015
|
evalCase,
|
|
@@ -38564,7 +39021,9 @@ async function runEvalCase(options) {
|
|
|
38564
39021
|
nowFn,
|
|
38565
39022
|
attempt,
|
|
38566
39023
|
judgeProvider,
|
|
38567
|
-
agentTimeoutMs
|
|
39024
|
+
agentTimeoutMs,
|
|
39025
|
+
candidateTrace,
|
|
39026
|
+
candidateTraceSummary
|
|
38568
39027
|
});
|
|
38569
39028
|
} catch (error40) {
|
|
38570
39029
|
return buildErrorResult(evalCase, target.name, nowFn(), error40, promptInputs, provider);
|
|
@@ -38581,7 +39040,9 @@ async function evaluateCandidate(options) {
|
|
|
38581
39040
|
nowFn,
|
|
38582
39041
|
attempt,
|
|
38583
39042
|
judgeProvider,
|
|
38584
|
-
agentTimeoutMs
|
|
39043
|
+
agentTimeoutMs,
|
|
39044
|
+
candidateTrace,
|
|
39045
|
+
candidateTraceSummary
|
|
38585
39046
|
} = options;
|
|
38586
39047
|
const gradeTimestamp = nowFn();
|
|
38587
39048
|
const { score, evaluatorResults } = await runEvaluatorsForCase({
|
|
@@ -38594,7 +39055,9 @@ async function evaluateCandidate(options) {
|
|
|
38594
39055
|
promptInputs,
|
|
38595
39056
|
now: gradeTimestamp,
|
|
38596
39057
|
judgeProvider,
|
|
38597
|
-
agentTimeoutMs
|
|
39058
|
+
agentTimeoutMs,
|
|
39059
|
+
candidateTrace,
|
|
39060
|
+
candidateTraceSummary
|
|
38598
39061
|
});
|
|
38599
39062
|
const completedAt = nowFn();
|
|
38600
39063
|
let agentProviderRequest;
|
|
@@ -38633,7 +39096,8 @@ async function evaluateCandidate(options) {
|
|
|
38633
39096
|
agent_provider_request: agentProviderRequest,
|
|
38634
39097
|
lm_provider_request: lmProviderRequest,
|
|
38635
39098
|
evaluator_provider_request: evaluatorResults ? void 0 : score.evaluatorRawRequest,
|
|
38636
|
-
evaluator_results: evaluatorResults
|
|
39099
|
+
evaluator_results: evaluatorResults,
|
|
39100
|
+
trace_summary: candidateTraceSummary
|
|
38637
39101
|
};
|
|
38638
39102
|
}
|
|
38639
39103
|
async function runEvaluatorsForCase(options) {
|
|
@@ -38647,7 +39111,9 @@ async function runEvaluatorsForCase(options) {
|
|
|
38647
39111
|
promptInputs,
|
|
38648
39112
|
now,
|
|
38649
39113
|
judgeProvider,
|
|
38650
|
-
agentTimeoutMs
|
|
39114
|
+
agentTimeoutMs,
|
|
39115
|
+
candidateTrace,
|
|
39116
|
+
candidateTraceSummary
|
|
38651
39117
|
} = options;
|
|
38652
39118
|
if (evalCase.evaluators && evalCase.evaluators.length > 0) {
|
|
38653
39119
|
return runEvaluatorList({
|
|
@@ -38661,7 +39127,9 @@ async function runEvaluatorsForCase(options) {
|
|
|
38661
39127
|
promptInputs,
|
|
38662
39128
|
now,
|
|
38663
39129
|
judgeProvider,
|
|
38664
|
-
agentTimeoutMs
|
|
39130
|
+
agentTimeoutMs,
|
|
39131
|
+
candidateTrace,
|
|
39132
|
+
candidateTraceSummary
|
|
38665
39133
|
});
|
|
38666
39134
|
}
|
|
38667
39135
|
const evaluatorKind = evalCase.evaluator ?? "llm_judge";
|
|
@@ -38677,7 +39145,9 @@ async function runEvaluatorsForCase(options) {
|
|
|
38677
39145
|
attempt,
|
|
38678
39146
|
promptInputs,
|
|
38679
39147
|
now,
|
|
38680
|
-
judgeProvider
|
|
39148
|
+
judgeProvider,
|
|
39149
|
+
candidateTrace,
|
|
39150
|
+
candidateTraceSummary
|
|
38681
39151
|
});
|
|
38682
39152
|
return { score };
|
|
38683
39153
|
}
|
|
@@ -38693,7 +39163,9 @@ async function runEvaluatorList(options) {
|
|
|
38693
39163
|
promptInputs,
|
|
38694
39164
|
now,
|
|
38695
39165
|
judgeProvider,
|
|
38696
|
-
agentTimeoutMs
|
|
39166
|
+
agentTimeoutMs,
|
|
39167
|
+
candidateTrace,
|
|
39168
|
+
candidateTraceSummary
|
|
38697
39169
|
} = options;
|
|
38698
39170
|
const scored = [];
|
|
38699
39171
|
const evaluatorResults = [];
|
|
@@ -38769,6 +39241,12 @@ async function runEvaluatorList(options) {
|
|
|
38769
39241
|
cwd: evalFileDir,
|
|
38770
39242
|
evaluatorFactory: { create: createEvaluator }
|
|
38771
39243
|
});
|
|
39244
|
+
case "tool_trajectory":
|
|
39245
|
+
return new ToolTrajectoryEvaluator({
|
|
39246
|
+
config: memberConfig
|
|
39247
|
+
});
|
|
39248
|
+
case "expected_messages":
|
|
39249
|
+
return new ExpectedMessagesEvaluator();
|
|
38772
39250
|
default: {
|
|
38773
39251
|
const unknownConfig = memberConfig;
|
|
38774
39252
|
throw new Error(`Unsupported evaluator type in composite: ${unknownConfig.type}`);
|
|
@@ -38803,6 +39281,56 @@ async function runEvaluatorList(options) {
|
|
|
38803
39281
|
evaluator_results: mapChildResults(score2.evaluatorResults)
|
|
38804
39282
|
});
|
|
38805
39283
|
}
|
|
39284
|
+
if (evaluator.type === "tool_trajectory") {
|
|
39285
|
+
const trajectoryEvaluator = new ToolTrajectoryEvaluator({
|
|
39286
|
+
config: evaluator
|
|
39287
|
+
});
|
|
39288
|
+
const score2 = trajectoryEvaluator.evaluate({
|
|
39289
|
+
evalCase,
|
|
39290
|
+
candidate,
|
|
39291
|
+
target,
|
|
39292
|
+
provider,
|
|
39293
|
+
attempt,
|
|
39294
|
+
promptInputs,
|
|
39295
|
+
now,
|
|
39296
|
+
candidateTrace,
|
|
39297
|
+
candidateTraceSummary
|
|
39298
|
+
});
|
|
39299
|
+
scored.push({ score: score2, name: evaluator.name, type: evaluator.type });
|
|
39300
|
+
evaluatorResults.push({
|
|
39301
|
+
name: evaluator.name,
|
|
39302
|
+
type: evaluator.type,
|
|
39303
|
+
score: score2.score,
|
|
39304
|
+
verdict: score2.verdict,
|
|
39305
|
+
hits: score2.hits,
|
|
39306
|
+
misses: score2.misses,
|
|
39307
|
+
reasoning: score2.reasoning
|
|
39308
|
+
});
|
|
39309
|
+
}
|
|
39310
|
+
if (evaluator.type === "expected_messages") {
|
|
39311
|
+
const expectedMessagesEvaluator = new ExpectedMessagesEvaluator();
|
|
39312
|
+
const score2 = expectedMessagesEvaluator.evaluate({
|
|
39313
|
+
evalCase,
|
|
39314
|
+
candidate,
|
|
39315
|
+
target,
|
|
39316
|
+
provider,
|
|
39317
|
+
attempt,
|
|
39318
|
+
promptInputs,
|
|
39319
|
+
now,
|
|
39320
|
+
candidateTrace,
|
|
39321
|
+
candidateTraceSummary
|
|
39322
|
+
});
|
|
39323
|
+
scored.push({ score: score2, name: evaluator.name, type: evaluator.type });
|
|
39324
|
+
evaluatorResults.push({
|
|
39325
|
+
name: evaluator.name,
|
|
39326
|
+
type: evaluator.type,
|
|
39327
|
+
score: score2.score,
|
|
39328
|
+
verdict: score2.verdict,
|
|
39329
|
+
hits: score2.hits,
|
|
39330
|
+
misses: score2.misses,
|
|
39331
|
+
reasoning: score2.reasoning
|
|
39332
|
+
});
|
|
39333
|
+
}
|
|
38806
39334
|
} catch (error40) {
|
|
38807
39335
|
const message = error40 instanceof Error ? error40.message : String(error40);
|
|
38808
39336
|
const fallbackScore = {
|
|
@@ -39136,13 +39664,13 @@ function buildPrompt(expectedOutcome, question, referenceAnswer) {
|
|
|
39136
39664
|
// src/commands/eval/env.ts
|
|
39137
39665
|
import { constants as constants4 } from "node:fs";
|
|
39138
39666
|
import { access as access4 } from "node:fs/promises";
|
|
39139
|
-
import
|
|
39667
|
+
import path14 from "node:path";
|
|
39140
39668
|
import { config as loadDotenv } from "dotenv";
|
|
39141
39669
|
function uniqueDirs(directories) {
|
|
39142
39670
|
const seen = /* @__PURE__ */ new Set();
|
|
39143
39671
|
const result = [];
|
|
39144
39672
|
for (const dir of directories) {
|
|
39145
|
-
const absolute =
|
|
39673
|
+
const absolute = path14.resolve(dir);
|
|
39146
39674
|
if (seen.has(absolute)) {
|
|
39147
39675
|
continue;
|
|
39148
39676
|
}
|
|
@@ -39161,14 +39689,14 @@ async function fileExists4(filePath) {
|
|
|
39161
39689
|
}
|
|
39162
39690
|
function collectAncestorDirectories(start, boundary) {
|
|
39163
39691
|
const directories = [];
|
|
39164
|
-
const boundaryDir =
|
|
39165
|
-
let current =
|
|
39692
|
+
const boundaryDir = path14.resolve(boundary);
|
|
39693
|
+
let current = path14.resolve(start);
|
|
39166
39694
|
while (current !== void 0) {
|
|
39167
39695
|
directories.push(current);
|
|
39168
39696
|
if (current === boundaryDir) {
|
|
39169
39697
|
break;
|
|
39170
39698
|
}
|
|
39171
|
-
const parent =
|
|
39699
|
+
const parent = path14.dirname(current);
|
|
39172
39700
|
if (parent === current) {
|
|
39173
39701
|
break;
|
|
39174
39702
|
}
|
|
@@ -39178,29 +39706,36 @@ function collectAncestorDirectories(start, boundary) {
|
|
|
39178
39706
|
}
|
|
39179
39707
|
async function loadEnvFromHierarchy(options) {
|
|
39180
39708
|
const { testFilePath, repoRoot, verbose } = options;
|
|
39181
|
-
const testDir =
|
|
39709
|
+
const testDir = path14.dirname(path14.resolve(testFilePath));
|
|
39182
39710
|
const cwd = process.cwd();
|
|
39183
39711
|
const searchDirs = uniqueDirs([...collectAncestorDirectories(testDir, repoRoot), repoRoot, cwd]);
|
|
39712
|
+
const envFiles = [];
|
|
39184
39713
|
for (const dir of searchDirs) {
|
|
39185
|
-
const candidate =
|
|
39714
|
+
const candidate = path14.join(dir, ".env");
|
|
39186
39715
|
if (await fileExists4(candidate)) {
|
|
39187
|
-
|
|
39188
|
-
if (verbose) {
|
|
39189
|
-
console.log(`Loaded environment from: ${candidate}`);
|
|
39190
|
-
}
|
|
39191
|
-
return candidate;
|
|
39716
|
+
envFiles.push(candidate);
|
|
39192
39717
|
}
|
|
39193
39718
|
}
|
|
39194
|
-
if (
|
|
39195
|
-
|
|
39719
|
+
if (envFiles.length === 0) {
|
|
39720
|
+
if (verbose) {
|
|
39721
|
+
console.log("No .env file found in hierarchy");
|
|
39722
|
+
}
|
|
39723
|
+
return void 0;
|
|
39196
39724
|
}
|
|
39197
|
-
|
|
39725
|
+
for (let i = envFiles.length - 1; i >= 0; i--) {
|
|
39726
|
+
const envFile = envFiles[i];
|
|
39727
|
+
loadDotenv({ path: envFile, override: false });
|
|
39728
|
+
if (verbose) {
|
|
39729
|
+
console.log(`Loaded environment from: ${envFile}`);
|
|
39730
|
+
}
|
|
39731
|
+
}
|
|
39732
|
+
return envFiles[0];
|
|
39198
39733
|
}
|
|
39199
39734
|
|
|
39200
39735
|
// src/commands/eval/jsonl-writer.ts
|
|
39201
39736
|
import { createWriteStream as createWriteStream2 } from "node:fs";
|
|
39202
39737
|
import { mkdir as mkdir5 } from "node:fs/promises";
|
|
39203
|
-
import
|
|
39738
|
+
import path15 from "node:path";
|
|
39204
39739
|
import { finished } from "node:stream/promises";
|
|
39205
39740
|
|
|
39206
39741
|
// ../../node_modules/.bun/async-mutex@0.5.0/node_modules/async-mutex/index.mjs
|
|
@@ -39418,7 +39953,7 @@ var JsonlWriter = class _JsonlWriter {
|
|
|
39418
39953
|
this.stream = stream;
|
|
39419
39954
|
}
|
|
39420
39955
|
static async open(filePath) {
|
|
39421
|
-
await mkdir5(
|
|
39956
|
+
await mkdir5(path15.dirname(filePath), { recursive: true });
|
|
39422
39957
|
const stream = createWriteStream2(filePath, { flags: "w", encoding: "utf8" });
|
|
39423
39958
|
return new _JsonlWriter(stream);
|
|
39424
39959
|
}
|
|
@@ -39450,7 +39985,7 @@ var JsonlWriter = class _JsonlWriter {
|
|
|
39450
39985
|
// src/commands/eval/yaml-writer.ts
|
|
39451
39986
|
import { createWriteStream as createWriteStream3 } from "node:fs";
|
|
39452
39987
|
import { mkdir as mkdir6 } from "node:fs/promises";
|
|
39453
|
-
import
|
|
39988
|
+
import path16 from "node:path";
|
|
39454
39989
|
import { finished as finished2 } from "node:stream/promises";
|
|
39455
39990
|
import { stringify as stringifyYaml } from "yaml";
|
|
39456
39991
|
var YamlWriter = class _YamlWriter {
|
|
@@ -39462,7 +39997,7 @@ var YamlWriter = class _YamlWriter {
|
|
|
39462
39997
|
this.stream = stream;
|
|
39463
39998
|
}
|
|
39464
39999
|
static async open(filePath) {
|
|
39465
|
-
await mkdir6(
|
|
40000
|
+
await mkdir6(path16.dirname(filePath), { recursive: true });
|
|
39466
40001
|
const stream = createWriteStream3(filePath, { flags: "w", encoding: "utf8" });
|
|
39467
40002
|
return new _YamlWriter(stream);
|
|
39468
40003
|
}
|
|
@@ -39586,12 +40121,12 @@ var ProgressDisplay = class {
|
|
|
39586
40121
|
}
|
|
39587
40122
|
addLogPaths(paths) {
|
|
39588
40123
|
const newPaths = [];
|
|
39589
|
-
for (const
|
|
39590
|
-
if (this.logPathSet.has(
|
|
40124
|
+
for (const path27 of paths) {
|
|
40125
|
+
if (this.logPathSet.has(path27)) {
|
|
39591
40126
|
continue;
|
|
39592
40127
|
}
|
|
39593
|
-
this.logPathSet.add(
|
|
39594
|
-
newPaths.push(
|
|
40128
|
+
this.logPathSet.add(path27);
|
|
40129
|
+
newPaths.push(path27);
|
|
39595
40130
|
}
|
|
39596
40131
|
if (newPaths.length === 0) {
|
|
39597
40132
|
return;
|
|
@@ -39607,8 +40142,8 @@ var ProgressDisplay = class {
|
|
|
39607
40142
|
this.hasPrintedLogHeader = true;
|
|
39608
40143
|
}
|
|
39609
40144
|
const startIndex = this.logPaths.length - newPaths.length;
|
|
39610
|
-
newPaths.forEach((
|
|
39611
|
-
console.log(`${startIndex + offset + 1}. ${
|
|
40145
|
+
newPaths.forEach((path27, offset) => {
|
|
40146
|
+
console.log(`${startIndex + offset + 1}. ${path27}`);
|
|
39612
40147
|
});
|
|
39613
40148
|
}
|
|
39614
40149
|
scheduleRender() {
|
|
@@ -39656,8 +40191,8 @@ var ProgressDisplay = class {
|
|
|
39656
40191
|
if (this.logPaths.length > 0) {
|
|
39657
40192
|
lines.push("");
|
|
39658
40193
|
lines.push("Codex CLI logs:");
|
|
39659
|
-
this.logPaths.forEach((
|
|
39660
|
-
lines.push(`${index + 1}. ${
|
|
40194
|
+
this.logPaths.forEach((path27, index) => {
|
|
40195
|
+
lines.push(`${index + 1}. ${path27}`);
|
|
39661
40196
|
});
|
|
39662
40197
|
}
|
|
39663
40198
|
const rowCount = this.getRenderedRowCount(lines);
|
|
@@ -39864,7 +40399,7 @@ function formatEvaluationSummary(summary) {
|
|
|
39864
40399
|
|
|
39865
40400
|
// ../../packages/core/dist/evaluation/validation/index.js
|
|
39866
40401
|
import { readFile as readFile7 } from "node:fs/promises";
|
|
39867
|
-
import
|
|
40402
|
+
import path17 from "node:path";
|
|
39868
40403
|
import { parse as parse6 } from "yaml";
|
|
39869
40404
|
import { readFile as readFile23 } from "node:fs/promises";
|
|
39870
40405
|
import path23 from "node:path";
|
|
@@ -39907,8 +40442,8 @@ async function detectFileType(filePath) {
|
|
|
39907
40442
|
}
|
|
39908
40443
|
}
|
|
39909
40444
|
function inferFileTypeFromPath(filePath) {
|
|
39910
|
-
const normalized =
|
|
39911
|
-
const basename =
|
|
40445
|
+
const normalized = path17.normalize(filePath).replace(/\\/g, "/");
|
|
40446
|
+
const basename = path17.basename(filePath);
|
|
39912
40447
|
if (normalized.includes("/.agentv/")) {
|
|
39913
40448
|
if (basename === "config.yaml" || basename === "config.yml") {
|
|
39914
40449
|
return "config";
|
|
@@ -40053,6 +40588,26 @@ function validateMessages(messages, location, filePath, errors) {
|
|
|
40053
40588
|
message: `Invalid role '${role}'. Must be one of: ${validRoles.join(", ")}`
|
|
40054
40589
|
});
|
|
40055
40590
|
}
|
|
40591
|
+
const toolCalls = message.tool_calls;
|
|
40592
|
+
if (toolCalls !== void 0) {
|
|
40593
|
+
if (role !== "assistant") {
|
|
40594
|
+
errors.push({
|
|
40595
|
+
severity: "error",
|
|
40596
|
+
filePath,
|
|
40597
|
+
location: `${msgLocation}.tool_calls`,
|
|
40598
|
+
message: "tool_calls can only be specified on assistant messages"
|
|
40599
|
+
});
|
|
40600
|
+
} else if (!Array.isArray(toolCalls)) {
|
|
40601
|
+
errors.push({
|
|
40602
|
+
severity: "error",
|
|
40603
|
+
filePath,
|
|
40604
|
+
location: `${msgLocation}.tool_calls`,
|
|
40605
|
+
message: "tool_calls must be an array"
|
|
40606
|
+
});
|
|
40607
|
+
} else {
|
|
40608
|
+
validateToolCalls(toolCalls, `${msgLocation}.tool_calls`, filePath, errors);
|
|
40609
|
+
}
|
|
40610
|
+
}
|
|
40056
40611
|
const content = message.content;
|
|
40057
40612
|
if (typeof content === "string") {
|
|
40058
40613
|
validateContentForRoleMarkers(content, `${msgLocation}.content`, filePath, errors);
|
|
@@ -40117,6 +40672,30 @@ function validateContentForRoleMarkers(content, location, filePath, errors) {
|
|
|
40117
40672
|
}
|
|
40118
40673
|
}
|
|
40119
40674
|
}
|
|
40675
|
+
function validateToolCalls(toolCalls, location, filePath, errors) {
|
|
40676
|
+
for (let i = 0; i < toolCalls.length; i++) {
|
|
40677
|
+
const toolCall = toolCalls[i];
|
|
40678
|
+
const callLocation = `${location}[${i}]`;
|
|
40679
|
+
if (!isObject2(toolCall)) {
|
|
40680
|
+
errors.push({
|
|
40681
|
+
severity: "error",
|
|
40682
|
+
filePath,
|
|
40683
|
+
location: callLocation,
|
|
40684
|
+
message: "Tool call must be an object"
|
|
40685
|
+
});
|
|
40686
|
+
continue;
|
|
40687
|
+
}
|
|
40688
|
+
const tool2 = toolCall.tool;
|
|
40689
|
+
if (typeof tool2 !== "string" || tool2.trim().length === 0) {
|
|
40690
|
+
errors.push({
|
|
40691
|
+
severity: "error",
|
|
40692
|
+
filePath,
|
|
40693
|
+
location: `${callLocation}.tool`,
|
|
40694
|
+
message: "Missing or invalid 'tool' field (must be a non-empty string)"
|
|
40695
|
+
});
|
|
40696
|
+
}
|
|
40697
|
+
}
|
|
40698
|
+
}
|
|
40120
40699
|
function isObject22(value) {
|
|
40121
40700
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
40122
40701
|
}
|
|
@@ -40212,7 +40791,9 @@ var MOCK_SETTINGS = /* @__PURE__ */ new Set([
|
|
|
40212
40791
|
"response",
|
|
40213
40792
|
"delayMs",
|
|
40214
40793
|
"delayMinMs",
|
|
40215
|
-
"delayMaxMs"
|
|
40794
|
+
"delayMaxMs",
|
|
40795
|
+
"trace"
|
|
40796
|
+
// For testing tool_trajectory evaluator
|
|
40216
40797
|
]);
|
|
40217
40798
|
var CLI_SETTINGS = /* @__PURE__ */ new Set([
|
|
40218
40799
|
...COMMON_SETTINGS,
|
|
@@ -40735,12 +41316,12 @@ async function validateMessagesFileRefs(messages, location, searchRoots, filePat
|
|
|
40735
41316
|
// src/utils/targets.ts
|
|
40736
41317
|
import { constants as constants5 } from "node:fs";
|
|
40737
41318
|
import { access as access5 } from "node:fs/promises";
|
|
40738
|
-
import
|
|
41319
|
+
import path18 from "node:path";
|
|
40739
41320
|
var TARGET_FILE_CANDIDATES = [
|
|
40740
41321
|
"targets.yaml",
|
|
40741
41322
|
"targets.yml",
|
|
40742
|
-
|
|
40743
|
-
|
|
41323
|
+
path18.join(".agentv", "targets.yaml"),
|
|
41324
|
+
path18.join(".agentv", "targets.yml")
|
|
40744
41325
|
];
|
|
40745
41326
|
async function fileExists5(filePath) {
|
|
40746
41327
|
try {
|
|
@@ -40753,12 +41334,12 @@ async function fileExists5(filePath) {
|
|
|
40753
41334
|
async function discoverTargetsFile(options) {
|
|
40754
41335
|
const { explicitPath, testFilePath, repoRoot, cwd } = options;
|
|
40755
41336
|
if (explicitPath) {
|
|
40756
|
-
const resolvedExplicit =
|
|
41337
|
+
const resolvedExplicit = path18.resolve(explicitPath);
|
|
40757
41338
|
if (await fileExists5(resolvedExplicit)) {
|
|
40758
41339
|
return resolvedExplicit;
|
|
40759
41340
|
}
|
|
40760
41341
|
for (const candidate of TARGET_FILE_CANDIDATES) {
|
|
40761
|
-
const nested =
|
|
41342
|
+
const nested = path18.join(resolvedExplicit, candidate);
|
|
40762
41343
|
if (await fileExists5(nested)) {
|
|
40763
41344
|
return nested;
|
|
40764
41345
|
}
|
|
@@ -40766,13 +41347,13 @@ async function discoverTargetsFile(options) {
|
|
|
40766
41347
|
throw new Error(`targets.yaml not found at provided path: ${resolvedExplicit}`);
|
|
40767
41348
|
}
|
|
40768
41349
|
const directories = [...buildDirectoryChain(testFilePath, repoRoot)];
|
|
40769
|
-
const resolvedCwd =
|
|
41350
|
+
const resolvedCwd = path18.resolve(cwd);
|
|
40770
41351
|
if (!directories.includes(resolvedCwd)) {
|
|
40771
41352
|
directories.push(resolvedCwd);
|
|
40772
41353
|
}
|
|
40773
41354
|
for (const directory of directories) {
|
|
40774
41355
|
for (const candidate of TARGET_FILE_CANDIDATES) {
|
|
40775
|
-
const fullPath =
|
|
41356
|
+
const fullPath = path18.join(directory, candidate);
|
|
40776
41357
|
if (await fileExists5(fullPath)) {
|
|
40777
41358
|
return fullPath;
|
|
40778
41359
|
}
|
|
@@ -40881,7 +41462,7 @@ Errors in ${targetsFilePath}:`);
|
|
|
40881
41462
|
};
|
|
40882
41463
|
}
|
|
40883
41464
|
try {
|
|
40884
|
-
const resolvedTarget = resolveTargetDefinition(targetDefinition, env);
|
|
41465
|
+
const resolvedTarget = resolveTargetDefinition(targetDefinition, env, testFilePath);
|
|
40885
41466
|
return {
|
|
40886
41467
|
definitions,
|
|
40887
41468
|
resolvedTarget,
|
|
@@ -40938,7 +41519,9 @@ function normalizeOptions(rawOptions) {
|
|
|
40938
41519
|
maxRetries: normalizeNumber(rawOptions.maxRetries, 2),
|
|
40939
41520
|
cache: normalizeBoolean(rawOptions.cache),
|
|
40940
41521
|
verbose: normalizeBoolean(rawOptions.verbose),
|
|
40941
|
-
dumpPrompts: rawOptions.dumpPrompts
|
|
41522
|
+
dumpPrompts: rawOptions.dumpPrompts,
|
|
41523
|
+
dumpTraces: normalizeBoolean(rawOptions.dumpTraces),
|
|
41524
|
+
includeTrace: normalizeBoolean(rawOptions.includeTrace)
|
|
40942
41525
|
};
|
|
40943
41526
|
}
|
|
40944
41527
|
async function ensureFileExists(filePath, description) {
|
|
@@ -40949,15 +41532,15 @@ async function ensureFileExists(filePath, description) {
|
|
|
40949
41532
|
}
|
|
40950
41533
|
}
|
|
40951
41534
|
async function findRepoRoot(start) {
|
|
40952
|
-
const fallback =
|
|
41535
|
+
const fallback = path19.resolve(start);
|
|
40953
41536
|
let current = fallback;
|
|
40954
41537
|
while (current !== void 0) {
|
|
40955
|
-
const candidate =
|
|
41538
|
+
const candidate = path19.join(current, ".git");
|
|
40956
41539
|
try {
|
|
40957
41540
|
await access6(candidate, constants6.F_OK);
|
|
40958
41541
|
return current;
|
|
40959
41542
|
} catch {
|
|
40960
|
-
const parent =
|
|
41543
|
+
const parent = path19.dirname(current);
|
|
40961
41544
|
if (parent === current) {
|
|
40962
41545
|
break;
|
|
40963
41546
|
}
|
|
@@ -40970,16 +41553,16 @@ function buildDefaultOutputPath(cwd, format) {
|
|
|
40970
41553
|
const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
|
|
40971
41554
|
const baseName = "eval";
|
|
40972
41555
|
const extension = getDefaultExtension(format);
|
|
40973
|
-
return
|
|
41556
|
+
return path19.join(cwd, ".agentv", "results", `${baseName}_${timestamp}${extension}`);
|
|
40974
41557
|
}
|
|
40975
41558
|
function resolvePromptDirectory(option4, cwd) {
|
|
40976
41559
|
if (option4 === void 0) {
|
|
40977
41560
|
return void 0;
|
|
40978
41561
|
}
|
|
40979
41562
|
if (typeof option4 === "string" && option4.trim().length > 0) {
|
|
40980
|
-
return
|
|
41563
|
+
return path19.resolve(cwd, option4);
|
|
40981
41564
|
}
|
|
40982
|
-
return
|
|
41565
|
+
return path19.join(cwd, ".agentv", "prompts");
|
|
40983
41566
|
}
|
|
40984
41567
|
function createEvaluationCache() {
|
|
40985
41568
|
const store = /* @__PURE__ */ new Map();
|
|
@@ -41004,7 +41587,7 @@ function createProgressReporter(maxWorkers) {
|
|
|
41004
41587
|
};
|
|
41005
41588
|
}
|
|
41006
41589
|
function makeEvalKey(testFilePath, evalId) {
|
|
41007
|
-
return `${
|
|
41590
|
+
return `${path19.resolve(testFilePath)}::${evalId}`;
|
|
41008
41591
|
}
|
|
41009
41592
|
function createDisplayIdTracker() {
|
|
41010
41593
|
const map2 = /* @__PURE__ */ new Map();
|
|
@@ -41108,10 +41691,6 @@ async function runSingleEvalFile(params) {
|
|
|
41108
41691
|
);
|
|
41109
41692
|
resolvedWorkers = 1;
|
|
41110
41693
|
}
|
|
41111
|
-
if (options.verbose) {
|
|
41112
|
-
const workersSource = workerPreference ? "CLI flag (balanced across files)" : resolvedTargetSelection.resolvedTarget.workers ? "target setting" : "default";
|
|
41113
|
-
console.log(`Using ${resolvedWorkers} worker(s) (source: ${workersSource})`);
|
|
41114
|
-
}
|
|
41115
41694
|
if (isVSCodeProvider && !options.dryRun) {
|
|
41116
41695
|
await ensureVSCodeSubagents({
|
|
41117
41696
|
kind: resolvedTargetSelection.resolvedTarget.kind,
|
|
@@ -41164,7 +41743,7 @@ async function runEvalCommand(input) {
|
|
|
41164
41743
|
if (options.verbose) {
|
|
41165
41744
|
console.log(`Repository root: ${repoRoot}`);
|
|
41166
41745
|
}
|
|
41167
|
-
const outputPath = options.outPath ?
|
|
41746
|
+
const outputPath = options.outPath ? path19.resolve(options.outPath) : buildDefaultOutputPath(cwd, options.format);
|
|
41168
41747
|
console.log(`Output path: ${outputPath}`);
|
|
41169
41748
|
const outputWriter = await createOutputWriter(outputPath, options.format);
|
|
41170
41749
|
const cache = options.cache ? createEvaluationCache() : void 0;
|
|
@@ -41172,7 +41751,7 @@ async function runEvalCommand(input) {
|
|
|
41172
41751
|
const allResults = [];
|
|
41173
41752
|
let lastPromptDumpDir;
|
|
41174
41753
|
const seenEvalCases = /* @__PURE__ */ new Set();
|
|
41175
|
-
const resolvedTestFiles = input.testFiles.map((file2) =>
|
|
41754
|
+
const resolvedTestFiles = input.testFiles.map((file2) => path19.resolve(file2));
|
|
41176
41755
|
const displayIdTracker = createDisplayIdTracker();
|
|
41177
41756
|
const totalWorkers = options.workers ?? DEFAULT_WORKERS;
|
|
41178
41757
|
const fileConcurrency = Math.min(
|
|
@@ -41268,7 +41847,7 @@ async function resolveEvaluationRunner() {
|
|
|
41268
41847
|
if (!overridePath) {
|
|
41269
41848
|
return runEvaluation;
|
|
41270
41849
|
}
|
|
41271
|
-
const resolved =
|
|
41850
|
+
const resolved = path19.isAbsolute(overridePath) ? overridePath : path19.resolve(process.cwd(), overridePath);
|
|
41272
41851
|
const moduleUrl = pathToFileURL(resolved).href;
|
|
41273
41852
|
const mod = await import(moduleUrl);
|
|
41274
41853
|
const candidate = mod.runEvaluation;
|
|
@@ -41369,6 +41948,14 @@ var evalCommand = command({
|
|
|
41369
41948
|
type: optional2(string4),
|
|
41370
41949
|
long: "dump-prompts",
|
|
41371
41950
|
description: "Directory path for persisting prompt payloads for debugging"
|
|
41951
|
+
}),
|
|
41952
|
+
dumpTraces: flag({
|
|
41953
|
+
long: "dump-traces",
|
|
41954
|
+
description: "Write trace files to .agentv/traces/"
|
|
41955
|
+
}),
|
|
41956
|
+
includeTrace: flag({
|
|
41957
|
+
long: "include-trace",
|
|
41958
|
+
description: "Include full trace in result output (verbose)"
|
|
41372
41959
|
})
|
|
41373
41960
|
},
|
|
41374
41961
|
handler: async (args) => {
|
|
@@ -41389,7 +41976,9 @@ var evalCommand = command({
|
|
|
41389
41976
|
maxRetries: args.maxRetries,
|
|
41390
41977
|
cache: args.cache,
|
|
41391
41978
|
verbose: args.verbose,
|
|
41392
|
-
dumpPrompts
|
|
41979
|
+
dumpPrompts,
|
|
41980
|
+
dumpTraces: args.dumpTraces,
|
|
41981
|
+
includeTrace: args.includeTrace
|
|
41393
41982
|
};
|
|
41394
41983
|
await runEvalCommand({ testFiles: resolvedPaths, rawOptions });
|
|
41395
41984
|
}
|
|
@@ -41402,7 +41991,7 @@ async function resolveEvalPaths(evalPaths, cwd) {
|
|
|
41402
41991
|
const unmatched = [];
|
|
41403
41992
|
const results = /* @__PURE__ */ new Set();
|
|
41404
41993
|
for (const pattern of normalizedInputs) {
|
|
41405
|
-
const candidatePath =
|
|
41994
|
+
const candidatePath = path20.isAbsolute(pattern) ? path20.normalize(pattern) : path20.resolve(cwd, pattern);
|
|
41406
41995
|
try {
|
|
41407
41996
|
const stats = await stat4(candidatePath);
|
|
41408
41997
|
if (stats.isFile() && /\.ya?ml$/i.test(candidatePath)) {
|
|
@@ -41426,7 +42015,7 @@ async function resolveEvalPaths(evalPaths, cwd) {
|
|
|
41426
42015
|
continue;
|
|
41427
42016
|
}
|
|
41428
42017
|
for (const filePath of yamlMatches) {
|
|
41429
|
-
results.add(
|
|
42018
|
+
results.add(path20.normalize(filePath));
|
|
41430
42019
|
}
|
|
41431
42020
|
}
|
|
41432
42021
|
if (unmatched.length > 0) {
|
|
@@ -41446,7 +42035,7 @@ import { command as command2, flag as flag2, option as option2, optional as opti
|
|
|
41446
42035
|
|
|
41447
42036
|
// src/commands/generate/rubrics.ts
|
|
41448
42037
|
import { readFile as readFile8, writeFile as writeFile6 } from "node:fs/promises";
|
|
41449
|
-
import
|
|
42038
|
+
import path21 from "node:path";
|
|
41450
42039
|
import { pathToFileURL as pathToFileURL2 } from "node:url";
|
|
41451
42040
|
import { isMap, isSeq, parseDocument } from "yaml";
|
|
41452
42041
|
function isJsonObject3(value) {
|
|
@@ -41458,7 +42047,7 @@ function asString6(value) {
|
|
|
41458
42047
|
async function loadRubricGenerator() {
|
|
41459
42048
|
const customGenerator = process.env.AGENTEVO_CLI_RUBRIC_GENERATOR;
|
|
41460
42049
|
if (customGenerator) {
|
|
41461
|
-
const generatorPath =
|
|
42050
|
+
const generatorPath = path21.resolve(customGenerator);
|
|
41462
42051
|
const generatorUrl = pathToFileURL2(generatorPath).href;
|
|
41463
42052
|
const module = await import(generatorUrl);
|
|
41464
42053
|
return module.generateRubrics;
|
|
@@ -41468,7 +42057,7 @@ async function loadRubricGenerator() {
|
|
|
41468
42057
|
async function generateRubricsCommand(options) {
|
|
41469
42058
|
const { file: file2, target: targetOverride, verbose } = options;
|
|
41470
42059
|
console.log(`Generating rubrics for: ${file2}`);
|
|
41471
|
-
const absolutePath =
|
|
42060
|
+
const absolutePath = path21.resolve(file2);
|
|
41472
42061
|
const content = await readFile8(absolutePath, "utf8");
|
|
41473
42062
|
const doc = parseDocument(content);
|
|
41474
42063
|
const parsed = doc.toJSON();
|
|
@@ -41629,13 +42218,13 @@ var generateCommand = subcommands({
|
|
|
41629
42218
|
|
|
41630
42219
|
// src/commands/init/index.ts
|
|
41631
42220
|
import { existsSync, mkdirSync, writeFileSync } from "node:fs";
|
|
41632
|
-
import
|
|
42221
|
+
import path25 from "node:path";
|
|
41633
42222
|
import * as readline from "node:readline/promises";
|
|
41634
42223
|
import { command as command3, option as option3, optional as optional4, string as string6 } from "cmd-ts";
|
|
41635
42224
|
|
|
41636
42225
|
// src/templates/index.ts
|
|
41637
42226
|
import { readFileSync, readdirSync, statSync } from "node:fs";
|
|
41638
|
-
import
|
|
42227
|
+
import path24 from "node:path";
|
|
41639
42228
|
import { fileURLToPath } from "node:url";
|
|
41640
42229
|
function getGithubTemplates() {
|
|
41641
42230
|
return getTemplatesFromDir(".github");
|
|
@@ -41647,12 +42236,12 @@ function getClaudeTemplates() {
|
|
|
41647
42236
|
return getTemplatesFromDir(".claude");
|
|
41648
42237
|
}
|
|
41649
42238
|
function getTemplatesFromDir(subdir) {
|
|
41650
|
-
const currentDir =
|
|
42239
|
+
const currentDir = path24.dirname(fileURLToPath(import.meta.url));
|
|
41651
42240
|
let templatesDir;
|
|
41652
|
-
if (currentDir.includes(`${
|
|
41653
|
-
templatesDir =
|
|
42241
|
+
if (currentDir.includes(`${path24.sep}dist`)) {
|
|
42242
|
+
templatesDir = path24.join(currentDir, "templates", subdir);
|
|
41654
42243
|
} else {
|
|
41655
|
-
templatesDir =
|
|
42244
|
+
templatesDir = path24.join(currentDir, subdir);
|
|
41656
42245
|
}
|
|
41657
42246
|
return readTemplatesRecursively(templatesDir, "");
|
|
41658
42247
|
}
|
|
@@ -41660,15 +42249,15 @@ function readTemplatesRecursively(dir, relativePath) {
|
|
|
41660
42249
|
const templates = [];
|
|
41661
42250
|
const entries = readdirSync(dir);
|
|
41662
42251
|
for (const entry of entries) {
|
|
41663
|
-
const fullPath =
|
|
42252
|
+
const fullPath = path24.join(dir, entry);
|
|
41664
42253
|
const stat6 = statSync(fullPath);
|
|
41665
|
-
const entryRelativePath = relativePath ?
|
|
42254
|
+
const entryRelativePath = relativePath ? path24.join(relativePath, entry) : entry;
|
|
41666
42255
|
if (stat6.isDirectory()) {
|
|
41667
42256
|
templates.push(...readTemplatesRecursively(fullPath, entryRelativePath));
|
|
41668
42257
|
} else {
|
|
41669
42258
|
const content = readFileSync(fullPath, "utf-8");
|
|
41670
42259
|
templates.push({
|
|
41671
|
-
path: entryRelativePath.split(
|
|
42260
|
+
path: entryRelativePath.split(path24.sep).join("/"),
|
|
41672
42261
|
// Normalize to forward slashes
|
|
41673
42262
|
content
|
|
41674
42263
|
});
|
|
@@ -41691,10 +42280,10 @@ async function promptYesNo(message) {
|
|
|
41691
42280
|
}
|
|
41692
42281
|
}
|
|
41693
42282
|
async function initCommand(options = {}) {
|
|
41694
|
-
const targetPath =
|
|
41695
|
-
const githubDir =
|
|
41696
|
-
const agentvDir =
|
|
41697
|
-
const claudeDir =
|
|
42283
|
+
const targetPath = path25.resolve(options.targetPath ?? ".");
|
|
42284
|
+
const githubDir = path25.join(targetPath, ".github");
|
|
42285
|
+
const agentvDir = path25.join(targetPath, ".agentv");
|
|
42286
|
+
const claudeDir = path25.join(targetPath, ".claude");
|
|
41698
42287
|
const githubTemplates = getGithubTemplates();
|
|
41699
42288
|
const agentvTemplates = getAgentvTemplates();
|
|
41700
42289
|
const claudeTemplates = getClaudeTemplates();
|
|
@@ -41702,32 +42291,32 @@ async function initCommand(options = {}) {
|
|
|
41702
42291
|
const otherAgentvTemplates = agentvTemplates.filter((t) => t.path !== ".env.template");
|
|
41703
42292
|
const existingFiles = [];
|
|
41704
42293
|
if (envTemplate) {
|
|
41705
|
-
const envFilePath =
|
|
42294
|
+
const envFilePath = path25.join(targetPath, ".env.template");
|
|
41706
42295
|
if (existsSync(envFilePath)) {
|
|
41707
42296
|
existingFiles.push(".env.template");
|
|
41708
42297
|
}
|
|
41709
42298
|
}
|
|
41710
42299
|
if (existsSync(githubDir)) {
|
|
41711
42300
|
for (const template of githubTemplates) {
|
|
41712
|
-
const targetFilePath =
|
|
42301
|
+
const targetFilePath = path25.join(githubDir, template.path);
|
|
41713
42302
|
if (existsSync(targetFilePath)) {
|
|
41714
|
-
existingFiles.push(
|
|
42303
|
+
existingFiles.push(path25.relative(targetPath, targetFilePath));
|
|
41715
42304
|
}
|
|
41716
42305
|
}
|
|
41717
42306
|
}
|
|
41718
42307
|
if (existsSync(agentvDir)) {
|
|
41719
42308
|
for (const template of otherAgentvTemplates) {
|
|
41720
|
-
const targetFilePath =
|
|
42309
|
+
const targetFilePath = path25.join(agentvDir, template.path);
|
|
41721
42310
|
if (existsSync(targetFilePath)) {
|
|
41722
|
-
existingFiles.push(
|
|
42311
|
+
existingFiles.push(path25.relative(targetPath, targetFilePath));
|
|
41723
42312
|
}
|
|
41724
42313
|
}
|
|
41725
42314
|
}
|
|
41726
42315
|
if (existsSync(claudeDir)) {
|
|
41727
42316
|
for (const template of claudeTemplates) {
|
|
41728
|
-
const targetFilePath =
|
|
42317
|
+
const targetFilePath = path25.join(claudeDir, template.path);
|
|
41729
42318
|
if (existsSync(targetFilePath)) {
|
|
41730
|
-
existingFiles.push(
|
|
42319
|
+
existingFiles.push(path25.relative(targetPath, targetFilePath));
|
|
41731
42320
|
}
|
|
41732
42321
|
}
|
|
41733
42322
|
}
|
|
@@ -41754,36 +42343,36 @@ async function initCommand(options = {}) {
|
|
|
41754
42343
|
mkdirSync(claudeDir, { recursive: true });
|
|
41755
42344
|
}
|
|
41756
42345
|
if (envTemplate) {
|
|
41757
|
-
const envFilePath =
|
|
42346
|
+
const envFilePath = path25.join(targetPath, ".env.template");
|
|
41758
42347
|
writeFileSync(envFilePath, envTemplate.content, "utf-8");
|
|
41759
42348
|
console.log("Created .env.template");
|
|
41760
42349
|
}
|
|
41761
42350
|
for (const template of githubTemplates) {
|
|
41762
|
-
const targetFilePath =
|
|
41763
|
-
const targetDirPath =
|
|
42351
|
+
const targetFilePath = path25.join(githubDir, template.path);
|
|
42352
|
+
const targetDirPath = path25.dirname(targetFilePath);
|
|
41764
42353
|
if (!existsSync(targetDirPath)) {
|
|
41765
42354
|
mkdirSync(targetDirPath, { recursive: true });
|
|
41766
42355
|
}
|
|
41767
42356
|
writeFileSync(targetFilePath, template.content, "utf-8");
|
|
41768
|
-
console.log(`Created ${
|
|
42357
|
+
console.log(`Created ${path25.relative(targetPath, targetFilePath)}`);
|
|
41769
42358
|
}
|
|
41770
42359
|
for (const template of otherAgentvTemplates) {
|
|
41771
|
-
const targetFilePath =
|
|
41772
|
-
const targetDirPath =
|
|
42360
|
+
const targetFilePath = path25.join(agentvDir, template.path);
|
|
42361
|
+
const targetDirPath = path25.dirname(targetFilePath);
|
|
41773
42362
|
if (!existsSync(targetDirPath)) {
|
|
41774
42363
|
mkdirSync(targetDirPath, { recursive: true });
|
|
41775
42364
|
}
|
|
41776
42365
|
writeFileSync(targetFilePath, template.content, "utf-8");
|
|
41777
|
-
console.log(`Created ${
|
|
42366
|
+
console.log(`Created ${path25.relative(targetPath, targetFilePath)}`);
|
|
41778
42367
|
}
|
|
41779
42368
|
for (const template of claudeTemplates) {
|
|
41780
|
-
const targetFilePath =
|
|
41781
|
-
const targetDirPath =
|
|
42369
|
+
const targetFilePath = path25.join(claudeDir, template.path);
|
|
42370
|
+
const targetDirPath = path25.dirname(targetFilePath);
|
|
41782
42371
|
if (!existsSync(targetDirPath)) {
|
|
41783
42372
|
mkdirSync(targetDirPath, { recursive: true });
|
|
41784
42373
|
}
|
|
41785
42374
|
writeFileSync(targetFilePath, template.content, "utf-8");
|
|
41786
|
-
console.log(`Created ${
|
|
42375
|
+
console.log(`Created ${path25.relative(targetPath, targetFilePath)}`);
|
|
41787
42376
|
}
|
|
41788
42377
|
console.log("\nAgentV initialized successfully!");
|
|
41789
42378
|
console.log("\nFiles installed to root:");
|
|
@@ -41791,17 +42380,17 @@ async function initCommand(options = {}) {
|
|
|
41791
42380
|
console.log(" - .env.template");
|
|
41792
42381
|
}
|
|
41793
42382
|
console.log(`
|
|
41794
|
-
Files installed to ${
|
|
42383
|
+
Files installed to ${path25.relative(targetPath, githubDir)}:`);
|
|
41795
42384
|
for (const t of githubTemplates) {
|
|
41796
42385
|
console.log(` - ${t.path}`);
|
|
41797
42386
|
}
|
|
41798
42387
|
console.log(`
|
|
41799
|
-
Files installed to ${
|
|
42388
|
+
Files installed to ${path25.relative(targetPath, agentvDir)}:`);
|
|
41800
42389
|
for (const t of otherAgentvTemplates) {
|
|
41801
42390
|
console.log(` - ${t.path}`);
|
|
41802
42391
|
}
|
|
41803
42392
|
console.log(`
|
|
41804
|
-
Files installed to ${
|
|
42393
|
+
Files installed to ${path25.relative(targetPath, claudeDir)}:`);
|
|
41805
42394
|
for (const t of claudeTemplates) {
|
|
41806
42395
|
console.log(` - ${t.path}`);
|
|
41807
42396
|
}
|
|
@@ -41916,7 +42505,7 @@ function isTTY2() {
|
|
|
41916
42505
|
// src/commands/validate/validate-files.ts
|
|
41917
42506
|
import { constants as constants7 } from "node:fs";
|
|
41918
42507
|
import { access as access7, readdir as readdir3, stat as stat5 } from "node:fs/promises";
|
|
41919
|
-
import
|
|
42508
|
+
import path26 from "node:path";
|
|
41920
42509
|
async function validateFiles(paths) {
|
|
41921
42510
|
const filePaths = await expandPaths(paths);
|
|
41922
42511
|
const results = [];
|
|
@@ -41934,7 +42523,7 @@ async function validateFiles(paths) {
|
|
|
41934
42523
|
};
|
|
41935
42524
|
}
|
|
41936
42525
|
async function validateSingleFile(filePath) {
|
|
41937
|
-
const absolutePath =
|
|
42526
|
+
const absolutePath = path26.resolve(filePath);
|
|
41938
42527
|
const fileType = await detectFileType(absolutePath);
|
|
41939
42528
|
let result;
|
|
41940
42529
|
if (fileType === "eval") {
|
|
@@ -41959,7 +42548,7 @@ async function validateSingleFile(filePath) {
|
|
|
41959
42548
|
async function expandPaths(paths) {
|
|
41960
42549
|
const expanded = [];
|
|
41961
42550
|
for (const inputPath of paths) {
|
|
41962
|
-
const absolutePath =
|
|
42551
|
+
const absolutePath = path26.resolve(inputPath);
|
|
41963
42552
|
try {
|
|
41964
42553
|
await access7(absolutePath, constants7.F_OK);
|
|
41965
42554
|
} catch {
|
|
@@ -41983,7 +42572,7 @@ async function findYamlFiles(dirPath) {
|
|
|
41983
42572
|
try {
|
|
41984
42573
|
const entries = await readdir3(dirPath, { withFileTypes: true });
|
|
41985
42574
|
for (const entry of entries) {
|
|
41986
|
-
const fullPath =
|
|
42575
|
+
const fullPath = path26.join(dirPath, entry.name);
|
|
41987
42576
|
if (entry.isDirectory()) {
|
|
41988
42577
|
if (entry.name === "node_modules" || entry.name.startsWith(".")) {
|
|
41989
42578
|
continue;
|
|
@@ -42000,7 +42589,7 @@ async function findYamlFiles(dirPath) {
|
|
|
42000
42589
|
return results;
|
|
42001
42590
|
}
|
|
42002
42591
|
function isYamlFile(filePath) {
|
|
42003
|
-
const ext =
|
|
42592
|
+
const ext = path26.extname(filePath).toLowerCase();
|
|
42004
42593
|
return ext === ".yaml" || ext === ".yml";
|
|
42005
42594
|
}
|
|
42006
42595
|
|
|
@@ -42058,4 +42647,4 @@ export {
|
|
|
42058
42647
|
app,
|
|
42059
42648
|
runCli
|
|
42060
42649
|
};
|
|
42061
|
-
//# sourceMappingURL=chunk-
|
|
42650
|
+
//# sourceMappingURL=chunk-ZVSFP6NK.js.map
|