agentv 4.35.1 → 4.36.0-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{artifact-writer-G57MG52C.js → artifact-writer-3YRN6YTA.js} +4 -4
- package/dist/{chunk-CRMGUVRZ.js → chunk-4M6FAQTW.js} +85 -19
- package/dist/chunk-4M6FAQTW.js.map +1 -0
- package/dist/{chunk-INOKS5LF.js → chunk-7KZ2AF26.js} +269 -57
- package/dist/chunk-7KZ2AF26.js.map +1 -0
- package/dist/{chunk-KJGYL3M3.js → chunk-HVBAVOAH.js} +72 -50
- package/dist/chunk-HVBAVOAH.js.map +1 -0
- package/dist/{chunk-KNF3AGCI.js → chunk-P5JONEWJ.js} +231 -35
- package/dist/chunk-P5JONEWJ.js.map +1 -0
- package/dist/{chunk-6QEIZ33V.js → chunk-TUTURE2B.js} +1227 -372
- package/dist/chunk-TUTURE2B.js.map +1 -0
- package/dist/cli.js +5 -5
- package/dist/dashboard/assets/index-DA96FAM5.js +119 -0
- package/dist/dashboard/assets/{index-Bdk-9a_8.js → index-l4t97uO8.js} +1 -1
- package/dist/dashboard/assets/index-nmrFBoNd.css +1 -0
- package/dist/dashboard/index.html +2 -2
- package/dist/{dist-M4B77IW4.js → dist-BSFUYS54.js} +73 -3
- package/dist/index.js +5 -5
- package/dist/{interactive-VYQ5SYMR.js → interactive-IEC63EVP.js} +5 -5
- package/dist/skills/agentv-eval-writer/SKILL.md +6 -0
- package/dist/{ts-eval-loader-EQJX3OLT-THE7D3GR.js → ts-eval-loader-4DU65XGW-YM47FFG2.js} +2 -2
- package/package.json +1 -1
- package/dist/chunk-6QEIZ33V.js.map +0 -1
- package/dist/chunk-CRMGUVRZ.js.map +0 -1
- package/dist/chunk-INOKS5LF.js.map +0 -1
- package/dist/chunk-KJGYL3M3.js.map +0 -1
- package/dist/chunk-KNF3AGCI.js.map +0 -1
- package/dist/dashboard/assets/index-BPMAZqjE.css +0 -1
- package/dist/dashboard/assets/index-BWO0UcxG.js +0 -118
- /package/dist/{artifact-writer-G57MG52C.js.map → artifact-writer-3YRN6YTA.js.map} +0 -0
- /package/dist/{dist-M4B77IW4.js.map → dist-BSFUYS54.js.map} +0 -0
- /package/dist/{interactive-VYQ5SYMR.js.map → interactive-IEC63EVP.js.map} +0 -0
- /package/dist/{ts-eval-loader-EQJX3OLT-THE7D3GR.js.map → ts-eval-loader-4DU65XGW-YM47FFG2.js.map} +0 -0
|
@@ -493,8 +493,8 @@ function getErrorMap() {
|
|
|
493
493
|
|
|
494
494
|
// ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/helpers/parseUtil.js
|
|
495
495
|
var makeIssue = (params) => {
|
|
496
|
-
const { data, path:
|
|
497
|
-
const fullPath = [...
|
|
496
|
+
const { data, path: path48, errorMaps, issueData } = params;
|
|
497
|
+
const fullPath = [...path48, ...issueData.path || []];
|
|
498
498
|
const fullIssue = {
|
|
499
499
|
...issueData,
|
|
500
500
|
path: fullPath
|
|
@@ -610,11 +610,11 @@ var errorUtil;
|
|
|
610
610
|
|
|
611
611
|
// ../../node_modules/.bun/zod@3.25.76/node_modules/zod/v3/types.js
|
|
612
612
|
var ParseInputLazyPath = class {
|
|
613
|
-
constructor(parent, value,
|
|
613
|
+
constructor(parent, value, path48, key) {
|
|
614
614
|
this._cachedPath = [];
|
|
615
615
|
this.parent = parent;
|
|
616
616
|
this.data = value;
|
|
617
|
-
this._path =
|
|
617
|
+
this._path = path48;
|
|
618
618
|
this._key = key;
|
|
619
619
|
}
|
|
620
620
|
get path() {
|
|
@@ -4056,7 +4056,7 @@ var coerce = {
|
|
|
4056
4056
|
};
|
|
4057
4057
|
var NEVER = INVALID;
|
|
4058
4058
|
|
|
4059
|
-
// ../../packages/core/dist/chunk-
|
|
4059
|
+
// ../../packages/core/dist/chunk-GPRJDMQ6.js
|
|
4060
4060
|
import { parse } from "yaml";
|
|
4061
4061
|
import os from "node:os";
|
|
4062
4062
|
import path from "node:path";
|
|
@@ -4348,7 +4348,8 @@ var KNOWN_PROVIDERS = [
|
|
|
4348
4348
|
"vscode",
|
|
4349
4349
|
"vscode-insiders",
|
|
4350
4350
|
"agentv",
|
|
4351
|
-
"transcript"
|
|
4351
|
+
"transcript",
|
|
4352
|
+
"replay"
|
|
4352
4353
|
];
|
|
4353
4354
|
var PROVIDER_ALIASES = [
|
|
4354
4355
|
"azure-openai",
|
|
@@ -4890,6 +4891,12 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath, op
|
|
|
4890
4891
|
...base,
|
|
4891
4892
|
config: resolveCliConfig(parsed, env, evalFilePath)
|
|
4892
4893
|
};
|
|
4894
|
+
case "replay":
|
|
4895
|
+
return {
|
|
4896
|
+
kind: "replay",
|
|
4897
|
+
...base,
|
|
4898
|
+
config: resolveReplayConfig(parsed, env, evalFilePath)
|
|
4899
|
+
};
|
|
4893
4900
|
default:
|
|
4894
4901
|
return {
|
|
4895
4902
|
kind: "cli",
|
|
@@ -5549,6 +5556,35 @@ function resolveMockConfig(target) {
|
|
|
5549
5556
|
const response = typeof target.response === "string" ? target.response : void 0;
|
|
5550
5557
|
return { response };
|
|
5551
5558
|
}
|
|
5559
|
+
function resolveReplayConfig(target, env, evalFilePath) {
|
|
5560
|
+
const fixtures = resolveString(target.fixtures, env, `${target.name} replay fixtures`, true);
|
|
5561
|
+
const fixturesPath = evalFilePath && !path3.isAbsolute(fixtures) ? path3.resolve(path3.dirname(path3.resolve(evalFilePath)), fixtures) : path3.resolve(fixtures);
|
|
5562
|
+
const sourceTarget = resolveString(
|
|
5563
|
+
target.source_target,
|
|
5564
|
+
env,
|
|
5565
|
+
`${target.name} replay source_target`,
|
|
5566
|
+
true
|
|
5567
|
+
);
|
|
5568
|
+
const suite = resolveOptionalString(target.suite, env, `${target.name} replay suite`, {
|
|
5569
|
+
allowLiteral: true,
|
|
5570
|
+
optionalEnv: true
|
|
5571
|
+
});
|
|
5572
|
+
const evalPath = resolveOptionalString(target.eval_path, env, `${target.name} replay eval_path`, {
|
|
5573
|
+
allowLiteral: true,
|
|
5574
|
+
optionalEnv: true
|
|
5575
|
+
});
|
|
5576
|
+
const variant = resolveOptionalString(target.variant, env, `${target.name} replay variant`, {
|
|
5577
|
+
allowLiteral: true,
|
|
5578
|
+
optionalEnv: true
|
|
5579
|
+
});
|
|
5580
|
+
return {
|
|
5581
|
+
fixturesPath,
|
|
5582
|
+
sourceTarget,
|
|
5583
|
+
suite,
|
|
5584
|
+
evalPath,
|
|
5585
|
+
variant
|
|
5586
|
+
};
|
|
5587
|
+
}
|
|
5552
5588
|
function resolveVSCodeConfig(target, env, insiders, _evalFilePath) {
|
|
5553
5589
|
const executableSource = target.executable;
|
|
5554
5590
|
const waitSource = target.wait;
|
|
@@ -6087,19 +6123,20 @@ async function expandFileReferences(tests, evalFileDir) {
|
|
|
6087
6123
|
return expanded;
|
|
6088
6124
|
}
|
|
6089
6125
|
|
|
6090
|
-
// ../../packages/core/dist/chunk-
|
|
6091
|
-
import
|
|
6126
|
+
// ../../packages/core/dist/chunk-FQ5QRJIT.js
|
|
6127
|
+
import path47 from "node:path";
|
|
6092
6128
|
import { pathToFileURL as pathToFileURL2 } from "node:url";
|
|
6093
6129
|
import { existsSync as existsSync6 } from "node:fs";
|
|
6094
|
-
import
|
|
6130
|
+
import path46 from "node:path";
|
|
6095
6131
|
import micromatch4 from "micromatch";
|
|
6096
6132
|
import { mkdir, readFile as readFile3, writeFile } from "node:fs/promises";
|
|
6097
6133
|
import path5 from "node:path";
|
|
6098
6134
|
import { execFile as execFile3 } from "node:child_process";
|
|
6099
|
-
import { createHash as
|
|
6135
|
+
import { createHash as createHash3, randomUUID as randomUUID9 } from "node:crypto";
|
|
6100
6136
|
import { existsSync as existsSync5 } from "node:fs";
|
|
6101
|
-
import { copyFile as copyFile2, mkdir as
|
|
6102
|
-
import
|
|
6137
|
+
import { copyFile as copyFile2, mkdir as mkdir16, readdir as readdir8, stat as stat9 } from "node:fs/promises";
|
|
6138
|
+
import path45 from "node:path";
|
|
6139
|
+
import { fileURLToPath as fileURLToPath5 } from "node:url";
|
|
6103
6140
|
import { promisify as promisify7 } from "node:util";
|
|
6104
6141
|
import micromatch3 from "micromatch";
|
|
6105
6142
|
import { mkdtemp, rm, writeFile as writeFile2 } from "node:fs/promises";
|
|
@@ -6818,10 +6855,10 @@ function assignProp(target, prop, value) {
|
|
|
6818
6855
|
configurable: true
|
|
6819
6856
|
});
|
|
6820
6857
|
}
|
|
6821
|
-
function getElementAtPath(obj,
|
|
6822
|
-
if (!
|
|
6858
|
+
function getElementAtPath(obj, path48) {
|
|
6859
|
+
if (!path48)
|
|
6823
6860
|
return obj;
|
|
6824
|
-
return
|
|
6861
|
+
return path48.reduce((acc, key) => acc?.[key], obj);
|
|
6825
6862
|
}
|
|
6826
6863
|
function promiseAllObject(promisesObj) {
|
|
6827
6864
|
const keys = Object.keys(promisesObj);
|
|
@@ -7141,11 +7178,11 @@ function aborted(x, startIndex = 0) {
|
|
|
7141
7178
|
}
|
|
7142
7179
|
return false;
|
|
7143
7180
|
}
|
|
7144
|
-
function prefixIssues(
|
|
7181
|
+
function prefixIssues(path48, issues) {
|
|
7145
7182
|
return issues.map((iss) => {
|
|
7146
7183
|
var _a;
|
|
7147
7184
|
(_a = iss).path ?? (_a.path = []);
|
|
7148
|
-
iss.path.unshift(
|
|
7185
|
+
iss.path.unshift(path48);
|
|
7149
7186
|
return iss;
|
|
7150
7187
|
});
|
|
7151
7188
|
}
|
|
@@ -7282,7 +7319,7 @@ function treeifyError(error40, _mapper) {
|
|
|
7282
7319
|
return issue2.message;
|
|
7283
7320
|
};
|
|
7284
7321
|
const result = { errors: [] };
|
|
7285
|
-
const processError = (error41,
|
|
7322
|
+
const processError = (error41, path48 = []) => {
|
|
7286
7323
|
var _a, _b;
|
|
7287
7324
|
for (const issue2 of error41.issues) {
|
|
7288
7325
|
if (issue2.code === "invalid_union" && issue2.errors.length) {
|
|
@@ -7292,7 +7329,7 @@ function treeifyError(error40, _mapper) {
|
|
|
7292
7329
|
} else if (issue2.code === "invalid_element") {
|
|
7293
7330
|
processError({ issues: issue2.issues }, issue2.path);
|
|
7294
7331
|
} else {
|
|
7295
|
-
const fullpath = [...
|
|
7332
|
+
const fullpath = [...path48, ...issue2.path];
|
|
7296
7333
|
if (fullpath.length === 0) {
|
|
7297
7334
|
result.errors.push(mapper(issue2));
|
|
7298
7335
|
continue;
|
|
@@ -7322,9 +7359,9 @@ function treeifyError(error40, _mapper) {
|
|
|
7322
7359
|
processError(error40);
|
|
7323
7360
|
return result;
|
|
7324
7361
|
}
|
|
7325
|
-
function toDotPath(
|
|
7362
|
+
function toDotPath(path48) {
|
|
7326
7363
|
const segs = [];
|
|
7327
|
-
for (const seg of
|
|
7364
|
+
for (const seg of path48) {
|
|
7328
7365
|
if (typeof seg === "number")
|
|
7329
7366
|
segs.push(`[${seg}]`);
|
|
7330
7367
|
else if (typeof seg === "symbol")
|
|
@@ -18754,7 +18791,7 @@ var RequestError = class _RequestError extends Error {
|
|
|
18754
18791
|
}
|
|
18755
18792
|
};
|
|
18756
18793
|
|
|
18757
|
-
// ../../packages/core/dist/chunk-
|
|
18794
|
+
// ../../packages/core/dist/chunk-FQ5QRJIT.js
|
|
18758
18795
|
import { exec as execCallback } from "node:child_process";
|
|
18759
18796
|
import { readdirSync, statSync } from "node:fs";
|
|
18760
18797
|
import { readFile as readFile32, readdir as readdir2, stat as stat2 } from "node:fs/promises";
|
|
@@ -18788,74 +18825,77 @@ import { mkdir as mkdir8 } from "node:fs/promises";
|
|
|
18788
18825
|
import path16 from "node:path";
|
|
18789
18826
|
import { createInterface } from "node:readline";
|
|
18790
18827
|
import { fileURLToPath as fileURLToPath3, pathToFileURL } from "node:url";
|
|
18828
|
+
import { createHash } from "node:crypto";
|
|
18829
|
+
import { mkdir as mkdir9, readFile as readFile6, writeFile as writeFile4 } from "node:fs/promises";
|
|
18830
|
+
import path17 from "node:path";
|
|
18791
18831
|
import { exec as exec2 } from "node:child_process";
|
|
18792
18832
|
import { constants as constants2, access as access2 } from "node:fs/promises";
|
|
18793
|
-
import
|
|
18833
|
+
import path28 from "node:path";
|
|
18794
18834
|
import { promisify as promisify4 } from "node:util";
|
|
18795
|
-
import { stat as stat5, writeFile as
|
|
18796
|
-
import
|
|
18835
|
+
import { stat as stat5, writeFile as writeFile7 } from "node:fs/promises";
|
|
18836
|
+
import path26 from "node:path";
|
|
18797
18837
|
import { constants as constants3 } from "node:fs";
|
|
18798
|
-
import { access as access3, mkdir as
|
|
18799
|
-
import path17 from "node:path";
|
|
18838
|
+
import { access as access3, mkdir as mkdir10, readdir as readdir3, rm as rm3, stat as stat3 } from "node:fs/promises";
|
|
18800
18839
|
import path18 from "node:path";
|
|
18801
18840
|
import path19 from "node:path";
|
|
18802
|
-
import { readFile as readFile6 } from "node:fs/promises";
|
|
18803
18841
|
import path20 from "node:path";
|
|
18842
|
+
import { readFile as readFile7 } from "node:fs/promises";
|
|
18843
|
+
import path21 from "node:path";
|
|
18804
18844
|
import { exec, spawn as spawn4 } from "node:child_process";
|
|
18805
|
-
import { mkdir as
|
|
18806
|
-
import
|
|
18845
|
+
import { mkdir as mkdir11, writeFile as writeFile5 } from "node:fs/promises";
|
|
18846
|
+
import path23 from "node:path";
|
|
18807
18847
|
import { promisify as promisify3 } from "node:util";
|
|
18808
|
-
import
|
|
18809
|
-
import { copyFile, mkdir as
|
|
18848
|
+
import path222 from "node:path";
|
|
18849
|
+
import { copyFile, mkdir as mkdir12, readFile as readFile8, readdir as readdir4, stat as stat4, writeFile as writeFile6 } from "node:fs/promises";
|
|
18850
|
+
import path25 from "node:path";
|
|
18810
18851
|
import path24 from "node:path";
|
|
18811
|
-
import path23 from "node:path";
|
|
18812
18852
|
import JSON5 from "json5";
|
|
18813
|
-
import { writeFile as
|
|
18814
|
-
import
|
|
18853
|
+
import { writeFile as writeFile8 } from "node:fs/promises";
|
|
18854
|
+
import path27 from "node:path";
|
|
18815
18855
|
import { constants as constants32 } from "node:fs";
|
|
18816
|
-
import { access as access32, readFile as
|
|
18817
|
-
import path28 from "node:path";
|
|
18856
|
+
import { access as access32, readFile as readFile9 } from "node:fs/promises";
|
|
18818
18857
|
import path29 from "node:path";
|
|
18819
|
-
import fg2 from "fast-glob";
|
|
18820
18858
|
import path30 from "node:path";
|
|
18859
|
+
import fg2 from "fast-glob";
|
|
18821
18860
|
import path31 from "node:path";
|
|
18822
|
-
import fg22 from "fast-glob";
|
|
18823
18861
|
import path322 from "node:path";
|
|
18824
|
-
import
|
|
18825
|
-
import { cp, mkdir as mkdir13, readdir as readdir5, rm as rm4, stat as stat6 } from "node:fs/promises";
|
|
18862
|
+
import fg22 from "fast-glob";
|
|
18826
18863
|
import path33 from "node:path";
|
|
18864
|
+
import fg3 from "fast-glob";
|
|
18865
|
+
import { cp, mkdir as mkdir14, readdir as readdir5, rm as rm4, stat as stat6 } from "node:fs/promises";
|
|
18866
|
+
import path34 from "node:path";
|
|
18827
18867
|
import { execFile } from "node:child_process";
|
|
18828
|
-
import { createHash } from "node:crypto";
|
|
18868
|
+
import { createHash as createHash2 } from "node:crypto";
|
|
18829
18869
|
import { existsSync as existsSync3 } from "node:fs";
|
|
18830
|
-
import { cp as cp2, mkdir as
|
|
18831
|
-
import
|
|
18870
|
+
import { cp as cp2, mkdir as mkdir15, readFile as readFile10, readdir as readdir6, rm as rm5, unlink, writeFile as writeFile9 } from "node:fs/promises";
|
|
18871
|
+
import path35 from "node:path";
|
|
18832
18872
|
import { promisify as promisify5 } from "node:util";
|
|
18833
18873
|
import { execFile as execFile2 } from "node:child_process";
|
|
18834
18874
|
import { existsSync as existsSync4 } from "node:fs";
|
|
18835
|
-
import
|
|
18875
|
+
import path36 from "node:path";
|
|
18836
18876
|
import { promisify as promisify6 } from "node:util";
|
|
18837
18877
|
import { readdir as readdir7, stat as stat7 } from "node:fs/promises";
|
|
18838
|
-
import
|
|
18839
|
-
import { readFile as
|
|
18840
|
-
import
|
|
18878
|
+
import path37 from "node:path";
|
|
18879
|
+
import { readFile as readFile17, stat as stat8 } from "node:fs/promises";
|
|
18880
|
+
import path44 from "node:path";
|
|
18841
18881
|
import micromatch2 from "micromatch";
|
|
18842
18882
|
import { stringify as stringifyYaml } from "yaml";
|
|
18843
|
-
import { readFile as readFile10 } from "node:fs/promises";
|
|
18844
|
-
import path37 from "node:path";
|
|
18845
18883
|
import { readFile as readFile11 } from "node:fs/promises";
|
|
18846
|
-
import
|
|
18884
|
+
import path38 from "node:path";
|
|
18885
|
+
import { readFile as readFile12 } from "node:fs/promises";
|
|
18886
|
+
import path40 from "node:path";
|
|
18847
18887
|
import { constants as constants4 } from "node:fs";
|
|
18848
18888
|
import { access as access4 } from "node:fs/promises";
|
|
18849
|
-
import
|
|
18889
|
+
import path39 from "node:path";
|
|
18850
18890
|
import { fileURLToPath as fileURLToPath4 } from "node:url";
|
|
18891
|
+
import { readFile as readFile14 } from "node:fs/promises";
|
|
18892
|
+
import path41 from "node:path";
|
|
18851
18893
|
import { readFile as readFile13 } from "node:fs/promises";
|
|
18852
|
-
import
|
|
18853
|
-
import
|
|
18894
|
+
import { readFile as readFile16 } from "node:fs/promises";
|
|
18895
|
+
import path43 from "node:path";
|
|
18896
|
+
import micromatch from "micromatch";
|
|
18854
18897
|
import { readFile as readFile15 } from "node:fs/promises";
|
|
18855
18898
|
import path422 from "node:path";
|
|
18856
|
-
import micromatch from "micromatch";
|
|
18857
|
-
import { readFile as readFile14 } from "node:fs/promises";
|
|
18858
|
-
import path41 from "node:path";
|
|
18859
18899
|
var DEFAULT_CACHE_PATH = ".agentv/cache";
|
|
18860
18900
|
var ResponseCache = class {
|
|
18861
18901
|
cachePath;
|
|
@@ -19469,13 +19509,14 @@ var CodeGrader = class {
|
|
|
19469
19509
|
}
|
|
19470
19510
|
return imageTmpDir;
|
|
19471
19511
|
};
|
|
19472
|
-
const
|
|
19473
|
-
|
|
19512
|
+
const transcriptMessages = context.trace?.messages ?? context.output ?? [];
|
|
19513
|
+
const materializedMessages = await materializeContentForGrader(
|
|
19514
|
+
transcriptMessages,
|
|
19474
19515
|
getImageDir
|
|
19475
19516
|
);
|
|
19476
|
-
let outputForPayload =
|
|
19517
|
+
let outputForPayload = context.candidate;
|
|
19477
19518
|
let outputPath;
|
|
19478
|
-
if (outputForPayload) {
|
|
19519
|
+
if (outputForPayload !== null) {
|
|
19479
19520
|
const serialized = JSON.stringify(outputForPayload);
|
|
19480
19521
|
if (serialized.length > FILE_BACKED_OUTPUT_THRESHOLD) {
|
|
19481
19522
|
const tmpDir = await mkdtemp(join(tmpdir(), "agentv-grader-"));
|
|
@@ -19484,6 +19525,10 @@ var CodeGrader = class {
|
|
|
19484
19525
|
outputForPayload = null;
|
|
19485
19526
|
}
|
|
19486
19527
|
}
|
|
19528
|
+
const traceForPayload = context.trace ? {
|
|
19529
|
+
...context.trace,
|
|
19530
|
+
messages: materializedMessages ?? context.trace.messages
|
|
19531
|
+
} : null;
|
|
19487
19532
|
const payload = {
|
|
19488
19533
|
criteria: context.evalCase.criteria,
|
|
19489
19534
|
expectedOutput: await materializeContentForGrader(
|
|
@@ -19491,6 +19536,8 @@ var CodeGrader = class {
|
|
|
19491
19536
|
getImageDir
|
|
19492
19537
|
),
|
|
19493
19538
|
output: outputForPayload,
|
|
19539
|
+
answer: context.candidate,
|
|
19540
|
+
messages: materializedMessages ?? [],
|
|
19494
19541
|
outputPath,
|
|
19495
19542
|
inputFiles: context.evalCase.file_paths,
|
|
19496
19543
|
input: await materializeContentForGrader(
|
|
@@ -19498,7 +19545,14 @@ var CodeGrader = class {
|
|
|
19498
19545
|
getImageDir
|
|
19499
19546
|
),
|
|
19500
19547
|
metadata: context.evalCase.metadata ?? null,
|
|
19501
|
-
trace:
|
|
19548
|
+
trace: traceForPayload,
|
|
19549
|
+
traceSummary: context.trace ? {
|
|
19550
|
+
eventCount: context.trace.eventCount,
|
|
19551
|
+
toolCalls: context.trace.toolCalls,
|
|
19552
|
+
errorCount: context.trace.errorCount,
|
|
19553
|
+
toolDurations: context.trace.toolDurations,
|
|
19554
|
+
llmCallCount: context.trace.llmCallCount
|
|
19555
|
+
} : null,
|
|
19502
19556
|
tokenUsage: context.tokenUsage ?? null,
|
|
19503
19557
|
costUsd: context.costUsd ?? null,
|
|
19504
19558
|
durationMs: context.durationMs ?? null,
|
|
@@ -21367,10 +21421,17 @@ var NORMALIZED_TRACE_EVENT_TYPES = [
|
|
|
21367
21421
|
"message",
|
|
21368
21422
|
"model_turn",
|
|
21369
21423
|
"tool_call",
|
|
21370
|
-
"tool_result"
|
|
21424
|
+
"tool_result",
|
|
21425
|
+
"final_response",
|
|
21426
|
+
"error"
|
|
21371
21427
|
];
|
|
21372
21428
|
var NORMALIZED_TOOL_STATUSES = ["ok", "error", "timeout", "cancelled", "unknown"];
|
|
21373
21429
|
var NORMALIZED_REDACTION_LEVELS = ["none", "partial", "full"];
|
|
21430
|
+
var TRACE_SCHEMA_VERSION = NORMALIZED_TRAJECTORY_SCHEMA_VERSION;
|
|
21431
|
+
var TRACE_SOURCE_KINDS = NORMALIZED_TRACE_SOURCE_KINDS;
|
|
21432
|
+
var TRACE_EVENT_TYPES = NORMALIZED_TRACE_EVENT_TYPES;
|
|
21433
|
+
var TRACE_TOOL_STATUSES = NORMALIZED_TOOL_STATUSES;
|
|
21434
|
+
var TRACE_REDACTION_LEVELS = NORMALIZED_REDACTION_LEVELS;
|
|
21374
21435
|
function omitUndefinedProperties(value) {
|
|
21375
21436
|
return Object.fromEntries(
|
|
21376
21437
|
Object.entries(value).filter(([, property]) => property !== void 0)
|
|
@@ -21474,6 +21535,7 @@ var NormalizedTraceEventWireSchema = external_exports.object({
|
|
|
21474
21535
|
message: NormalizedTraceMessageWireSchema.optional(),
|
|
21475
21536
|
model: NormalizedTraceModelWireSchema.optional(),
|
|
21476
21537
|
tool: NormalizedTraceToolWireSchema.optional(),
|
|
21538
|
+
error: NormalizedTraceErrorWireSchema.optional(),
|
|
21477
21539
|
source_ref: NormalizedTraceSourceRefWireSchema.optional(),
|
|
21478
21540
|
raw_evidence: external_exports.array(NormalizedRawEvidenceWireSchema).optional(),
|
|
21479
21541
|
redaction: NormalizedRedactionStateWireSchema.optional(),
|
|
@@ -21492,6 +21554,18 @@ var NormalizedTrajectoryWireSchema = external_exports.object({
|
|
|
21492
21554
|
ended_at: external_exports.string().optional(),
|
|
21493
21555
|
metadata: MetadataWireSchema.optional()
|
|
21494
21556
|
});
|
|
21557
|
+
var TraceRedactionStateWireSchema = NormalizedRedactionStateWireSchema;
|
|
21558
|
+
var TraceErrorWireSchema = NormalizedTraceErrorWireSchema;
|
|
21559
|
+
var TraceSourceWireSchema = NormalizedTraceSourceWireSchema;
|
|
21560
|
+
var TraceSessionWireSchema = NormalizedTraceSessionWireSchema;
|
|
21561
|
+
var TraceBranchWireSchema = NormalizedTraceBranchWireSchema;
|
|
21562
|
+
var TraceSourceRefWireSchema = NormalizedTraceSourceRefWireSchema;
|
|
21563
|
+
var TraceRawEvidenceWireSchema = NormalizedRawEvidenceWireSchema;
|
|
21564
|
+
var TraceMessageWireSchema = NormalizedTraceMessageWireSchema;
|
|
21565
|
+
var TraceModelWireSchema = NormalizedTraceModelWireSchema;
|
|
21566
|
+
var TraceToolWireSchema = NormalizedTraceToolWireSchema;
|
|
21567
|
+
var TraceEventWireSchema = NormalizedTraceEventWireSchema;
|
|
21568
|
+
var TraceArtifactWireSchema = NormalizedTrajectoryWireSchema;
|
|
21495
21569
|
function toNormalizedTrajectoryWire(trajectory) {
|
|
21496
21570
|
return NormalizedTrajectoryWireSchema.parse(
|
|
21497
21571
|
omitUndefinedProperties({
|
|
@@ -21525,6 +21599,12 @@ function fromNormalizedTrajectoryWire(input) {
|
|
|
21525
21599
|
metadata: wire.metadata
|
|
21526
21600
|
};
|
|
21527
21601
|
}
|
|
21602
|
+
function toTraceArtifactWire(artifact) {
|
|
21603
|
+
return toNormalizedTrajectoryWire(artifact);
|
|
21604
|
+
}
|
|
21605
|
+
function fromTraceArtifactWire(input) {
|
|
21606
|
+
return fromNormalizedTrajectoryWire(input);
|
|
21607
|
+
}
|
|
21528
21608
|
function toNormalizedTraceSourceWire(source) {
|
|
21529
21609
|
return omitUndefinedProperties({
|
|
21530
21610
|
kind: source.kind,
|
|
@@ -21599,6 +21679,7 @@ function toNormalizedTraceEventWire(event) {
|
|
|
21599
21679
|
message: event.message ? toNormalizedTraceMessageWire(event.message) : void 0,
|
|
21600
21680
|
model: event.model ? toNormalizedTraceModelWire(event.model) : void 0,
|
|
21601
21681
|
tool: event.tool ? toNormalizedTraceToolWire(event.tool) : void 0,
|
|
21682
|
+
error: event.error ? toNormalizedTraceErrorWire(event.error) : void 0,
|
|
21602
21683
|
source_ref: event.sourceRef ? toNormalizedTraceSourceRefWire(event.sourceRef) : void 0,
|
|
21603
21684
|
raw_evidence: event.rawEvidence?.map(toNormalizedRawEvidenceWire),
|
|
21604
21685
|
redaction: event.redaction,
|
|
@@ -21619,6 +21700,7 @@ function fromNormalizedTraceEventWire(event) {
|
|
|
21619
21700
|
message: event.message ? fromNormalizedTraceMessageWire(event.message) : void 0,
|
|
21620
21701
|
model: event.model ? fromNormalizedTraceModelWire(event.model) : void 0,
|
|
21621
21702
|
tool: event.tool ? fromNormalizedTraceToolWire(event.tool) : void 0,
|
|
21703
|
+
error: event.error ? fromNormalizedTraceErrorWire(event.error) : void 0,
|
|
21622
21704
|
sourceRef: event.source_ref ? fromNormalizedTraceSourceRefWire(event.source_ref) : void 0,
|
|
21623
21705
|
rawEvidence: event.raw_evidence?.map(fromNormalizedRawEvidenceWire),
|
|
21624
21706
|
redaction: event.redaction,
|
|
@@ -21687,6 +21769,24 @@ function fromNormalizedTraceToolWire(tool) {
|
|
|
21687
21769
|
metadata: tool.metadata
|
|
21688
21770
|
};
|
|
21689
21771
|
}
|
|
21772
|
+
function toNormalizedTraceErrorWire(error40) {
|
|
21773
|
+
return omitUndefinedProperties({
|
|
21774
|
+
message: error40.message,
|
|
21775
|
+
name: error40.name,
|
|
21776
|
+
code: error40.code,
|
|
21777
|
+
stack: error40.stack,
|
|
21778
|
+
metadata: error40.metadata
|
|
21779
|
+
});
|
|
21780
|
+
}
|
|
21781
|
+
function fromNormalizedTraceErrorWire(error40) {
|
|
21782
|
+
return {
|
|
21783
|
+
message: error40.message,
|
|
21784
|
+
name: error40.name,
|
|
21785
|
+
code: error40.code,
|
|
21786
|
+
stack: error40.stack,
|
|
21787
|
+
metadata: error40.metadata
|
|
21788
|
+
};
|
|
21789
|
+
}
|
|
21690
21790
|
function toNormalizedTraceSourceRefWire(sourceRef) {
|
|
21691
21791
|
return omitUndefinedProperties({
|
|
21692
21792
|
event_id: sourceRef.eventId,
|
|
@@ -21731,6 +21831,163 @@ function fromNormalizedRawEvidenceWire(evidence) {
|
|
|
21731
21831
|
metadata: evidence.metadata
|
|
21732
21832
|
};
|
|
21733
21833
|
}
|
|
21834
|
+
function sameMessageContent(first, second) {
|
|
21835
|
+
if (!first || !second) return false;
|
|
21836
|
+
return first.role === second.role && JSON.stringify(first.content) === JSON.stringify(second.content);
|
|
21837
|
+
}
|
|
21838
|
+
function buildTraceMessages(input, output) {
|
|
21839
|
+
const outputMessages = output ?? [];
|
|
21840
|
+
if (outputMessages.length === 0) {
|
|
21841
|
+
return input ?? [];
|
|
21842
|
+
}
|
|
21843
|
+
const outputLooksLikeFullTranscript = outputMessages.some(
|
|
21844
|
+
(message) => message.role === "user" || message.role === "system"
|
|
21845
|
+
);
|
|
21846
|
+
if (outputLooksLikeFullTranscript) {
|
|
21847
|
+
return outputMessages;
|
|
21848
|
+
}
|
|
21849
|
+
const inputMessages = input ?? [];
|
|
21850
|
+
if (inputMessages.length === 1 && outputMessages.length > 0 && sameMessageContent(inputMessages[0], outputMessages[0])) {
|
|
21851
|
+
return outputMessages;
|
|
21852
|
+
}
|
|
21853
|
+
return [...inputMessages, ...outputMessages];
|
|
21854
|
+
}
|
|
21855
|
+
function toTraceMessage(message) {
|
|
21856
|
+
return {
|
|
21857
|
+
role: message.role,
|
|
21858
|
+
name: message.name,
|
|
21859
|
+
content: message.content,
|
|
21860
|
+
tokenUsage: message.tokenUsage,
|
|
21861
|
+
metadata: message.metadata
|
|
21862
|
+
};
|
|
21863
|
+
}
|
|
21864
|
+
function toTraceError(error40) {
|
|
21865
|
+
return typeof error40 === "string" ? { message: error40 } : error40;
|
|
21866
|
+
}
|
|
21867
|
+
function buildTraceFromMessages(options = {}) {
|
|
21868
|
+
const messages = buildTraceMessages(options.input, options.output);
|
|
21869
|
+
const computed = computeTraceSummary(messages);
|
|
21870
|
+
const summary = options.summary ?? computed.trace;
|
|
21871
|
+
const events = [];
|
|
21872
|
+
let ordinal = 0;
|
|
21873
|
+
for (const [messageIndex, message] of messages.entries()) {
|
|
21874
|
+
const eventId = `message-${messageIndex}`;
|
|
21875
|
+
events.push({
|
|
21876
|
+
eventId,
|
|
21877
|
+
ordinal: ordinal++,
|
|
21878
|
+
type: "message",
|
|
21879
|
+
timestamp: message.startTime,
|
|
21880
|
+
durationMs: message.durationMs,
|
|
21881
|
+
message: toTraceMessage(message),
|
|
21882
|
+
metadata: { message_index: messageIndex }
|
|
21883
|
+
});
|
|
21884
|
+
for (const [toolIndex, toolCall] of (message.toolCalls ?? []).entries()) {
|
|
21885
|
+
const toolEventId = `message-${messageIndex}-tool-${toolIndex}`;
|
|
21886
|
+
events.push({
|
|
21887
|
+
eventId: toolEventId,
|
|
21888
|
+
parentEventId: eventId,
|
|
21889
|
+
ordinal: ordinal++,
|
|
21890
|
+
type: "tool_call",
|
|
21891
|
+
timestamp: toolCall.startTime,
|
|
21892
|
+
durationMs: toolCall.durationMs,
|
|
21893
|
+
tool: {
|
|
21894
|
+
name: toolCall.tool,
|
|
21895
|
+
callId: toolCall.id,
|
|
21896
|
+
input: toolCall.input,
|
|
21897
|
+
output: toolCall.output,
|
|
21898
|
+
status: "ok"
|
|
21899
|
+
},
|
|
21900
|
+
metadata: {
|
|
21901
|
+
message_index: messageIndex,
|
|
21902
|
+
tool_index: toolIndex
|
|
21903
|
+
}
|
|
21904
|
+
});
|
|
21905
|
+
if (toolCall.output !== void 0) {
|
|
21906
|
+
events.push({
|
|
21907
|
+
eventId: `${toolEventId}-result`,
|
|
21908
|
+
parentEventId: toolEventId,
|
|
21909
|
+
ordinal: ordinal++,
|
|
21910
|
+
type: "tool_result",
|
|
21911
|
+
timestamp: toolCall.endTime,
|
|
21912
|
+
tool: {
|
|
21913
|
+
name: toolCall.tool,
|
|
21914
|
+
callId: toolCall.id,
|
|
21915
|
+
output: toolCall.output,
|
|
21916
|
+
status: "ok"
|
|
21917
|
+
},
|
|
21918
|
+
metadata: {
|
|
21919
|
+
message_index: messageIndex,
|
|
21920
|
+
tool_index: toolIndex
|
|
21921
|
+
}
|
|
21922
|
+
});
|
|
21923
|
+
}
|
|
21924
|
+
}
|
|
21925
|
+
}
|
|
21926
|
+
const finalAssistantIndex = [...messages].map((message, index) => ({ message, index })).reverse().find((entry) => entry.message.role === "assistant")?.index;
|
|
21927
|
+
if (finalAssistantIndex !== void 0) {
|
|
21928
|
+
const finalMessage = messages[finalAssistantIndex];
|
|
21929
|
+
events.push({
|
|
21930
|
+
eventId: "final-response",
|
|
21931
|
+
parentEventId: `message-${finalAssistantIndex}`,
|
|
21932
|
+
ordinal: ordinal++,
|
|
21933
|
+
type: "final_response",
|
|
21934
|
+
timestamp: finalMessage.endTime ?? finalMessage.startTime ?? options.endTime,
|
|
21935
|
+
message: {
|
|
21936
|
+
...toTraceMessage(finalMessage),
|
|
21937
|
+
content: options.finalOutput ?? finalMessage.content
|
|
21938
|
+
},
|
|
21939
|
+
metadata: { message_index: finalAssistantIndex }
|
|
21940
|
+
});
|
|
21941
|
+
}
|
|
21942
|
+
if (options.error) {
|
|
21943
|
+
events.push({
|
|
21944
|
+
eventId: "error",
|
|
21945
|
+
ordinal: ordinal++,
|
|
21946
|
+
type: "error",
|
|
21947
|
+
timestamp: options.endTime,
|
|
21948
|
+
error: toTraceError(options.error)
|
|
21949
|
+
});
|
|
21950
|
+
}
|
|
21951
|
+
return {
|
|
21952
|
+
schemaVersion: TRACE_SCHEMA_VERSION,
|
|
21953
|
+
eventCount: summary.eventCount,
|
|
21954
|
+
toolCalls: summary.toolCalls,
|
|
21955
|
+
errorCount: summary.errorCount + (options.error ? 1 : 0),
|
|
21956
|
+
llmCallCount: summary.llmCallCount,
|
|
21957
|
+
...summary.toolDurations ? { toolDurations: summary.toolDurations } : {},
|
|
21958
|
+
messages,
|
|
21959
|
+
events,
|
|
21960
|
+
tokenUsage: options.tokenUsage,
|
|
21961
|
+
costUsd: options.costUsd,
|
|
21962
|
+
durationMs: options.durationMs,
|
|
21963
|
+
startTime: options.startTime ?? computed.startTime,
|
|
21964
|
+
endTime: options.endTime ?? computed.endTime,
|
|
21965
|
+
metadata: {
|
|
21966
|
+
...options.provider ? { provider: options.provider } : {},
|
|
21967
|
+
...options.target ? { target: options.target } : {},
|
|
21968
|
+
...options.testId ? { eval_case_id: options.testId } : {},
|
|
21969
|
+
...options.conversationId ? { provider_session_id: options.conversationId } : {},
|
|
21970
|
+
...options.metadata
|
|
21971
|
+
}
|
|
21972
|
+
};
|
|
21973
|
+
}
|
|
21974
|
+
function appendErrorEventToTrace(trace, error40, metadata) {
|
|
21975
|
+
return {
|
|
21976
|
+
...trace,
|
|
21977
|
+
errorCount: trace.errorCount + 1,
|
|
21978
|
+
events: [
|
|
21979
|
+
...trace.events,
|
|
21980
|
+
{
|
|
21981
|
+
eventId: `error-${trace.events.length}`,
|
|
21982
|
+
ordinal: trace.events.length,
|
|
21983
|
+
type: "error",
|
|
21984
|
+
timestamp: trace.endTime,
|
|
21985
|
+
error: toTraceError(error40),
|
|
21986
|
+
metadata
|
|
21987
|
+
}
|
|
21988
|
+
]
|
|
21989
|
+
};
|
|
21990
|
+
}
|
|
21734
21991
|
function computeTraceSummary(messages) {
|
|
21735
21992
|
const toolCallCounts = {};
|
|
21736
21993
|
const toolDurations = {};
|
|
@@ -22209,115 +22466,115 @@ var FieldAccuracyGrader = class {
|
|
|
22209
22466
|
* Evaluate a single field against the expected value.
|
|
22210
22467
|
*/
|
|
22211
22468
|
evaluateField(fieldConfig, candidateData, expectedData) {
|
|
22212
|
-
const { path:
|
|
22213
|
-
const candidateValue = resolvePath(candidateData,
|
|
22214
|
-
const expectedValue = resolvePath(expectedData,
|
|
22469
|
+
const { path: path48, match, required: required2 = true, weight = 1 } = fieldConfig;
|
|
22470
|
+
const candidateValue = resolvePath(candidateData, path48);
|
|
22471
|
+
const expectedValue = resolvePath(expectedData, path48);
|
|
22215
22472
|
if (expectedValue === void 0) {
|
|
22216
22473
|
return {
|
|
22217
|
-
path:
|
|
22474
|
+
path: path48,
|
|
22218
22475
|
score: 1,
|
|
22219
22476
|
// No expected value means no comparison needed
|
|
22220
22477
|
weight,
|
|
22221
22478
|
hit: true,
|
|
22222
|
-
message: `${
|
|
22479
|
+
message: `${path48}: no expected value`
|
|
22223
22480
|
};
|
|
22224
22481
|
}
|
|
22225
22482
|
if (candidateValue === void 0) {
|
|
22226
22483
|
if (required2) {
|
|
22227
22484
|
return {
|
|
22228
|
-
path:
|
|
22485
|
+
path: path48,
|
|
22229
22486
|
score: 0,
|
|
22230
22487
|
weight,
|
|
22231
22488
|
hit: false,
|
|
22232
|
-
message: `${
|
|
22489
|
+
message: `${path48} (required, missing)`
|
|
22233
22490
|
};
|
|
22234
22491
|
}
|
|
22235
22492
|
return {
|
|
22236
|
-
path:
|
|
22493
|
+
path: path48,
|
|
22237
22494
|
score: 1,
|
|
22238
22495
|
// Don't penalize missing optional fields
|
|
22239
22496
|
weight: 0,
|
|
22240
22497
|
// Zero weight means it won't affect the score
|
|
22241
22498
|
hit: true,
|
|
22242
|
-
message: `${
|
|
22499
|
+
message: `${path48}: optional field missing`
|
|
22243
22500
|
};
|
|
22244
22501
|
}
|
|
22245
22502
|
switch (match) {
|
|
22246
22503
|
case "exact":
|
|
22247
|
-
return this.compareExact(
|
|
22504
|
+
return this.compareExact(path48, candidateValue, expectedValue, weight);
|
|
22248
22505
|
case "numeric_tolerance":
|
|
22249
22506
|
return this.compareNumericTolerance(
|
|
22250
|
-
|
|
22507
|
+
path48,
|
|
22251
22508
|
candidateValue,
|
|
22252
22509
|
expectedValue,
|
|
22253
22510
|
fieldConfig,
|
|
22254
22511
|
weight
|
|
22255
22512
|
);
|
|
22256
22513
|
case "date":
|
|
22257
|
-
return this.compareDate(
|
|
22514
|
+
return this.compareDate(path48, candidateValue, expectedValue, fieldConfig, weight);
|
|
22258
22515
|
default:
|
|
22259
22516
|
return {
|
|
22260
|
-
path:
|
|
22517
|
+
path: path48,
|
|
22261
22518
|
score: 0,
|
|
22262
22519
|
weight,
|
|
22263
22520
|
hit: false,
|
|
22264
|
-
message: `${
|
|
22521
|
+
message: `${path48}: unknown match type "${match}"`
|
|
22265
22522
|
};
|
|
22266
22523
|
}
|
|
22267
22524
|
}
|
|
22268
22525
|
/**
|
|
22269
22526
|
* Exact equality comparison.
|
|
22270
22527
|
*/
|
|
22271
|
-
compareExact(
|
|
22528
|
+
compareExact(path48, candidateValue, expectedValue, weight) {
|
|
22272
22529
|
if (deepEqual(candidateValue, expectedValue)) {
|
|
22273
22530
|
return {
|
|
22274
|
-
path:
|
|
22531
|
+
path: path48,
|
|
22275
22532
|
score: 1,
|
|
22276
22533
|
weight,
|
|
22277
22534
|
hit: true,
|
|
22278
|
-
message:
|
|
22535
|
+
message: path48
|
|
22279
22536
|
};
|
|
22280
22537
|
}
|
|
22281
22538
|
if (typeof candidateValue !== typeof expectedValue) {
|
|
22282
22539
|
return {
|
|
22283
|
-
path:
|
|
22540
|
+
path: path48,
|
|
22284
22541
|
score: 0,
|
|
22285
22542
|
weight,
|
|
22286
22543
|
hit: false,
|
|
22287
|
-
message: `${
|
|
22544
|
+
message: `${path48} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
|
|
22288
22545
|
};
|
|
22289
22546
|
}
|
|
22290
22547
|
return {
|
|
22291
|
-
path:
|
|
22548
|
+
path: path48,
|
|
22292
22549
|
score: 0,
|
|
22293
22550
|
weight,
|
|
22294
22551
|
hit: false,
|
|
22295
|
-
message: `${
|
|
22552
|
+
message: `${path48} (value mismatch)`
|
|
22296
22553
|
};
|
|
22297
22554
|
}
|
|
22298
22555
|
/**
|
|
22299
22556
|
* Numeric comparison with absolute or relative tolerance.
|
|
22300
22557
|
*/
|
|
22301
|
-
compareNumericTolerance(
|
|
22558
|
+
compareNumericTolerance(path48, candidateValue, expectedValue, fieldConfig, weight) {
|
|
22302
22559
|
const { tolerance = 0, relative = false } = fieldConfig;
|
|
22303
22560
|
const candidateNum = toNumber(candidateValue);
|
|
22304
22561
|
const expectedNum = toNumber(expectedValue);
|
|
22305
22562
|
if (candidateNum === null || expectedNum === null) {
|
|
22306
22563
|
return {
|
|
22307
|
-
path:
|
|
22564
|
+
path: path48,
|
|
22308
22565
|
score: 0,
|
|
22309
22566
|
weight,
|
|
22310
22567
|
hit: false,
|
|
22311
|
-
message: `${
|
|
22568
|
+
message: `${path48} (non-numeric value)`
|
|
22312
22569
|
};
|
|
22313
22570
|
}
|
|
22314
22571
|
if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
|
|
22315
22572
|
return {
|
|
22316
|
-
path:
|
|
22573
|
+
path: path48,
|
|
22317
22574
|
score: 0,
|
|
22318
22575
|
weight,
|
|
22319
22576
|
hit: false,
|
|
22320
|
-
message: `${
|
|
22577
|
+
message: `${path48} (invalid numeric value)`
|
|
22321
22578
|
};
|
|
22322
22579
|
}
|
|
22323
22580
|
const diff = Math.abs(candidateNum - expectedNum);
|
|
@@ -22330,61 +22587,61 @@ var FieldAccuracyGrader = class {
|
|
|
22330
22587
|
}
|
|
22331
22588
|
if (withinTolerance) {
|
|
22332
22589
|
return {
|
|
22333
|
-
path:
|
|
22590
|
+
path: path48,
|
|
22334
22591
|
score: 1,
|
|
22335
22592
|
weight,
|
|
22336
22593
|
hit: true,
|
|
22337
|
-
message: `${
|
|
22594
|
+
message: `${path48} (within tolerance: diff=${diff.toFixed(2)})`
|
|
22338
22595
|
};
|
|
22339
22596
|
}
|
|
22340
22597
|
return {
|
|
22341
|
-
path:
|
|
22598
|
+
path: path48,
|
|
22342
22599
|
score: 0,
|
|
22343
22600
|
weight,
|
|
22344
22601
|
hit: false,
|
|
22345
|
-
message: `${
|
|
22602
|
+
message: `${path48} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
|
|
22346
22603
|
};
|
|
22347
22604
|
}
|
|
22348
22605
|
/**
|
|
22349
22606
|
* Date comparison with format normalization.
|
|
22350
22607
|
*/
|
|
22351
|
-
compareDate(
|
|
22608
|
+
compareDate(path48, candidateValue, expectedValue, fieldConfig, weight) {
|
|
22352
22609
|
const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
|
|
22353
22610
|
const candidateDate = parseDate(String(candidateValue), formats);
|
|
22354
22611
|
const expectedDate = parseDate(String(expectedValue), formats);
|
|
22355
22612
|
if (candidateDate === null) {
|
|
22356
22613
|
return {
|
|
22357
|
-
path:
|
|
22614
|
+
path: path48,
|
|
22358
22615
|
score: 0,
|
|
22359
22616
|
weight,
|
|
22360
22617
|
hit: false,
|
|
22361
|
-
message: `${
|
|
22618
|
+
message: `${path48} (unparseable candidate date)`
|
|
22362
22619
|
};
|
|
22363
22620
|
}
|
|
22364
22621
|
if (expectedDate === null) {
|
|
22365
22622
|
return {
|
|
22366
|
-
path:
|
|
22623
|
+
path: path48,
|
|
22367
22624
|
score: 0,
|
|
22368
22625
|
weight,
|
|
22369
22626
|
hit: false,
|
|
22370
|
-
message: `${
|
|
22627
|
+
message: `${path48} (unparseable expected date)`
|
|
22371
22628
|
};
|
|
22372
22629
|
}
|
|
22373
22630
|
if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
|
|
22374
22631
|
return {
|
|
22375
|
-
path:
|
|
22632
|
+
path: path48,
|
|
22376
22633
|
score: 1,
|
|
22377
22634
|
weight,
|
|
22378
22635
|
hit: true,
|
|
22379
|
-
message:
|
|
22636
|
+
message: path48
|
|
22380
22637
|
};
|
|
22381
22638
|
}
|
|
22382
22639
|
return {
|
|
22383
|
-
path:
|
|
22640
|
+
path: path48,
|
|
22384
22641
|
score: 0,
|
|
22385
22642
|
weight,
|
|
22386
22643
|
hit: false,
|
|
22387
|
-
message: `${
|
|
22644
|
+
message: `${path48} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
|
|
22388
22645
|
};
|
|
22389
22646
|
}
|
|
22390
22647
|
/**
|
|
@@ -22417,11 +22674,11 @@ var FieldAccuracyGrader = class {
|
|
|
22417
22674
|
};
|
|
22418
22675
|
}
|
|
22419
22676
|
};
|
|
22420
|
-
function resolvePath(obj,
|
|
22421
|
-
if (!
|
|
22677
|
+
function resolvePath(obj, path48) {
|
|
22678
|
+
if (!path48 || !obj) {
|
|
22422
22679
|
return void 0;
|
|
22423
22680
|
}
|
|
22424
|
-
const parts =
|
|
22681
|
+
const parts = path48.split(/\.|\[|\]/).filter((p) => p.length > 0);
|
|
22425
22682
|
let current = obj;
|
|
22426
22683
|
for (const part of parts) {
|
|
22427
22684
|
if (current === null || current === void 0) {
|
|
@@ -22955,8 +23212,8 @@ var TokenUsageGrader = class {
|
|
|
22955
23212
|
};
|
|
22956
23213
|
}
|
|
22957
23214
|
};
|
|
22958
|
-
function getNestedValue(obj,
|
|
22959
|
-
const parts =
|
|
23215
|
+
function getNestedValue(obj, path48) {
|
|
23216
|
+
const parts = path48.split(".");
|
|
22960
23217
|
let current = obj;
|
|
22961
23218
|
for (const part of parts) {
|
|
22962
23219
|
if (current === null || current === void 0 || typeof current !== "object") {
|
|
@@ -27515,7 +27772,11 @@ ${prompt}` : prompt;
|
|
|
27515
27772
|
env.AZURE_OPENAI_API_KEY = this.config.apiKey;
|
|
27516
27773
|
}
|
|
27517
27774
|
if (this.config.baseUrl) {
|
|
27518
|
-
|
|
27775
|
+
if (/^https?:\/\//.test(this.config.baseUrl)) {
|
|
27776
|
+
env.AZURE_OPENAI_BASE_URL = this.config.baseUrl;
|
|
27777
|
+
} else {
|
|
27778
|
+
env.AZURE_OPENAI_RESOURCE_NAME = extractAzureResourceName(this.config.baseUrl);
|
|
27779
|
+
}
|
|
27519
27780
|
}
|
|
27520
27781
|
} else {
|
|
27521
27782
|
if (this.config.apiKey) {
|
|
@@ -28774,6 +29035,399 @@ var ProviderRegistry = class {
|
|
|
28774
29035
|
return factory(target);
|
|
28775
29036
|
}
|
|
28776
29037
|
};
|
|
29038
|
+
// Version tag that every replay fixture record must carry in `schema_version`.
var REPLAY_FIXTURE_SCHEMA_VERSION = "agentv.replay_fixture.v1";

// Token counters; all values must be non-negative, unknown keys are rejected.
var TokenUsageWireSchema2 = external_exports.object({
  input: external_exports.number().nonnegative(),
  output: external_exports.number().nonnegative(),
  cached: external_exports.number().nonnegative().optional(),
  reasoning: external_exports.number().nonnegative().optional()
}).strict();

// Optional timing fields shared by the tool-call and message shapes. They are
// spread at the position the fields originally occupied so key order is kept.
var wireTimingFields = {
  start_time: external_exports.string().optional(),
  end_time: external_exports.string().optional(),
  duration_ms: external_exports.number().nonnegative().optional()
};

// One tool invocation inside a message (snake_case wire form).
var ToolCallWireSchema = external_exports.object({
  tool: external_exports.string().min(1),
  input: external_exports.unknown().optional(),
  output: external_exports.unknown().optional(),
  id: external_exports.string().optional(),
  ...wireTimingFields
}).strict();

// One message of the recorded output.
var MessageWireSchema = external_exports.object({
  role: external_exports.string().min(1),
  name: external_exports.string().optional(),
  content: external_exports.unknown().optional(),
  tool_calls: external_exports.array(ToolCallWireSchema).optional(),
  ...wireTimingFields,
  metadata: external_exports.record(external_exports.unknown()).optional(),
  token_usage: TokenUsageWireSchema2.optional()
}).strict();

// One JSONL line of a replay fixture file. Required: schema_version, suite,
// test_id, source_target and output; everything else is optional.
var ReplayFixtureWireSchema = external_exports.object({
  schema_version: external_exports.literal(REPLAY_FIXTURE_SCHEMA_VERSION),
  suite: external_exports.string().min(1),
  eval_path: external_exports.string().min(1).optional(),
  test_id: external_exports.string().min(1),
  source_target: external_exports.string().min(1),
  attempt: external_exports.number().int().min(0).optional(),
  variant: external_exports.string().min(1).nullable().optional(),
  fixture_id: external_exports.string().min(1).optional(),
  recorded_at: external_exports.string().optional(),
  source: external_exports.record(external_exports.unknown()).optional(),
  redaction: external_exports.record(external_exports.unknown()).optional(),
  output: external_exports.array(MessageWireSchema),
  transcript: external_exports.unknown().optional(),
  token_usage: TokenUsageWireSchema2.optional(),
  cost_usd: external_exports.number().nonnegative().optional(),
  duration_ms: external_exports.number().nonnegative().optional(),
  start_time: external_exports.string().optional(),
  end_time: external_exports.string().optional()
}).strict();

// Per-file promise chains used by appendReplayFixtureRecord, keyed by absolute path.
var appendQueues = /* @__PURE__ */ new Map();
|
|
29086
|
+
/**
 * Converts a validated snake_case fixture record into its camelCase in-memory
 * form. A missing `attempt` becomes 0 and a null `variant` becomes undefined.
 *
 * @param wire Parsed wire record.
 * @returns The camelCase record; every message is mapped with fromWireMessage.
 */
function fromWireRecord(wire) {
  const {
    schema_version: schemaVersion,
    suite,
    eval_path: evalPath,
    test_id: testId,
    source_target: sourceTarget,
    attempt,
    variant,
    fixture_id: fixtureId,
    recorded_at: recordedAt,
    source,
    redaction,
    output,
    transcript,
    token_usage: tokenUsage,
    cost_usd: costUsd,
    duration_ms: durationMs,
    start_time: startTime,
    end_time: endTime
  } = wire;
  return {
    schemaVersion,
    suite,
    evalPath,
    testId,
    sourceTarget,
    attempt: attempt ?? 0,
    variant: variant ?? void 0,
    fixtureId,
    recordedAt,
    source,
    redaction,
    output: output.map(fromWireMessage),
    transcript,
    tokenUsage,
    costUsd,
    durationMs,
    startTime,
    endTime
  };
}
|
|
29108
|
+
/**
 * Converts one snake_case wire message into its camelCase form.
 *
 * @param wire Wire message.
 * @returns Object with role, name, content, toolCalls, startTime, endTime,
 *          durationMs, metadata and tokenUsage; absent inputs stay undefined.
 */
function fromWireMessage(wire) {
  const {
    role,
    name,
    content,
    tool_calls: wireToolCalls,
    start_time: startTime,
    end_time: endTime,
    duration_ms: durationMs,
    metadata,
    token_usage: tokenUsage
  } = wire;
  // Tool calls are optional; only map them when the list is present.
  const toolCalls = wireToolCalls?.map(fromWireToolCall);
  return { role, name, content, toolCalls, startTime, endTime, durationMs, metadata, tokenUsage };
}
|
|
29121
|
+
/**
 * Converts one snake_case wire tool call into its camelCase form.
 *
 * @param wire Wire tool call.
 * @returns Object with tool, input, output, id, startTime, endTime, durationMs.
 */
function fromWireToolCall(wire) {
  const {
    tool,
    input,
    output,
    id,
    start_time: startTime,
    end_time: endTime,
    duration_ms: durationMs
  } = wire;
  return { tool, input, output, id, startTime, endTime, durationMs };
}
|
|
29132
|
+
/**
 * Converts a camelCase fixture record to its snake_case wire form and
 * validates it against ReplayFixtureWireSchema.
 *
 * @param record2 In-memory fixture record.
 * @returns The parsed wire record.
 * @throws Whatever `ReplayFixtureWireSchema.parse` throws for an invalid record.
 */
function toWireRecord(record2) {
  // Undefined top-level fields are removed before validation; an undefined
  // variant is written as an explicit null.
  const candidate = dropUndefined({
    schema_version: record2.schemaVersion,
    suite: record2.suite,
    eval_path: record2.evalPath,
    test_id: record2.testId,
    source_target: record2.sourceTarget,
    attempt: record2.attempt,
    variant: record2.variant ?? null,
    fixture_id: record2.fixtureId,
    recorded_at: record2.recordedAt,
    source: record2.source,
    redaction: record2.redaction,
    output: record2.output.map(toWireMessage),
    transcript: record2.transcript,
    token_usage: record2.tokenUsage,
    cost_usd: record2.costUsd,
    duration_ms: record2.durationMs,
    start_time: record2.startTime,
    end_time: record2.endTime
  });
  return ReplayFixtureWireSchema.parse(candidate);
}
|
|
29156
|
+
/**
 * Converts one camelCase message into its snake_case wire form.
 *
 * @param message In-memory message.
 * @returns Wire message; absent inputs stay as undefined-valued keys.
 */
function toWireMessage(message) {
  const { role, name, content, toolCalls, startTime, endTime, durationMs, metadata, tokenUsage } = message;
  return {
    role,
    name,
    content,
    // Tool calls are optional; only map them when the list is present.
    tool_calls: toolCalls?.map(toWireToolCall),
    start_time: startTime,
    end_time: endTime,
    duration_ms: durationMs,
    metadata,
    token_usage: tokenUsage
  };
}
|
|
29169
|
+
/**
 * Converts one camelCase tool call into its snake_case wire form.
 *
 * @param toolCall In-memory tool call.
 * @returns Object with tool, input, output, id, start_time, end_time, duration_ms.
 */
function toWireToolCall(toolCall) {
  const { tool, input, output, id, startTime, endTime, durationMs } = toolCall;
  return {
    tool,
    input,
    output,
    id,
    start_time: startTime,
    end_time: endTime,
    duration_ms: durationMs
  };
}
|
|
29180
|
+
/**
 * Returns a new plain object holding the own enumerable entries of `value`
 * whose value is not `undefined`. Nested objects are not traversed, and
 * `null`, `0`, `""` and `false` are kept.
 */
function dropUndefined(value) {
  const kept = [];
  for (const pair of Object.entries(value)) {
    if (pair[1] !== void 0) {
      kept.push(pair);
    }
  }
  return Object.fromEntries(kept);
}
|
|
29183
|
+
/**
 * Renders a Zod validation error as a single line of the form
 * `"<path>: <message>; <path>: <message>"`.
 *
 * The issue list is read from `issues`, falling back to `errors` when
 * `issues` is not defined, so the formatter itself does not throw when the
 * `errors` property is absent.
 *
 * @param error40 Validation error exposing `issues` (or `errors`).
 * @returns The joined description; issues with an empty path are labelled
 *          `<record>`. Returns "" when there are no issues.
 */
function formatZodError(error40) {
  const issues = error40.issues ?? error40.errors ?? [];
  return issues.map((issue2) => {
    const location = issue2.path.length > 0 ? issue2.path.join(".") : "<record>";
    return `${location}: ${issue2.message}`;
  }).join("; ");
}
|
|
29189
|
+
/**
 * Reads a replay fixture JSONL file and returns its records in camelCase form.
 *
 * Blank lines are skipped. Each remaining line must be valid JSON and must
 * satisfy ReplayFixtureWireSchema. Reported line numbers are 1-based.
 *
 * @param fixturesPath Path of the JSONL file.
 * @returns Array of records, in file order.
 * @throws Error when the file cannot be read, a line is not valid JSON, or a
 *         record fails schema validation. Read and JSON-parse failures carry
 *         the underlying error as `cause`.
 */
async function readReplayFixtureRecords(fixturesPath) {
  let raw;
  try {
    raw = await readFile6(fixturesPath, "utf8");
  } catch (error40) {
    const reason = error40 instanceof Error ? error40.message : String(error40);
    // Keep the original error reachable for callers that inspect it.
    throw new Error(`Replay fixture file not found or unreadable: ${fixturesPath}: ${reason}`, {
      cause: error40
    });
  }
  const records = [];
  const lines = raw.split(/\r?\n/);
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i].trim();
    if (line.length === 0) {
      continue;
    }
    let parsed;
    try {
      parsed = JSON.parse(line);
    } catch (error40) {
      const reason = error40 instanceof Error ? error40.message : String(error40);
      throw new Error(`Invalid replay fixture JSONL at ${fixturesPath}:${i + 1}: ${reason}`, {
        cause: error40
      });
    }
    const result = ReplayFixtureWireSchema.safeParse(parsed);
    if (!result.success) {
      throw new Error(
        `Invalid replay fixture record at ${fixturesPath}:${i + 1}: ${formatZodError(result.error)}`
      );
    }
    records.push(fromWireRecord(result.data));
  }
  return records;
}
|
|
29221
|
+
/**
 * Serializes one fixture record as a single JSON string: the record is first
 * converted and validated by toWireRecord, then passed to JSON.stringify.
 */
function serializeReplayFixtureRecord(record2) {
  const wireRecord = toWireRecord(record2);
  return JSON.stringify(wireRecord);
}
|
|
29224
|
+
/**
 * Appends one record as a JSON line to the fixture file, creating the parent
 * directory when needed.
 *
 * Appends made through this function to the same absolute path are chained on
 * one promise per path (`appendQueues`), so each starts after the previous
 * one has settled.
 *
 * @param fixturesPath Target file; resolved to an absolute path.
 * @param record2      Record to serialize and append.
 * @returns Promise that settles with this append's outcome (rejects on failure).
 */
async function appendReplayFixtureRecord(fixturesPath, record2) {
  const absolutePath = path17.resolve(fixturesPath);
  const queueTail = appendQueues.get(absolutePath) ?? Promise.resolve();
  const writeRecord = async () => {
    await mkdir9(path17.dirname(absolutePath), { recursive: true });
    const line = `${serializeReplayFixtureRecord(record2)}\n`;
    await writeFile4(absolutePath, line, { encoding: "utf8", flag: "a" });
  };
  const next = queueTail.then(writeRecord);
  // The stored tail swallows rejections so one failed append does not block
  // later ones; the caller still observes the failure through `next`.
  const settledTail = next.catch(() => {
  });
  appendQueues.set(absolutePath, settledTail);
  await next;
}
|
|
29242
|
+
/**
 * Returns the single record matching `lookup` (as decided by
 * replayRecordMatches).
 *
 * @param records Candidate records.
 * @param lookup  Identity to search for.
 * @returns The unique matching record.
 * @throws Error when no record, or more than one record, matches.
 */
function findReplayFixtureRecord(records, lookup) {
  const matches = records.filter((candidate) => replayRecordMatches(candidate, lookup));
  if (matches.length === 1) {
    return matches[0];
  }
  const key = formatLookupKey(lookup);
  const problem = matches.length === 0
    ? `Replay fixture lookup found no record for ${key}`
    : `Replay fixture lookup found ${matches.length} duplicate records for ${key}`;
  throw new Error(problem);
}
|
|
29253
|
+
/**
 * Decides whether `record2` is the fixture identified by `lookup`.
 *
 * @param record2 Fixture record (camelCase form).
 * @param lookup  Identity with optional suite/evalPath/attempt/variant and
 *                required testId/sourceTarget.
 * @returns true when suite (if given), eval path (if the record has one),
 *          testId, sourceTarget, attempt and variant all agree.
 * @throws Error when the lookup carries neither a suite nor an evalPath. The
 *         check runs after the suite comparison, per record.
 */
function replayRecordMatches(record2, lookup) {
  const { suite, evalPath } = lookup;
  if (suite && record2.suite !== suite) {
    return false;
  }
  if (!suite && !evalPath) {
    throw new Error("Replay fixture lookup requires suite or eval_path identity");
  }
  // A record that names its eval file only matches a lookup that names the same one.
  if (record2.evalPath) {
    if (!evalPath || !sameEvalPath(record2.evalPath, evalPath)) {
      return false;
    }
  }
  const sameTest = record2.testId === lookup.testId;
  const sameTarget = record2.sourceTarget === lookup.sourceTarget;
  const sameAttempt = record2.attempt === (lookup.attempt ?? 0);
  // null and undefined variants are treated as the same "no variant" value.
  const sameVariant = (record2.variant ?? null) === (lookup.variant ?? null);
  return sameTest && sameTarget && sameAttempt && sameVariant;
}
|
|
29268
|
+
/**
 * Normalizes an eval path string for comparison: backslashes become forward
 * slashes, one leading "./" is removed, and runs of "/" collapse to a single
 * "/". Purely textual; the file system is not consulted.
 */
function normalizeEvalPath(value) {
  const forwardSlashed = value.replace(/\\/g, "/");
  const withoutDotPrefix = forwardSlashed.replace(/^\.\//, "");
  return withoutDotPrefix.replace(/\/+/g, "/");
}
|
|
29271
|
+
/**
 * Compares a record's eval path with a lookup's eval path after
 * normalizeEvalPath.
 *
 * @returns true when the normalized paths are equal, or when `lookupPath` is
 *          absolute and its normalized form ends with "/" followed by the
 *          normalized record path.
 */
function sameEvalPath(recordPath, lookupPath) {
  const normalizedRecord = normalizeEvalPath(recordPath);
  const normalizedLookup = normalizeEvalPath(lookupPath);
  return normalizedRecord === normalizedLookup
    || (path17.isAbsolute(lookupPath) && normalizedLookup.endsWith(`/${normalizedRecord}`));
}
|
|
29279
|
+
/**
 * Builds the space-separated `key=value` description of a lookup that is used
 * in error messages. `suite` and `eval_path` appear only when set; `attempt`
 * defaults to 0 and a missing variant is shown as `<none>`.
 */
function formatLookupKey(lookup) {
  const parts = [];
  if (lookup.suite) {
    parts.push(`suite=${lookup.suite}`);
  }
  if (lookup.evalPath) {
    parts.push(`eval_path=${lookup.evalPath}`);
  }
  parts.push(
    `test_id=${lookup.testId}`,
    `source_target=${lookup.sourceTarget}`,
    `attempt=${lookup.attempt ?? 0}`,
    `variant=${lookup.variant ?? "<none>"}`
  );
  return parts.join(" ");
}
|
|
29290
|
+
/**
 * Builds a provider response object from a fixture record.
 *
 * Output, usage, cost and timing fields are copied as-is. The record's
 * identity and provenance fields are placed under `raw.replay_fixture`, with
 * undefined entries removed.
 */
function replayFixtureRecordToProviderResponse(record2) {
  const { output, tokenUsage, costUsd, durationMs, startTime, endTime } = record2;
  const replayFixture = dropUndefined({
    fixture_id: record2.fixtureId,
    suite: record2.suite,
    eval_path: record2.evalPath,
    test_id: record2.testId,
    source_target: record2.sourceTarget,
    attempt: record2.attempt,
    variant: record2.variant,
    source: record2.source,
    redaction: record2.redaction,
    transcript: record2.transcript
  });
  return {
    output,
    tokenUsage,
    costUsd,
    durationMs,
    startTime,
    endTime,
    raw: { replay_fixture: replayFixture }
  };
}
|
|
29314
|
+
/**
 * Assembles the fixture record for one provider response.
 *
 * @param evalCase      Eval case; `suite` (trimmed) and `id` are read.
 * @param evalFilePath  Eval file path; stored relative to `repoRoot` with
 *                      forward slashes.
 * @param repoRoot      Base directory for the relative eval path.
 * @param target        Target descriptor; `name` is the fallback source target.
 * @param sourceTarget  Optional source target name; blank values fall back to
 *                      `target.name`.
 * @param attempt       Attempt number stored on the record.
 * @param variant       Optional variant stored on the record.
 * @param response      Provider response; a missing `output` becomes [].
 * @param now           Clock used for `recordedAt`; defaults to the current time.
 * @returns The camelCase fixture record.
 * @throws Error when the eval case has no non-blank suite.
 */
function buildReplayFixtureRecord({
  evalCase,
  evalFilePath,
  repoRoot,
  target,
  sourceTarget,
  attempt,
  variant,
  response,
  now = () => /* @__PURE__ */ new Date()
}) {
  const suite = evalCase.suite?.trim();
  if (!suite) {
    throw new Error(`Cannot record replay fixture for test '${evalCase.id}': suite is missing`);
  }
  const absoluteEvalFile = path17.resolve(evalFilePath);
  const evalPath = path17.relative(repoRoot, absoluteEvalFile).replace(/\\/g, "/");
  const resolvedSourceTarget = sourceTarget?.trim() || target.name;
  // The same identity fields feed both the fixture id and the record itself.
  const identity = {
    suite,
    evalPath,
    testId: evalCase.id,
    sourceTarget: resolvedSourceTarget,
    attempt,
    variant
  };
  const fixtureId = buildFixtureId(identity);
  return {
    schemaVersion: REPLAY_FIXTURE_SCHEMA_VERSION,
    ...identity,
    fixtureId,
    recordedAt: now().toISOString(),
    source: buildSourceMetadata(target, resolvedSourceTarget),
    output: response.output ?? [],
    transcript: extractTranscript(response.raw),
    tokenUsage: response.tokenUsage,
    costUsd: response.costUsd,
    durationMs: response.durationMs,
    startTime: response.startTime,
    endTime: response.endTime
  };
}
|
|
29359
|
+
/**
 * Derives a deterministic fixture id of the form
 * `<sourceTarget>-<testId>-<12 hex chars>`.
 *
 * The hex suffix is the first 12 characters of the SHA-256 digest of suite,
 * evalPath, testId, sourceTarget, attempt and variant joined with NUL
 * characters (a missing variant contributes an empty string).
 */
function buildFixtureId(input) {
  const { suite, evalPath, testId, sourceTarget, attempt, variant } = input;
  const identity = [suite, evalPath, testId, sourceTarget, String(attempt), variant ?? ""].join("\0");
  const hasher = createHash("sha256");
  hasher.update(identity);
  const digest = hasher.digest("hex").slice(0, 12);
  return `${sourceTarget}-${testId}-${digest}`;
}
|
|
29371
|
+
/**
 * Builds the `source` provenance object for a fixture record: provider kind,
 * requested target name, resolved target name and, when one can be extracted,
 * the model name. Undefined entries are omitted.
 */
function buildSourceMetadata(target, sourceTarget) {
  const metadata = {
    provider: target.kind,
    target_name: sourceTarget,
    resolved_target: target.name,
    model: extractModelName(target)
  };
  return dropUndefined(metadata);
}
|
|
29379
|
+
/**
 * Returns the model identifier from `target.config`: `model` when it is a
 * string, otherwise `deploymentName` when that is a string, otherwise
 * undefined.
 */
function extractModelName(target) {
  const { model, deploymentName } = target.config;
  if (typeof model === "string") {
    return model;
  }
  return typeof deploymentName === "string" ? deploymentName : void 0;
}
|
|
29389
|
+
/**
 * Returns `raw.transcript` when `raw` is a non-null, non-array object;
 * otherwise returns undefined.
 */
function extractTranscript(raw) {
  const isRecord = typeof raw === "object" && raw !== null && !Array.isArray(raw);
  return isRecord ? raw.transcript : void 0;
}
|
|
29396
|
+
/**
 * Provider that answers requests from recorded replay fixtures.
 *
 * The fixture file at `config.fixturesPath` is read on every `invoke` and
 * `invokeBatch` call; each request is then resolved to exactly one record.
 */
var ReplayProvider = class {
  id;
  kind = "replay";
  targetName;
  supportsBatch = true;
  config;
  /**
   * @param targetName Name of the replay target; also used in `id` as `replay:<targetName>`.
   * @param config2    Replay configuration (fixturesPath, sourceTarget and optional suite/evalPath/variant are read).
   */
  constructor(targetName, config2) {
    this.id = `replay:${targetName}`;
    this.targetName = targetName;
    this.config = config2;
  }
  /** Resolves a single request against the fixture file. */
  async invoke(request) {
    const fixtures = await readReplayFixtureRecords(this.config.fixturesPath);
    return this.responseForRequest(fixtures, request);
  }
  /** Resolves several requests against one read of the fixture file, preserving request order. */
  async invokeBatch(requests) {
    const fixtures = await readReplayFixtureRecords(this.config.fixturesPath);
    const resolve = (request) => this.responseForRequest(fixtures, request);
    return requests.map(resolve);
  }
  /**
   * Finds the record for `request` and converts it to a provider response.
   * Suite and eval path come from the config when set, otherwise from the request.
   *
   * @throws Error when the request has no evalCaseId, or when the lookup does
   *         not yield exactly one record.
   */
  responseForRequest(records, request) {
    const { evalCaseId: testId } = request;
    if (!testId) {
      throw new Error("Replay provider requires evalCaseId on provider requests");
    }
    const { suite, evalPath, sourceTarget, variant } = this.config;
    const lookup = {
      suite: suite ?? request.suite,
      evalPath: evalPath ?? request.evalFilePath,
      testId,
      sourceTarget,
      attempt: request.attempt ?? 0,
      variant
    };
    return replayFixtureRecordToProviderResponse(findReplayFixtureRecord(records, lookup));
  }
};
|
|
28777
29431
|
async function pathExists(target) {
|
|
28778
29432
|
try {
|
|
28779
29433
|
await access3(target, constants3.F_OK);
|
|
@@ -28783,13 +29437,13 @@ async function pathExists(target) {
|
|
|
28783
29437
|
}
|
|
28784
29438
|
}
|
|
28785
29439
|
/**
 * Creates the directory `target`, including any missing parent directories.
 */
async function ensureDir(target) {
  const options = { recursive: true };
  await mkdir10(target, options);
}
|
|
28788
29442
|
/**
 * Lists the entries of directory `target`.
 *
 * @returns One `{ name, absolutePath, isDirectory }` object per entry, where
 *          `absolutePath` is `target` joined with the entry name.
 */
async function readDirEntries(target) {
  const dirents = await readdir3(target, { withFileTypes: true });
  const described = [];
  for (const dirent of dirents) {
    described.push({
      name: dirent.name,
      absolutePath: path18.join(target, dirent.name),
      isDirectory: dirent.isDirectory()
    });
  }
  return described;
}
|
|
@@ -28803,7 +29457,7 @@ async function removeIfExists(target) {
|
|
|
28803
29457
|
}
|
|
28804
29458
|
}
|
|
28805
29459
|
function pathToFileUri2(filePath) {
|
|
28806
|
-
const absolutePath =
|
|
29460
|
+
const absolutePath = path19.isAbsolute(filePath) ? filePath : path19.resolve(filePath);
|
|
28807
29461
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
28808
29462
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
28809
29463
|
return `file:///${normalizedPath}`;
|
|
@@ -28895,8 +29549,8 @@ function createBatchRequestPrompt(userQuery, responseFileTmp, responseFileFinal,
|
|
|
28895
29549
|
});
|
|
28896
29550
|
}
|
|
28897
29551
|
function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateContent) {
|
|
28898
|
-
const requestLines = requestFiles.map((file2, index) => `${index + 1}. messages/${
|
|
28899
|
-
const responseList = responseFiles.map((file2) => `"${
|
|
29552
|
+
const requestLines = requestFiles.map((file2, index) => `${index + 1}. messages/${path20.basename(file2)}`).join("\n");
|
|
29553
|
+
const responseList = responseFiles.map((file2) => `"${path20.basename(file2)}"`).join(", ");
|
|
28900
29554
|
return renderTemplate2(templateContent, {
|
|
28901
29555
|
requestFiles: requestLines,
|
|
28902
29556
|
responseList
|
|
@@ -28935,7 +29589,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
|
|
|
28935
29589
|
const maxAttempts = 10;
|
|
28936
29590
|
while (attempts < maxAttempts) {
|
|
28937
29591
|
try {
|
|
28938
|
-
const content = await
|
|
29592
|
+
const content = await readFile7(responseFileFinal, { encoding: "utf8" });
|
|
28939
29593
|
if (!silent) {
|
|
28940
29594
|
process.stdout.write(`${content}
|
|
28941
29595
|
`);
|
|
@@ -28956,7 +29610,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
|
|
|
28956
29610
|
}
|
|
28957
29611
|
async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, silent = false, timeoutMs = DEFAULT_TIMEOUT_MS) {
|
|
28958
29612
|
if (!silent) {
|
|
28959
|
-
const fileList = responseFilesFinal.map((file2) =>
|
|
29613
|
+
const fileList = responseFilesFinal.map((file2) => path21.basename(file2)).join(", ");
|
|
28960
29614
|
console.error(`waiting for ${responseFilesFinal.length} batch response(s): ${fileList}`);
|
|
28961
29615
|
}
|
|
28962
29616
|
const deadline = Date.now() + timeoutMs;
|
|
@@ -28965,7 +29619,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
28965
29619
|
while (pending.size > 0) {
|
|
28966
29620
|
if (Date.now() >= deadline) {
|
|
28967
29621
|
if (!silent) {
|
|
28968
|
-
const remaining = [...pending].map((f) =>
|
|
29622
|
+
const remaining = [...pending].map((f) => path21.basename(f)).join(", ");
|
|
28969
29623
|
console.error(
|
|
28970
29624
|
`error: timed out after ${Math.round(timeoutMs / 1e3)}s waiting for batch responses. Still pending: ${remaining}`
|
|
28971
29625
|
);
|
|
@@ -28992,7 +29646,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
28992
29646
|
const maxAttempts = 10;
|
|
28993
29647
|
while (attempts < maxAttempts) {
|
|
28994
29648
|
try {
|
|
28995
|
-
const content = await
|
|
29649
|
+
const content = await readFile7(file2, { encoding: "utf8" });
|
|
28996
29650
|
if (!silent) {
|
|
28997
29651
|
process.stdout.write(`${content}
|
|
28998
29652
|
`);
|
|
@@ -29016,7 +29670,7 @@ var DEFAULT_LOCK_NAME = "subagent.lock";
|
|
|
29016
29670
|
var DEFAULT_ALIVE_FILENAME = ".alive";
|
|
29017
29671
|
/**
 * Returns the default subagent directory for a VS Code command name:
 * "vscode-insiders-agents" for "code-insiders", "vscode-agents" for anything
 * else, joined onto getSubagentsRoot().
 */
function getDefaultSubagentRoot(vscodeCmd = "code") {
  let folder = "vscode-agents";
  if (vscodeCmd === "code-insiders") {
    folder = "vscode-insiders-agents";
  }
  return path222.join(getSubagentsRoot(), folder);
}
|
|
29021
29675
|
var DEFAULT_SUBAGENT_ROOT = getDefaultSubagentRoot();
|
|
29022
29676
|
var execAsync3 = promisify3(exec);
|
|
@@ -29082,12 +29736,12 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
29082
29736
|
await raceSpawnError(child);
|
|
29083
29737
|
return true;
|
|
29084
29738
|
}
|
|
29085
|
-
const aliveFile =
|
|
29739
|
+
const aliveFile = path23.join(subagentDir, DEFAULT_ALIVE_FILENAME);
|
|
29086
29740
|
await removeIfExists(aliveFile);
|
|
29087
|
-
const githubAgentsDir =
|
|
29088
|
-
await
|
|
29089
|
-
const wakeupDst =
|
|
29090
|
-
await
|
|
29741
|
+
const githubAgentsDir = path23.join(subagentDir, ".github", "agents");
|
|
29742
|
+
await mkdir11(githubAgentsDir, { recursive: true });
|
|
29743
|
+
const wakeupDst = path23.join(githubAgentsDir, "wakeup.md");
|
|
29744
|
+
await writeFile5(wakeupDst, DEFAULT_WAKEUP_CONTENT, "utf8");
|
|
29091
29745
|
const workspaceChild = spawnVsCode(vscodeCmd, [workspacePath], {
|
|
29092
29746
|
label: "open-workspace"
|
|
29093
29747
|
});
|
|
@@ -29099,7 +29753,7 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
29099
29753
|
"chat",
|
|
29100
29754
|
"-m",
|
|
29101
29755
|
wakeupChatId,
|
|
29102
|
-
`create a file named .alive in the ${
|
|
29756
|
+
`create a file named .alive in the ${path23.basename(subagentDir)} folder`
|
|
29103
29757
|
];
|
|
29104
29758
|
const wakeupChild = spawnVsCode(vscodeCmd, chatArgs, { label: "send-wakeup-chat" });
|
|
29105
29759
|
await raceSpawnError(wakeupChild);
|
|
@@ -29114,27 +29768,27 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
29114
29768
|
return true;
|
|
29115
29769
|
}
|
|
29116
29770
|
async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, requestInstructions, timestamp, vscodeCmd) {
|
|
29117
|
-
const workspacePath =
|
|
29118
|
-
const messagesDir =
|
|
29119
|
-
await
|
|
29120
|
-
const reqFile =
|
|
29121
|
-
await
|
|
29771
|
+
const workspacePath = path23.join(subagentDir, `${path23.basename(subagentDir)}.code-workspace`);
|
|
29772
|
+
const messagesDir = path23.join(subagentDir, "messages");
|
|
29773
|
+
await mkdir11(messagesDir, { recursive: true });
|
|
29774
|
+
const reqFile = path23.join(messagesDir, `${timestamp}_req.md`);
|
|
29775
|
+
await writeFile5(reqFile, requestInstructions, { encoding: "utf8" });
|
|
29122
29776
|
const reqUri = pathToFileUri2(reqFile);
|
|
29123
29777
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
29124
29778
|
for (const attachment of attachmentPaths) {
|
|
29125
29779
|
chatArgs.push("-a", attachment);
|
|
29126
29780
|
}
|
|
29127
29781
|
chatArgs.push("-a", reqFile);
|
|
29128
|
-
chatArgs.push(`Follow instructions in [${
|
|
29782
|
+
chatArgs.push(`Follow instructions in [${path23.basename(reqFile)}](${reqUri})`);
|
|
29129
29783
|
const workspaceReady = await ensureWorkspaceFocused(
|
|
29130
29784
|
workspacePath,
|
|
29131
|
-
|
|
29785
|
+
path23.basename(subagentDir),
|
|
29132
29786
|
subagentDir,
|
|
29133
29787
|
vscodeCmd
|
|
29134
29788
|
);
|
|
29135
29789
|
if (!workspaceReady) {
|
|
29136
29790
|
throw new Error(
|
|
29137
|
-
`VS Code workspace '${
|
|
29791
|
+
`VS Code workspace '${path23.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
|
|
29138
29792
|
);
|
|
29139
29793
|
}
|
|
29140
29794
|
await sleep(500);
|
|
@@ -29142,9 +29796,9 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
|
|
|
29142
29796
|
await raceSpawnError(child);
|
|
29143
29797
|
}
|
|
29144
29798
|
async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, chatInstruction, vscodeCmd) {
|
|
29145
|
-
const workspacePath =
|
|
29146
|
-
const messagesDir =
|
|
29147
|
-
await
|
|
29799
|
+
const workspacePath = path23.join(subagentDir, `${path23.basename(subagentDir)}.code-workspace`);
|
|
29800
|
+
const messagesDir = path23.join(subagentDir, "messages");
|
|
29801
|
+
await mkdir11(messagesDir, { recursive: true });
|
|
29148
29802
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
29149
29803
|
for (const attachment of attachmentPaths) {
|
|
29150
29804
|
chatArgs.push("-a", attachment);
|
|
@@ -29152,13 +29806,13 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
|
|
|
29152
29806
|
chatArgs.push(chatInstruction);
|
|
29153
29807
|
const workspaceReady = await ensureWorkspaceFocused(
|
|
29154
29808
|
workspacePath,
|
|
29155
|
-
|
|
29809
|
+
path23.basename(subagentDir),
|
|
29156
29810
|
subagentDir,
|
|
29157
29811
|
vscodeCmd
|
|
29158
29812
|
);
|
|
29159
29813
|
if (!workspaceReady) {
|
|
29160
29814
|
throw new Error(
|
|
29161
|
-
`VS Code workspace '${
|
|
29815
|
+
`VS Code workspace '${path23.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
|
|
29162
29816
|
);
|
|
29163
29817
|
}
|
|
29164
29818
|
await sleep(500);
|
|
@@ -29180,10 +29834,10 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
29180
29834
|
}
|
|
29181
29835
|
const transformedFolders = workspace.folders.map((folder) => {
|
|
29182
29836
|
const folderPath = folder.path;
|
|
29183
|
-
if (
|
|
29837
|
+
if (path24.isAbsolute(folderPath)) {
|
|
29184
29838
|
return folder;
|
|
29185
29839
|
}
|
|
29186
|
-
const absolutePath =
|
|
29840
|
+
const absolutePath = path24.resolve(templateDir, folderPath);
|
|
29187
29841
|
return {
|
|
29188
29842
|
...folder,
|
|
29189
29843
|
path: absolutePath
|
|
@@ -29205,19 +29859,19 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
29205
29859
|
if (locationMap && typeof locationMap === "object") {
|
|
29206
29860
|
const transformedMap = {};
|
|
29207
29861
|
for (const [locationPath, value] of Object.entries(locationMap)) {
|
|
29208
|
-
const isAbsolute =
|
|
29862
|
+
const isAbsolute = path24.isAbsolute(locationPath);
|
|
29209
29863
|
if (isAbsolute) {
|
|
29210
29864
|
transformedMap[locationPath] = value;
|
|
29211
29865
|
} else {
|
|
29212
29866
|
const firstGlobIndex = locationPath.search(/[*]/);
|
|
29213
29867
|
if (firstGlobIndex === -1) {
|
|
29214
|
-
const resolvedPath =
|
|
29868
|
+
const resolvedPath = path24.resolve(templateDir, locationPath).replace(/\\/g, "/");
|
|
29215
29869
|
transformedMap[resolvedPath] = value;
|
|
29216
29870
|
} else {
|
|
29217
29871
|
const basePathEnd = locationPath.lastIndexOf("/", firstGlobIndex);
|
|
29218
29872
|
const basePath = basePathEnd !== -1 ? locationPath.substring(0, basePathEnd) : ".";
|
|
29219
29873
|
const patternPath = locationPath.substring(basePathEnd !== -1 ? basePathEnd : 0);
|
|
29220
|
-
const resolvedPath = (
|
|
29874
|
+
const resolvedPath = (path24.resolve(templateDir, basePath) + patternPath).replace(
|
|
29221
29875
|
/\\/g,
|
|
29222
29876
|
"/"
|
|
29223
29877
|
);
|
|
@@ -29256,7 +29910,7 @@ async function findUnlockedSubagent(subagentRoot) {
|
|
|
29256
29910
|
number: Number.parseInt(entry.name.split("-")[1] ?? "", 10)
|
|
29257
29911
|
})).filter((entry) => Number.isInteger(entry.number)).sort((a, b) => a.number - b.number);
|
|
29258
29912
|
for (const subagent of subagents) {
|
|
29259
|
-
const lockFile =
|
|
29913
|
+
const lockFile = path25.join(subagent.absolutePath, DEFAULT_LOCK_NAME);
|
|
29260
29914
|
if (!await pathExists(lockFile)) {
|
|
29261
29915
|
return subagent.absolutePath;
|
|
29262
29916
|
}
|
|
@@ -29266,7 +29920,7 @@ async function findUnlockedSubagent(subagentRoot) {
|
|
|
29266
29920
|
async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
29267
29921
|
let workspaceContent;
|
|
29268
29922
|
if (workspaceTemplate) {
|
|
29269
|
-
const workspaceSrc =
|
|
29923
|
+
const workspaceSrc = path25.resolve(workspaceTemplate);
|
|
29270
29924
|
if (!await pathExists(workspaceSrc)) {
|
|
29271
29925
|
throw new Error(`workspace template not found: ${workspaceSrc}`);
|
|
29272
29926
|
}
|
|
@@ -29274,18 +29928,18 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
|
29274
29928
|
if (!stats.isFile()) {
|
|
29275
29929
|
throw new Error(`workspace template must be a file, not a directory: ${workspaceSrc}`);
|
|
29276
29930
|
}
|
|
29277
|
-
const templateText = await
|
|
29931
|
+
const templateText = await readFile8(workspaceSrc, "utf8");
|
|
29278
29932
|
workspaceContent = JSON.parse(templateText);
|
|
29279
29933
|
} else {
|
|
29280
29934
|
workspaceContent = DEFAULT_WORKSPACE_TEMPLATE;
|
|
29281
29935
|
}
|
|
29282
|
-
const workspaceName = `${
|
|
29283
|
-
const workspaceDst =
|
|
29284
|
-
const templateDir = workspaceTemplate ?
|
|
29936
|
+
const workspaceName = `${path25.basename(subagentDir)}.code-workspace`;
|
|
29937
|
+
const workspaceDst = path25.join(subagentDir, workspaceName);
|
|
29938
|
+
const templateDir = workspaceTemplate ? path25.dirname(path25.resolve(workspaceTemplate)) : subagentDir;
|
|
29285
29939
|
const workspaceJson = JSON.stringify(workspaceContent, null, 2);
|
|
29286
29940
|
let transformedContent = transformWorkspacePaths(workspaceJson, templateDir);
|
|
29287
29941
|
if (cwd) {
|
|
29288
|
-
const absCwd =
|
|
29942
|
+
const absCwd = path25.resolve(cwd);
|
|
29289
29943
|
const parsed = JSON.parse(transformedContent);
|
|
29290
29944
|
const alreadyPresent = parsed.folders.some((f) => f.path === absCwd);
|
|
29291
29945
|
if (!alreadyPresent) {
|
|
@@ -29293,36 +29947,36 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
|
29293
29947
|
transformedContent = JSON.stringify(parsed, null, 2);
|
|
29294
29948
|
}
|
|
29295
29949
|
}
|
|
29296
|
-
await
|
|
29297
|
-
const messagesDir =
|
|
29298
|
-
await
|
|
29950
|
+
await writeFile6(workspaceDst, transformedContent, "utf8");
|
|
29951
|
+
const messagesDir = path25.join(subagentDir, "messages");
|
|
29952
|
+
await mkdir12(messagesDir, { recursive: true });
|
|
29299
29953
|
return { workspace: workspaceDst, messagesDir };
|
|
29300
29954
|
}
|
|
29301
29955
|
async function createSubagentLock(subagentDir) {
|
|
29302
|
-
const messagesDir =
|
|
29956
|
+
const messagesDir = path25.join(subagentDir, "messages");
|
|
29303
29957
|
if (await pathExists(messagesDir)) {
|
|
29304
29958
|
const files = await readdir4(messagesDir);
|
|
29305
29959
|
await Promise.all(
|
|
29306
29960
|
files.map(async (file2) => {
|
|
29307
|
-
const target =
|
|
29961
|
+
const target = path25.join(messagesDir, file2);
|
|
29308
29962
|
await removeIfExists(target);
|
|
29309
29963
|
})
|
|
29310
29964
|
);
|
|
29311
29965
|
}
|
|
29312
|
-
const githubAgentsDir =
|
|
29966
|
+
const githubAgentsDir = path25.join(subagentDir, ".github", "agents");
|
|
29313
29967
|
if (await pathExists(githubAgentsDir)) {
|
|
29314
29968
|
const agentFiles = await readdir4(githubAgentsDir);
|
|
29315
29969
|
const preservedFiles = /* @__PURE__ */ new Set(["wakeup.md", "subagent.md"]);
|
|
29316
29970
|
await Promise.all(
|
|
29317
|
-
agentFiles.filter((file2) => file2.endsWith(".md") && !preservedFiles.has(file2)).map((file2) => removeIfExists(
|
|
29971
|
+
agentFiles.filter((file2) => file2.endsWith(".md") && !preservedFiles.has(file2)).map((file2) => removeIfExists(path25.join(githubAgentsDir, file2)))
|
|
29318
29972
|
);
|
|
29319
29973
|
}
|
|
29320
|
-
const lockFile =
|
|
29321
|
-
await
|
|
29974
|
+
const lockFile = path25.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
29975
|
+
await writeFile6(lockFile, "", { encoding: "utf8" });
|
|
29322
29976
|
return lockFile;
|
|
29323
29977
|
}
|
|
29324
29978
|
async function removeSubagentLock(subagentDir) {
|
|
29325
|
-
const lockFile =
|
|
29979
|
+
const lockFile = path25.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
29326
29980
|
await removeIfExists(lockFile);
|
|
29327
29981
|
}
|
|
29328
29982
|
async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspaceTemplate, dryRun, cwd) {
|
|
@@ -29342,9 +29996,9 @@ async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspa
|
|
|
29342
29996
|
return 1;
|
|
29343
29997
|
}
|
|
29344
29998
|
if (promptFile) {
|
|
29345
|
-
const githubAgentsDir =
|
|
29346
|
-
await
|
|
29347
|
-
const agentFile =
|
|
29999
|
+
const githubAgentsDir = path25.join(subagentDir, ".github", "agents");
|
|
30000
|
+
await mkdir12(githubAgentsDir, { recursive: true });
|
|
30001
|
+
const agentFile = path25.join(githubAgentsDir, `${chatId}.md`);
|
|
29348
30002
|
try {
|
|
29349
30003
|
await copyFile(promptFile, agentFile);
|
|
29350
30004
|
} catch (error40) {
|
|
@@ -29361,7 +30015,7 @@ async function resolvePromptFile(promptFile) {
|
|
|
29361
30015
|
if (!promptFile) {
|
|
29362
30016
|
return void 0;
|
|
29363
30017
|
}
|
|
29364
|
-
const resolvedPrompt =
|
|
30018
|
+
const resolvedPrompt = path26.resolve(promptFile);
|
|
29365
30019
|
if (!await pathExists(resolvedPrompt)) {
|
|
29366
30020
|
throw new Error(`Prompt file not found: ${resolvedPrompt}`);
|
|
29367
30021
|
}
|
|
@@ -29377,7 +30031,7 @@ async function resolveAttachments(extraAttachments) {
|
|
|
29377
30031
|
}
|
|
29378
30032
|
const resolved = [];
|
|
29379
30033
|
for (const attachment of extraAttachments) {
|
|
29380
|
-
const resolvedPath =
|
|
30034
|
+
const resolvedPath = path26.resolve(attachment);
|
|
29381
30035
|
if (!await pathExists(resolvedPath)) {
|
|
29382
30036
|
throw new Error(`Attachment not found: ${resolvedPath}`);
|
|
29383
30037
|
}
|
|
@@ -29419,7 +30073,7 @@ async function dispatchAgentSession(options) {
|
|
|
29419
30073
|
error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
|
|
29420
30074
|
};
|
|
29421
30075
|
}
|
|
29422
|
-
const subagentName =
|
|
30076
|
+
const subagentName = path26.basename(subagentDir);
|
|
29423
30077
|
const chatId = Math.random().toString(16).slice(2, 10);
|
|
29424
30078
|
const preparationResult = await prepareSubagentDirectory(
|
|
29425
30079
|
subagentDir,
|
|
@@ -29447,9 +30101,9 @@ async function dispatchAgentSession(options) {
|
|
|
29447
30101
|
};
|
|
29448
30102
|
}
|
|
29449
30103
|
const timestamp = generateTimestamp();
|
|
29450
|
-
const messagesDir =
|
|
29451
|
-
const responseFileTmp =
|
|
29452
|
-
const responseFileFinal =
|
|
30104
|
+
const messagesDir = path26.join(subagentDir, "messages");
|
|
30105
|
+
const responseFileTmp = path26.join(messagesDir, `${timestamp}_res.tmp.md`);
|
|
30106
|
+
const responseFileFinal = path26.join(messagesDir, `${timestamp}_res.md`);
|
|
29453
30107
|
const requestInstructions = createRequestPrompt(
|
|
29454
30108
|
userQuery,
|
|
29455
30109
|
responseFileTmp,
|
|
@@ -29554,7 +30208,7 @@ async function dispatchBatchAgent(options) {
|
|
|
29554
30208
|
error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
|
|
29555
30209
|
};
|
|
29556
30210
|
}
|
|
29557
|
-
subagentName =
|
|
30211
|
+
subagentName = path26.basename(subagentDir);
|
|
29558
30212
|
const chatId = Math.random().toString(16).slice(2, 10);
|
|
29559
30213
|
const preparationResult = await prepareSubagentDirectory(
|
|
29560
30214
|
subagentDir,
|
|
@@ -29585,24 +30239,24 @@ async function dispatchBatchAgent(options) {
|
|
|
29585
30239
|
};
|
|
29586
30240
|
}
|
|
29587
30241
|
const timestamp = generateTimestamp();
|
|
29588
|
-
const messagesDir =
|
|
30242
|
+
const messagesDir = path26.join(subagentDir, "messages");
|
|
29589
30243
|
requestFiles = userQueries.map(
|
|
29590
|
-
(_, index) =>
|
|
30244
|
+
(_, index) => path26.join(messagesDir, `${timestamp}_${index}_req.md`)
|
|
29591
30245
|
);
|
|
29592
30246
|
const responseTmpFiles = userQueries.map(
|
|
29593
|
-
(_, index) =>
|
|
30247
|
+
(_, index) => path26.join(messagesDir, `${timestamp}_${index}_res.tmp.md`)
|
|
29594
30248
|
);
|
|
29595
30249
|
responseFilesFinal = userQueries.map(
|
|
29596
|
-
(_, index) =>
|
|
30250
|
+
(_, index) => path26.join(messagesDir, `${timestamp}_${index}_res.md`)
|
|
29597
30251
|
);
|
|
29598
|
-
const orchestratorFile =
|
|
30252
|
+
const orchestratorFile = path26.join(messagesDir, `${timestamp}_orchestrator.md`);
|
|
29599
30253
|
if (!dryRun) {
|
|
29600
30254
|
await Promise.all(
|
|
29601
30255
|
userQueries.map((query, index) => {
|
|
29602
30256
|
const reqFile = requestFiles[index];
|
|
29603
30257
|
const tmpFile = responseTmpFiles[index];
|
|
29604
30258
|
const finalFile = responseFilesFinal[index];
|
|
29605
|
-
return
|
|
30259
|
+
return writeFile7(
|
|
29606
30260
|
reqFile,
|
|
29607
30261
|
createBatchRequestPrompt(query, tmpFile, finalFile, batchRequestTemplateContent),
|
|
29608
30262
|
{ encoding: "utf8" }
|
|
@@ -29614,7 +30268,7 @@ async function dispatchBatchAgent(options) {
|
|
|
29614
30268
|
responseFilesFinal,
|
|
29615
30269
|
orchestratorTemplateContent
|
|
29616
30270
|
);
|
|
29617
|
-
await
|
|
30271
|
+
await writeFile7(orchestratorFile, orchestratorContent, { encoding: "utf8" });
|
|
29618
30272
|
}
|
|
29619
30273
|
const chatAttachments = [orchestratorFile, ...attachments];
|
|
29620
30274
|
const orchestratorUri = pathToFileUri2(orchestratorFile);
|
|
@@ -29708,7 +30362,7 @@ async function provisionSubagents(options) {
|
|
|
29708
30362
|
if (!Number.isInteger(subagents) || subagents < 1) {
|
|
29709
30363
|
throw new Error("subagents must be a positive integer");
|
|
29710
30364
|
}
|
|
29711
|
-
const targetPath =
|
|
30365
|
+
const targetPath = path27.resolve(targetRoot);
|
|
29712
30366
|
if (!dryRun) {
|
|
29713
30367
|
await ensureDir(targetPath);
|
|
29714
30368
|
}
|
|
@@ -29728,7 +30382,7 @@ async function provisionSubagents(options) {
|
|
|
29728
30382
|
continue;
|
|
29729
30383
|
}
|
|
29730
30384
|
highestNumber = Math.max(highestNumber, parsed);
|
|
29731
|
-
const lockFile =
|
|
30385
|
+
const lockFile = path27.join(entry.absolutePath, lockName);
|
|
29732
30386
|
const locked = await pathExists(lockFile);
|
|
29733
30387
|
if (locked) {
|
|
29734
30388
|
lockedSubagents.add(entry.absolutePath);
|
|
@@ -29745,10 +30399,10 @@ async function provisionSubagents(options) {
|
|
|
29745
30399
|
break;
|
|
29746
30400
|
}
|
|
29747
30401
|
const subagentDir = subagent.absolutePath;
|
|
29748
|
-
const githubAgentsDir =
|
|
29749
|
-
const lockFile =
|
|
29750
|
-
const workspaceDst =
|
|
29751
|
-
const wakeupDst =
|
|
30402
|
+
const githubAgentsDir = path27.join(subagentDir, ".github", "agents");
|
|
30403
|
+
const lockFile = path27.join(subagentDir, lockName);
|
|
30404
|
+
const workspaceDst = path27.join(subagentDir, `${path27.basename(subagentDir)}.code-workspace`);
|
|
30405
|
+
const wakeupDst = path27.join(githubAgentsDir, "wakeup.md");
|
|
29752
30406
|
const isLocked = await pathExists(lockFile);
|
|
29753
30407
|
if (isLocked && !force) {
|
|
29754
30408
|
continue;
|
|
@@ -29757,8 +30411,8 @@ async function provisionSubagents(options) {
|
|
|
29757
30411
|
if (!dryRun) {
|
|
29758
30412
|
await removeIfExists(lockFile);
|
|
29759
30413
|
await ensureDir(githubAgentsDir);
|
|
29760
|
-
await
|
|
29761
|
-
await
|
|
30414
|
+
await writeFile8(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
|
|
30415
|
+
await writeFile8(wakeupDst, wakeupContent, "utf8");
|
|
29762
30416
|
}
|
|
29763
30417
|
created.push(subagentDir);
|
|
29764
30418
|
lockedSubagents.delete(subagentDir);
|
|
@@ -29768,8 +30422,8 @@ async function provisionSubagents(options) {
|
|
|
29768
30422
|
if (!isLocked && force) {
|
|
29769
30423
|
if (!dryRun) {
|
|
29770
30424
|
await ensureDir(githubAgentsDir);
|
|
29771
|
-
await
|
|
29772
|
-
await
|
|
30425
|
+
await writeFile8(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
|
|
30426
|
+
await writeFile8(wakeupDst, wakeupContent, "utf8");
|
|
29773
30427
|
}
|
|
29774
30428
|
created.push(subagentDir);
|
|
29775
30429
|
subagentsProvisioned += 1;
|
|
@@ -29777,8 +30431,8 @@ async function provisionSubagents(options) {
|
|
|
29777
30431
|
}
|
|
29778
30432
|
if (!dryRun && !await pathExists(workspaceDst)) {
|
|
29779
30433
|
await ensureDir(githubAgentsDir);
|
|
29780
|
-
await
|
|
29781
|
-
await
|
|
30434
|
+
await writeFile8(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
|
|
30435
|
+
await writeFile8(wakeupDst, wakeupContent, "utf8");
|
|
29782
30436
|
}
|
|
29783
30437
|
skippedExisting.push(subagentDir);
|
|
29784
30438
|
subagentsProvisioned += 1;
|
|
@@ -29786,15 +30440,15 @@ async function provisionSubagents(options) {
|
|
|
29786
30440
|
let nextIndex = highestNumber;
|
|
29787
30441
|
while (subagentsProvisioned < subagents) {
|
|
29788
30442
|
nextIndex += 1;
|
|
29789
|
-
const subagentDir =
|
|
29790
|
-
const githubAgentsDir =
|
|
29791
|
-
const workspaceDst =
|
|
29792
|
-
const wakeupDst =
|
|
30443
|
+
const subagentDir = path27.join(targetPath, `subagent-${nextIndex}`);
|
|
30444
|
+
const githubAgentsDir = path27.join(subagentDir, ".github", "agents");
|
|
30445
|
+
const workspaceDst = path27.join(subagentDir, `${path27.basename(subagentDir)}.code-workspace`);
|
|
30446
|
+
const wakeupDst = path27.join(githubAgentsDir, "wakeup.md");
|
|
29793
30447
|
if (!dryRun) {
|
|
29794
30448
|
await ensureDir(subagentDir);
|
|
29795
30449
|
await ensureDir(githubAgentsDir);
|
|
29796
|
-
await
|
|
29797
|
-
await
|
|
30450
|
+
await writeFile8(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
|
|
30451
|
+
await writeFile8(wakeupDst, wakeupContent, "utf8");
|
|
29798
30452
|
}
|
|
29799
30453
|
created.push(subagentDir);
|
|
29800
30454
|
subagentsProvisioned += 1;
|
|
@@ -29974,7 +30628,7 @@ var VSCodeProvider = class {
|
|
|
29974
30628
|
async function locateVSCodeExecutable(candidate) {
|
|
29975
30629
|
const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
|
|
29976
30630
|
if (includesPathSeparator) {
|
|
29977
|
-
const resolved =
|
|
30631
|
+
const resolved = path28.isAbsolute(candidate) ? candidate : path28.resolve(candidate);
|
|
29978
30632
|
try {
|
|
29979
30633
|
await access2(resolved, constants2.F_OK);
|
|
29980
30634
|
return resolved;
|
|
@@ -30016,7 +30670,7 @@ function buildMandatoryPrereadBlock2(attachmentFiles) {
|
|
|
30016
30670
|
return "";
|
|
30017
30671
|
}
|
|
30018
30672
|
const buildList = (files) => files.map((absolutePath) => {
|
|
30019
|
-
const fileName =
|
|
30673
|
+
const fileName = path28.basename(absolutePath);
|
|
30020
30674
|
const fileUri = pathToFileUri3(absolutePath);
|
|
30021
30675
|
return `* [${fileName}](${fileUri})`;
|
|
30022
30676
|
});
|
|
@@ -30037,7 +30691,7 @@ function collectAttachmentFiles(attachments) {
|
|
|
30037
30691
|
}
|
|
30038
30692
|
const unique = /* @__PURE__ */ new Map();
|
|
30039
30693
|
for (const attachment of attachments) {
|
|
30040
|
-
const absolutePath =
|
|
30694
|
+
const absolutePath = path28.resolve(attachment);
|
|
30041
30695
|
if (!unique.has(absolutePath)) {
|
|
30042
30696
|
unique.set(absolutePath, absolutePath);
|
|
30043
30697
|
}
|
|
@@ -30045,7 +30699,7 @@ function collectAttachmentFiles(attachments) {
|
|
|
30045
30699
|
return Array.from(unique.values());
|
|
30046
30700
|
}
|
|
30047
30701
|
function pathToFileUri3(filePath) {
|
|
30048
|
-
const absolutePath =
|
|
30702
|
+
const absolutePath = path28.isAbsolute(filePath) ? filePath : path28.resolve(filePath);
|
|
30049
30703
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
30050
30704
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
30051
30705
|
return `file:///${normalizedPath}`;
|
|
@@ -30058,7 +30712,7 @@ function normalizeAttachments(attachments) {
|
|
|
30058
30712
|
}
|
|
30059
30713
|
const deduped = /* @__PURE__ */ new Set();
|
|
30060
30714
|
for (const attachment of attachments) {
|
|
30061
|
-
deduped.add(
|
|
30715
|
+
deduped.add(path28.resolve(attachment));
|
|
30062
30716
|
}
|
|
30063
30717
|
return Array.from(deduped);
|
|
30064
30718
|
}
|
|
@@ -30067,7 +30721,7 @@ function mergeAttachments(all) {
|
|
|
30067
30721
|
for (const list of all) {
|
|
30068
30722
|
if (!list) continue;
|
|
30069
30723
|
for (const inputFile of list) {
|
|
30070
|
-
deduped.add(
|
|
30724
|
+
deduped.add(path28.resolve(inputFile));
|
|
30071
30725
|
}
|
|
30072
30726
|
}
|
|
30073
30727
|
return deduped.size > 0 ? Array.from(deduped) : void 0;
|
|
@@ -30150,11 +30804,11 @@ async function fileExists2(filePath) {
|
|
|
30150
30804
|
}
|
|
30151
30805
|
}
|
|
30152
30806
|
async function readTargetDefinitions(filePath) {
|
|
30153
|
-
const absolutePath =
|
|
30807
|
+
const absolutePath = path29.resolve(filePath);
|
|
30154
30808
|
if (!await fileExists2(absolutePath)) {
|
|
30155
30809
|
throw new Error(`targets.yaml not found at ${absolutePath}`);
|
|
30156
30810
|
}
|
|
30157
|
-
const raw = await
|
|
30811
|
+
const raw = await readFile9(absolutePath, "utf8");
|
|
30158
30812
|
const parsed = parseYamlValue(raw);
|
|
30159
30813
|
if (!isRecord(parsed)) {
|
|
30160
30814
|
throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
|
|
@@ -30171,11 +30825,11 @@ function listTargetNames(definitions) {
|
|
|
30171
30825
|
async function discoverProviders(registry2, baseDir) {
|
|
30172
30826
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
30173
30827
|
const candidateDirs = [];
|
|
30174
|
-
let dir =
|
|
30175
|
-
const root =
|
|
30828
|
+
let dir = path30.resolve(baseDir);
|
|
30829
|
+
const root = path30.parse(dir).root;
|
|
30176
30830
|
while (dir !== root) {
|
|
30177
|
-
candidateDirs.push(
|
|
30178
|
-
dir =
|
|
30831
|
+
candidateDirs.push(path30.join(dir, ".agentv", "providers"));
|
|
30832
|
+
dir = path30.dirname(dir);
|
|
30179
30833
|
}
|
|
30180
30834
|
let files = [];
|
|
30181
30835
|
for (const providersDir of candidateDirs) {
|
|
@@ -30191,7 +30845,7 @@ async function discoverProviders(registry2, baseDir) {
|
|
|
30191
30845
|
}
|
|
30192
30846
|
const discoveredKinds = [];
|
|
30193
30847
|
for (const filePath of files) {
|
|
30194
|
-
const basename =
|
|
30848
|
+
const basename = path30.basename(filePath);
|
|
30195
30849
|
const kindName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
30196
30850
|
if (registry2.has(kindName)) {
|
|
30197
30851
|
continue;
|
|
@@ -30207,7 +30861,7 @@ async function discoverProviders(registry2, baseDir) {
|
|
|
30207
30861
|
}
|
|
30208
30862
|
function createBuiltinProviderRegistry() {
|
|
30209
30863
|
const registry2 = new ProviderRegistry();
|
|
30210
|
-
registry2.register("openai", (t) => new OpenAIProvider(t.name, t.config)).register("openrouter", (t) => new OpenRouterProvider(t.name, t.config)).register("azure", (t) => new AzureProvider(t.name, t.config)).register("anthropic", (t) => new AnthropicProvider(t.name, t.config)).register("gemini", (t) => new GeminiProvider(t.name, t.config)).register("cli", (t) => new CliProvider(t.name, t.config)).register("codex", (t) => new CodexProvider(t.name, t.config)).register("copilot-sdk", (t) => new CopilotSdkProvider(t.name, t.config)).register("copilot-cli", (t) => new CopilotCliProvider(t.name, t.config)).register("copilot-log", (t) => new CopilotLogProvider(t.name, t.config)).register("pi-coding-agent", (t) => new PiCodingAgentProvider(t.name, t.config)).register("pi-cli", (t) => new PiCliProvider(t.name, t.config)).register("claude-cli", (t) => new ClaudeCliProvider(t.name, t.config)).register("claude", (t) => new ClaudeCliProvider(t.name, t.config)).register("claude-sdk", (t) => new ClaudeSdkProvider(t.name, t.config)).register("mock", (t) => new MockProvider(t.name, t.config)).register("agentv", (t) => new AgentvProvider(t.name, t.config)).register("vscode", (t) => new VSCodeProvider(t.name, t.config, "vscode")).register(
|
|
30864
|
+
registry2.register("openai", (t) => new OpenAIProvider(t.name, t.config)).register("openrouter", (t) => new OpenRouterProvider(t.name, t.config)).register("azure", (t) => new AzureProvider(t.name, t.config)).register("anthropic", (t) => new AnthropicProvider(t.name, t.config)).register("gemini", (t) => new GeminiProvider(t.name, t.config)).register("cli", (t) => new CliProvider(t.name, t.config)).register("codex", (t) => new CodexProvider(t.name, t.config)).register("copilot-sdk", (t) => new CopilotSdkProvider(t.name, t.config)).register("copilot-cli", (t) => new CopilotCliProvider(t.name, t.config)).register("copilot-log", (t) => new CopilotLogProvider(t.name, t.config)).register("pi-coding-agent", (t) => new PiCodingAgentProvider(t.name, t.config)).register("pi-cli", (t) => new PiCliProvider(t.name, t.config)).register("claude-cli", (t) => new ClaudeCliProvider(t.name, t.config)).register("claude", (t) => new ClaudeCliProvider(t.name, t.config)).register("claude-sdk", (t) => new ClaudeSdkProvider(t.name, t.config)).register("mock", (t) => new MockProvider(t.name, t.config)).register("agentv", (t) => new AgentvProvider(t.name, t.config)).register("replay", (t) => new ReplayProvider(t.name, t.config)).register("vscode", (t) => new VSCodeProvider(t.name, t.config, "vscode")).register(
|
|
30211
30865
|
"vscode-insiders",
|
|
30212
30866
|
(t) => new VSCodeProvider(t.name, t.config, "vscode-insiders")
|
|
30213
30867
|
);
|
|
@@ -30326,10 +30980,13 @@ function containsTemplateVariables(text) {
|
|
|
30326
30980
|
return false;
|
|
30327
30981
|
}
|
|
30328
30982
|
async function executePromptTemplate(script, context, config2, timeoutMs) {
|
|
30983
|
+
const messages = context.trace?.messages ?? context.output ?? [];
|
|
30329
30984
|
const payload = {
|
|
30330
30985
|
criteria: context.evalCase.criteria,
|
|
30331
30986
|
expectedOutput: context.evalCase.expected_output,
|
|
30332
|
-
output: context.
|
|
30987
|
+
output: context.candidate,
|
|
30988
|
+
answer: context.candidate,
|
|
30989
|
+
messages,
|
|
30333
30990
|
inputFiles: context.evalCase.file_paths,
|
|
30334
30991
|
input: context.evalCase.input,
|
|
30335
30992
|
metadata: context.evalCase.metadata ?? null,
|
|
@@ -30340,7 +30997,7 @@ async function executePromptTemplate(script, context, config2, timeoutMs) {
|
|
|
30340
30997
|
};
|
|
30341
30998
|
const inputJson = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
|
|
30342
30999
|
const scriptPath = script[script.length - 1];
|
|
30343
|
-
const cwd =
|
|
31000
|
+
const cwd = path31.dirname(scriptPath);
|
|
30344
31001
|
try {
|
|
30345
31002
|
const stdout = await executeScript(script, inputJson, timeoutMs, cwd);
|
|
30346
31003
|
const prompt = stdout.trim();
|
|
@@ -30622,11 +31279,11 @@ function createBuiltinRegistry() {
|
|
|
30622
31279
|
async function discoverAssertions(registry2, baseDir) {
|
|
30623
31280
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
30624
31281
|
const candidateDirs = [];
|
|
30625
|
-
let dir =
|
|
30626
|
-
const root =
|
|
31282
|
+
let dir = path322.resolve(baseDir);
|
|
31283
|
+
const root = path322.parse(dir).root;
|
|
30627
31284
|
while (dir !== root) {
|
|
30628
|
-
candidateDirs.push(
|
|
30629
|
-
dir =
|
|
31285
|
+
candidateDirs.push(path322.join(dir, ".agentv", "assertions"));
|
|
31286
|
+
dir = path322.dirname(dir);
|
|
30630
31287
|
}
|
|
30631
31288
|
let files = [];
|
|
30632
31289
|
for (const assertionsDir of candidateDirs) {
|
|
@@ -30642,7 +31299,7 @@ async function discoverAssertions(registry2, baseDir) {
|
|
|
30642
31299
|
}
|
|
30643
31300
|
const discoveredTypes = [];
|
|
30644
31301
|
for (const filePath of files) {
|
|
30645
|
-
const basename =
|
|
31302
|
+
const basename = path322.basename(filePath);
|
|
30646
31303
|
const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
30647
31304
|
if (registry2.has(typeName)) {
|
|
30648
31305
|
continue;
|
|
@@ -30661,12 +31318,12 @@ async function discoverAssertions(registry2, baseDir) {
|
|
|
30661
31318
|
async function discoverGraders(registry2, baseDir) {
|
|
30662
31319
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
30663
31320
|
const candidateDirs = [];
|
|
30664
|
-
let dir =
|
|
30665
|
-
const root =
|
|
31321
|
+
let dir = path33.resolve(baseDir);
|
|
31322
|
+
const root = path33.parse(dir).root;
|
|
30666
31323
|
while (dir !== root) {
|
|
30667
|
-
candidateDirs.push(
|
|
30668
|
-
candidateDirs.push(
|
|
30669
|
-
dir =
|
|
31324
|
+
candidateDirs.push(path33.join(dir, ".agentv", "graders"));
|
|
31325
|
+
candidateDirs.push(path33.join(dir, ".agentv", "judges"));
|
|
31326
|
+
dir = path33.dirname(dir);
|
|
30670
31327
|
}
|
|
30671
31328
|
let files = [];
|
|
30672
31329
|
for (const gradersDir of candidateDirs) {
|
|
@@ -30682,7 +31339,7 @@ async function discoverGraders(registry2, baseDir) {
|
|
|
30682
31339
|
}
|
|
30683
31340
|
const discoveredTypes = [];
|
|
30684
31341
|
for (const filePath of files) {
|
|
30685
|
-
const basename =
|
|
31342
|
+
const basename = path33.basename(filePath);
|
|
30686
31343
|
const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
30687
31344
|
if (registry2.has(typeName)) {
|
|
30688
31345
|
continue;
|
|
@@ -30865,14 +31522,14 @@ async function isDirectory(filePath) {
|
|
|
30865
31522
|
}
|
|
30866
31523
|
function getWorkspacePath(evalRunId, caseId, workspaceRoot) {
|
|
30867
31524
|
const root = workspaceRoot ?? getWorkspacesRoot();
|
|
30868
|
-
return
|
|
31525
|
+
return path34.join(root, evalRunId, caseId);
|
|
30869
31526
|
}
|
|
30870
31527
|
async function copyDirectoryRecursive(src, dest) {
|
|
30871
|
-
await
|
|
31528
|
+
await mkdir14(dest, { recursive: true });
|
|
30872
31529
|
const entries = await readdir5(src, { withFileTypes: true });
|
|
30873
31530
|
for (const entry of entries) {
|
|
30874
|
-
const srcPath =
|
|
30875
|
-
const destPath =
|
|
31531
|
+
const srcPath = path34.join(src, entry.name);
|
|
31532
|
+
const destPath = path34.join(dest, entry.name);
|
|
30876
31533
|
if (entry.name === ".git") {
|
|
30877
31534
|
continue;
|
|
30878
31535
|
}
|
|
@@ -30884,7 +31541,7 @@ async function copyDirectoryRecursive(src, dest) {
|
|
|
30884
31541
|
}
|
|
30885
31542
|
}
|
|
30886
31543
|
async function createTempWorkspace(templatePath, evalRunId, caseId, workspaceRoot) {
|
|
30887
|
-
const resolvedTemplatePath =
|
|
31544
|
+
const resolvedTemplatePath = path34.resolve(templatePath);
|
|
30888
31545
|
if (!await fileExists(resolvedTemplatePath)) {
|
|
30889
31546
|
throw new TemplateNotFoundError(resolvedTemplatePath);
|
|
30890
31547
|
}
|
|
@@ -30933,7 +31590,7 @@ async function cleanupWorkspace(workspacePath) {
|
|
|
30933
31590
|
}
|
|
30934
31591
|
async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
|
|
30935
31592
|
const root = workspaceRoot ?? getWorkspacesRoot();
|
|
30936
|
-
const evalDir =
|
|
31593
|
+
const evalDir = path34.join(root, evalRunId);
|
|
30937
31594
|
if (await fileExists(evalDir)) {
|
|
30938
31595
|
await rm4(evalDir, { recursive: true, force: true });
|
|
30939
31596
|
}
|
|
@@ -30986,14 +31643,14 @@ function computeWorkspaceFingerprint(repos) {
|
|
|
30986
31643
|
const canonical = {
|
|
30987
31644
|
repos: [...repos].sort((a, b) => (a.path ?? "").localeCompare(b.path ?? "")).map(normalizeRepoForFingerprint)
|
|
30988
31645
|
};
|
|
30989
|
-
return
|
|
31646
|
+
return createHash2("sha256").update(JSON.stringify(canonical)).digest("hex");
|
|
30990
31647
|
}
|
|
30991
31648
|
async function copyDirectoryRecursive2(src, dest, skipDirs) {
|
|
30992
|
-
await
|
|
31649
|
+
await mkdir15(dest, { recursive: true });
|
|
30993
31650
|
const entries = await readdir6(src, { withFileTypes: true });
|
|
30994
31651
|
for (const entry of entries) {
|
|
30995
|
-
const srcPath =
|
|
30996
|
-
const destPath =
|
|
31652
|
+
const srcPath = path35.join(src, entry.name);
|
|
31653
|
+
const destPath = path35.join(dest, entry.name);
|
|
30997
31654
|
if (entry.name === ".git") {
|
|
30998
31655
|
continue;
|
|
30999
31656
|
}
|
|
@@ -31026,8 +31683,8 @@ var WorkspacePoolManager = class {
|
|
|
31026
31683
|
async acquireWorkspace(options) {
|
|
31027
31684
|
const { templatePath, repos, maxSlots, repoManager, poolReset } = options;
|
|
31028
31685
|
const fingerprint = computeWorkspaceFingerprint(repos);
|
|
31029
|
-
const poolDir =
|
|
31030
|
-
await
|
|
31686
|
+
const poolDir = path35.join(this.poolRoot, fingerprint);
|
|
31687
|
+
await mkdir15(poolDir, { recursive: true });
|
|
31031
31688
|
const drifted = await this.checkDrift(poolDir, fingerprint);
|
|
31032
31689
|
if (drifted) {
|
|
31033
31690
|
console.warn(
|
|
@@ -31036,7 +31693,7 @@ var WorkspacePoolManager = class {
|
|
|
31036
31693
|
await this.removeAllSlots(poolDir);
|
|
31037
31694
|
}
|
|
31038
31695
|
for (let i = 0; i < maxSlots; i++) {
|
|
31039
|
-
const slotPath =
|
|
31696
|
+
const slotPath = path35.join(poolDir, `slot-${i}`);
|
|
31040
31697
|
const lockPath = `${slotPath}.lock`;
|
|
31041
31698
|
const locked = await this.tryLock(lockPath);
|
|
31042
31699
|
if (!locked) {
|
|
@@ -31054,7 +31711,7 @@ var WorkspacePoolManager = class {
|
|
|
31054
31711
|
poolDir
|
|
31055
31712
|
};
|
|
31056
31713
|
}
|
|
31057
|
-
await
|
|
31714
|
+
await mkdir15(slotPath, { recursive: true });
|
|
31058
31715
|
if (templatePath) {
|
|
31059
31716
|
await copyDirectoryRecursive2(templatePath, slotPath);
|
|
31060
31717
|
}
|
|
@@ -31091,14 +31748,14 @@ var WorkspacePoolManager = class {
|
|
|
31091
31748
|
async tryLock(lockPath) {
|
|
31092
31749
|
for (let attempt = 0; attempt < 3; attempt++) {
|
|
31093
31750
|
try {
|
|
31094
|
-
await
|
|
31751
|
+
await writeFile9(lockPath, String(process.pid), { flag: "wx" });
|
|
31095
31752
|
return true;
|
|
31096
31753
|
} catch (err) {
|
|
31097
31754
|
if (err.code !== "EEXIST") {
|
|
31098
31755
|
throw err;
|
|
31099
31756
|
}
|
|
31100
31757
|
try {
|
|
31101
|
-
const pidStr = await
|
|
31758
|
+
const pidStr = await readFile10(lockPath, "utf-8");
|
|
31102
31759
|
const pid = Number.parseInt(pidStr.trim(), 10);
|
|
31103
31760
|
if (!Number.isNaN(pid)) {
|
|
31104
31761
|
try {
|
|
@@ -31123,9 +31780,9 @@ var WorkspacePoolManager = class {
|
|
|
31123
31780
|
* Returns false (no drift) if metadata.json doesn't exist (first use).
|
|
31124
31781
|
*/
|
|
31125
31782
|
async checkDrift(poolDir, fingerprint) {
|
|
31126
|
-
const metadataPath =
|
|
31783
|
+
const metadataPath = path35.join(poolDir, "metadata.json");
|
|
31127
31784
|
try {
|
|
31128
|
-
const raw = await
|
|
31785
|
+
const raw = await readFile10(metadataPath, "utf-8");
|
|
31129
31786
|
const metadata = JSON.parse(raw);
|
|
31130
31787
|
return metadata.fingerprint !== fingerprint;
|
|
31131
31788
|
} catch {
|
|
@@ -31140,17 +31797,17 @@ var WorkspacePoolManager = class {
|
|
|
31140
31797
|
repos,
|
|
31141
31798
|
createdAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
31142
31799
|
};
|
|
31143
|
-
await
|
|
31800
|
+
await writeFile9(path35.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
|
|
31144
31801
|
}
|
|
31145
31802
|
/** Remove all slot directories and their lock files from a pool directory. */
|
|
31146
31803
|
async removeAllSlots(poolDir) {
|
|
31147
31804
|
const entries = await readdir6(poolDir);
|
|
31148
31805
|
for (const entry of entries) {
|
|
31149
31806
|
if (entry.startsWith("slot-") && !entry.endsWith(".lock")) {
|
|
31150
|
-
const lockPath =
|
|
31807
|
+
const lockPath = path35.join(poolDir, `${entry}.lock`);
|
|
31151
31808
|
if (existsSync3(lockPath)) {
|
|
31152
31809
|
try {
|
|
31153
|
-
const pidStr = await
|
|
31810
|
+
const pidStr = await readFile10(lockPath, "utf-8");
|
|
31154
31811
|
const pid = Number.parseInt(pidStr.trim(), 10);
|
|
31155
31812
|
if (!Number.isNaN(pid)) {
|
|
31156
31813
|
try {
|
|
@@ -31163,12 +31820,12 @@ var WorkspacePoolManager = class {
|
|
|
31163
31820
|
} catch {
|
|
31164
31821
|
}
|
|
31165
31822
|
}
|
|
31166
|
-
await rm5(
|
|
31823
|
+
await rm5(path35.join(poolDir, entry), { recursive: true, force: true });
|
|
31167
31824
|
await rm5(lockPath, { force: true }).catch(() => {
|
|
31168
31825
|
});
|
|
31169
31826
|
}
|
|
31170
31827
|
}
|
|
31171
|
-
await rm5(
|
|
31828
|
+
await rm5(path35.join(poolDir, "metadata.json"), { force: true }).catch(() => {
|
|
31172
31829
|
});
|
|
31173
31830
|
}
|
|
31174
31831
|
/**
|
|
@@ -31179,7 +31836,7 @@ var WorkspacePoolManager = class {
|
|
|
31179
31836
|
async resetSlot(slotPath, templatePath, repos, poolReset = "fast") {
|
|
31180
31837
|
for (const repo of repos) {
|
|
31181
31838
|
if (!repo.path || !repo.source) continue;
|
|
31182
|
-
const repoDir =
|
|
31839
|
+
const repoDir = path35.join(slotPath, repo.path);
|
|
31183
31840
|
if (!existsSync3(repoDir)) {
|
|
31184
31841
|
continue;
|
|
31185
31842
|
}
|
|
@@ -31319,7 +31976,7 @@ ${lines.join("\n")}`;
|
|
|
31319
31976
|
}
|
|
31320
31977
|
return;
|
|
31321
31978
|
}
|
|
31322
|
-
const targetDir =
|
|
31979
|
+
const targetDir = path36.join(workspacePath, repo.path);
|
|
31323
31980
|
const sourceUrl = getSourceUrl(repo.source);
|
|
31324
31981
|
const startedAt = Date.now();
|
|
31325
31982
|
if (this.verbose) {
|
|
@@ -31416,7 +32073,7 @@ ${lines.join("\n")}`;
|
|
|
31416
32073
|
const cleanFlag = reset === "strict" ? "-fdx" : "-fd";
|
|
31417
32074
|
for (const repo of repos) {
|
|
31418
32075
|
if (!repo.path || !repo.source) continue;
|
|
31419
|
-
const targetDir =
|
|
32076
|
+
const targetDir = path36.join(workspacePath, repo.path);
|
|
31420
32077
|
await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
|
|
31421
32078
|
await this.runGit(["clean", cleanFlag], { cwd: targetDir });
|
|
31422
32079
|
}
|
|
@@ -31426,11 +32083,11 @@ async function resolveWorkspaceTemplate(templatePath) {
|
|
|
31426
32083
|
if (!templatePath) {
|
|
31427
32084
|
return void 0;
|
|
31428
32085
|
}
|
|
31429
|
-
const resolved =
|
|
32086
|
+
const resolved = path37.resolve(templatePath);
|
|
31430
32087
|
const stats = await stat7(resolved);
|
|
31431
32088
|
if (stats.isFile()) {
|
|
31432
32089
|
return {
|
|
31433
|
-
dir:
|
|
32090
|
+
dir: path37.dirname(resolved),
|
|
31434
32091
|
workspaceFile: resolved
|
|
31435
32092
|
};
|
|
31436
32093
|
}
|
|
@@ -31442,14 +32099,14 @@ async function resolveWorkspaceTemplate(templatePath) {
|
|
|
31442
32099
|
if (workspaceFiles.length === 1) {
|
|
31443
32100
|
return {
|
|
31444
32101
|
dir: resolved,
|
|
31445
|
-
workspaceFile:
|
|
32102
|
+
workspaceFile: path37.join(resolved, workspaceFiles[0])
|
|
31446
32103
|
};
|
|
31447
32104
|
}
|
|
31448
32105
|
if (workspaceFiles.length > 1) {
|
|
31449
32106
|
const conventionFile = workspaceFiles.find((f) => f === "template.code-workspace");
|
|
31450
32107
|
return {
|
|
31451
32108
|
dir: resolved,
|
|
31452
|
-
workspaceFile: conventionFile ?
|
|
32109
|
+
workspaceFile: conventionFile ? path37.join(resolved, conventionFile) : void 0
|
|
31453
32110
|
};
|
|
31454
32111
|
}
|
|
31455
32112
|
return { dir: resolved };
|
|
@@ -31568,14 +32225,14 @@ function isAgentSkillsFormat(parsed) {
|
|
|
31568
32225
|
return Array.isArray(obj.evals);
|
|
31569
32226
|
}
|
|
31570
32227
|
async function loadTestsFromAgentSkills(filePath) {
|
|
31571
|
-
const raw = await
|
|
32228
|
+
const raw = await readFile11(filePath, "utf8");
|
|
31572
32229
|
let parsed;
|
|
31573
32230
|
try {
|
|
31574
32231
|
parsed = JSON.parse(raw);
|
|
31575
32232
|
} catch {
|
|
31576
32233
|
throw new Error(`Invalid Agent Skills evals.json: failed to parse JSON in '${filePath}'`);
|
|
31577
32234
|
}
|
|
31578
|
-
return parseAgentSkillsEvals(parsed, filePath,
|
|
32235
|
+
return parseAgentSkillsEvals(parsed, filePath, path38.dirname(path38.resolve(filePath)));
|
|
31579
32236
|
}
|
|
31580
32237
|
function parseAgentSkillsEvals(parsed, source = "evals.json", baseDir) {
|
|
31581
32238
|
if (!isAgentSkillsFormat(parsed)) {
|
|
@@ -31613,7 +32270,7 @@ function parseAgentSkillsEvals(parsed, source = "evals.json", baseDir) {
|
|
|
31613
32270
|
if (baseDir) {
|
|
31614
32271
|
metadata.agent_skills_base_dir = baseDir;
|
|
31615
32272
|
for (const file2 of evalCase.files) {
|
|
31616
|
-
filePaths.push(
|
|
32273
|
+
filePaths.push(path38.resolve(baseDir, file2));
|
|
31617
32274
|
}
|
|
31618
32275
|
}
|
|
31619
32276
|
}
|
|
@@ -31649,15 +32306,15 @@ function resolveToAbsolutePath(candidate) {
|
|
|
31649
32306
|
if (candidate.startsWith("file:")) {
|
|
31650
32307
|
return fileURLToPath4(candidate);
|
|
31651
32308
|
}
|
|
31652
|
-
return
|
|
32309
|
+
return path39.resolve(candidate);
|
|
31653
32310
|
}
|
|
31654
32311
|
throw new TypeError("Unsupported repoRoot value. Expected string or URL.");
|
|
31655
32312
|
}
|
|
31656
32313
|
function buildDirectoryChain2(filePath, repoRoot) {
|
|
31657
32314
|
const directories = [];
|
|
31658
32315
|
const seen = /* @__PURE__ */ new Set();
|
|
31659
|
-
const boundary =
|
|
31660
|
-
let current =
|
|
32316
|
+
const boundary = path39.resolve(repoRoot);
|
|
32317
|
+
let current = path39.resolve(path39.dirname(filePath));
|
|
31661
32318
|
while (current !== void 0) {
|
|
31662
32319
|
if (!seen.has(current)) {
|
|
31663
32320
|
directories.push(current);
|
|
@@ -31666,7 +32323,7 @@ function buildDirectoryChain2(filePath, repoRoot) {
|
|
|
31666
32323
|
if (current === boundary) {
|
|
31667
32324
|
break;
|
|
31668
32325
|
}
|
|
31669
|
-
const parent =
|
|
32326
|
+
const parent = path39.dirname(current);
|
|
31670
32327
|
if (parent === current) {
|
|
31671
32328
|
break;
|
|
31672
32329
|
}
|
|
@@ -31680,16 +32337,16 @@ function buildDirectoryChain2(filePath, repoRoot) {
|
|
|
31680
32337
|
function buildSearchRoots2(evalPath, repoRoot) {
|
|
31681
32338
|
const uniqueRoots = [];
|
|
31682
32339
|
const addRoot = (root) => {
|
|
31683
|
-
const normalized =
|
|
32340
|
+
const normalized = path39.resolve(root);
|
|
31684
32341
|
if (!uniqueRoots.includes(normalized)) {
|
|
31685
32342
|
uniqueRoots.push(normalized);
|
|
31686
32343
|
}
|
|
31687
32344
|
};
|
|
31688
|
-
let currentDir =
|
|
32345
|
+
let currentDir = path39.dirname(evalPath);
|
|
31689
32346
|
let reachedBoundary = false;
|
|
31690
32347
|
while (!reachedBoundary) {
|
|
31691
32348
|
addRoot(currentDir);
|
|
31692
|
-
const parentDir =
|
|
32349
|
+
const parentDir = path39.dirname(currentDir);
|
|
31693
32350
|
if (currentDir === repoRoot || parentDir === currentDir) {
|
|
31694
32351
|
reachedBoundary = true;
|
|
31695
32352
|
} else {
|
|
@@ -31707,16 +32364,16 @@ function trimLeadingSeparators2(value) {
|
|
|
31707
32364
|
async function resolveFileReference3(rawValue, searchRoots) {
|
|
31708
32365
|
const displayPath = trimLeadingSeparators2(rawValue);
|
|
31709
32366
|
const potentialPaths = [];
|
|
31710
|
-
if (
|
|
31711
|
-
potentialPaths.push(
|
|
32367
|
+
if (path39.isAbsolute(rawValue)) {
|
|
32368
|
+
potentialPaths.push(path39.normalize(rawValue));
|
|
31712
32369
|
}
|
|
31713
32370
|
for (const base of searchRoots) {
|
|
31714
|
-
potentialPaths.push(
|
|
32371
|
+
potentialPaths.push(path39.resolve(base, displayPath));
|
|
31715
32372
|
}
|
|
31716
32373
|
const attempted = [];
|
|
31717
32374
|
const seen = /* @__PURE__ */ new Set();
|
|
31718
32375
|
for (const candidate of potentialPaths) {
|
|
31719
|
-
const absoluteCandidate =
|
|
32376
|
+
const absoluteCandidate = path39.resolve(candidate);
|
|
31720
32377
|
if (seen.has(absoluteCandidate)) {
|
|
31721
32378
|
continue;
|
|
31722
32379
|
}
|
|
@@ -31737,9 +32394,9 @@ var DEFAULT_EVAL_PATTERNS = [
|
|
|
31737
32394
|
];
|
|
31738
32395
|
async function loadConfig(evalFilePath, repoRoot) {
|
|
31739
32396
|
const directories = buildDirectoryChain2(evalFilePath, repoRoot);
|
|
31740
|
-
const globalConfigPath =
|
|
32397
|
+
const globalConfigPath = path40.join(getAgentvConfigDir(), "config.yaml");
|
|
31741
32398
|
for (const directory of directories) {
|
|
31742
|
-
const configPath =
|
|
32399
|
+
const configPath = path40.join(directory, ".agentv", "config.yaml");
|
|
31743
32400
|
if (!await fileExists3(configPath)) {
|
|
31744
32401
|
continue;
|
|
31745
32402
|
}
|
|
@@ -31752,7 +32409,7 @@ async function loadConfig(evalFilePath, repoRoot) {
|
|
|
31752
32409
|
}
|
|
31753
32410
|
async function readConfigFile(configPath) {
|
|
31754
32411
|
try {
|
|
31755
|
-
const rawConfig = await
|
|
32412
|
+
const rawConfig = await readFile12(configPath, "utf8");
|
|
31756
32413
|
const parsed = interpolateEnv(parseYamlValue(rawConfig), process.env);
|
|
31757
32414
|
if (!isJsonObject(parsed)) {
|
|
31758
32415
|
logWarning(`Invalid config.yaml format at ${configPath}`);
|
|
@@ -32102,6 +32759,14 @@ function parseResultsConfig(raw, configPath) {
|
|
|
32102
32759
|
logWarning(`Invalid results.repo in ${configPath}, expected non-empty string`);
|
|
32103
32760
|
return void 0;
|
|
32104
32761
|
}
|
|
32762
|
+
let branch;
|
|
32763
|
+
if (obj.branch !== void 0) {
|
|
32764
|
+
if (typeof obj.branch !== "string" || obj.branch.trim().length === 0) {
|
|
32765
|
+
logWarning(`Invalid results.branch in ${configPath}, expected non-empty string`);
|
|
32766
|
+
return void 0;
|
|
32767
|
+
}
|
|
32768
|
+
branch = obj.branch.trim();
|
|
32769
|
+
}
|
|
32105
32770
|
let resultsPath;
|
|
32106
32771
|
if (obj.path !== void 0) {
|
|
32107
32772
|
if (typeof obj.path !== "string" || obj.path.trim().length === 0) {
|
|
@@ -32132,6 +32797,7 @@ function parseResultsConfig(raw, configPath) {
|
|
|
32132
32797
|
return {
|
|
32133
32798
|
mode: "github",
|
|
32134
32799
|
repo,
|
|
32800
|
+
...branch !== void 0 && { branch },
|
|
32135
32801
|
...resultsPath !== void 0 && { path: resultsPath },
|
|
32136
32802
|
...typeof obj.auto_push === "boolean" && { auto_push: obj.auto_push },
|
|
32137
32803
|
...branchPrefix && { branch_prefix: branchPrefix }
|
|
@@ -32168,7 +32834,7 @@ function logWarning(message) {
|
|
|
32168
32834
|
var ANSI_YELLOW3 = "\x1B[33m";
|
|
32169
32835
|
var ANSI_RESET4 = "\x1B[0m";
|
|
32170
32836
|
async function validateCustomPromptContent(promptPath) {
|
|
32171
|
-
const content = await
|
|
32837
|
+
const content = await readFile13(promptPath, "utf8");
|
|
32172
32838
|
validateTemplateVariables(content, promptPath);
|
|
32173
32839
|
}
|
|
32174
32840
|
function validateTemplateVariables(content, source) {
|
|
@@ -32257,8 +32923,8 @@ function isTemplateReference(value) {
|
|
|
32257
32923
|
}
|
|
32258
32924
|
async function resolveAssertionTemplateReference(include, searchRoots) {
|
|
32259
32925
|
const templateCandidates = isTemplateReference(include) ? [
|
|
32260
|
-
|
|
32261
|
-
|
|
32926
|
+
path41.join(".agentv", "templates", `${include}.yaml`),
|
|
32927
|
+
path41.join(".agentv", "templates", `${include}.yml`)
|
|
32262
32928
|
] : [include];
|
|
32263
32929
|
const attempted = [];
|
|
32264
32930
|
for (const candidate of templateCandidates) {
|
|
@@ -32298,7 +32964,7 @@ ${resolved.attempted.map((attempt) => ` Tried: ${attempt}`).join("\n")}` : "";
|
|
|
32298
32964
|
const cycle = [...includeContext.chain, resolved.resolvedPath].join(" -> ");
|
|
32299
32965
|
throw new Error(`Assertion template cycle detected in '${evalId}': ${cycle}`);
|
|
32300
32966
|
}
|
|
32301
|
-
const content = await
|
|
32967
|
+
const content = await readFile14(resolved.resolvedPath, "utf8");
|
|
32302
32968
|
const parsed = interpolateEnv(parseYamlValue(content), process.env);
|
|
32303
32969
|
if (!isJsonObject2(parsed)) {
|
|
32304
32970
|
throw new Error(
|
|
@@ -32311,10 +32977,10 @@ ${resolved.attempted.map((attempt) => ` Tried: ${attempt}`).join("\n")}` : "";
|
|
|
32311
32977
|
`Invalid assertion template file in '${evalId}': ${resolved.resolvedPath} is missing a top-level assertions array`
|
|
32312
32978
|
);
|
|
32313
32979
|
}
|
|
32314
|
-
const templateDir =
|
|
32980
|
+
const templateDir = path41.dirname(resolved.resolvedPath);
|
|
32315
32981
|
const nestedSearchRoots = [
|
|
32316
32982
|
templateDir,
|
|
32317
|
-
...searchRoots.filter((root) =>
|
|
32983
|
+
...searchRoots.filter((root) => path41.resolve(root) !== templateDir)
|
|
32318
32984
|
];
|
|
32319
32985
|
return await expandGraderEntries(assertions, nestedSearchRoots, evalId, {
|
|
32320
32986
|
depth: nextDepth,
|
|
@@ -32375,20 +33041,20 @@ async function collectAssertionTemplateReferencesFromValue(value, searchRoots, e
|
|
|
32375
33041
|
references.push({
|
|
32376
33042
|
kind: "assertion_template",
|
|
32377
33043
|
displayPath: resolved.displayPath,
|
|
32378
|
-
...resolved.resolvedPath ? { resolvedPath:
|
|
33044
|
+
...resolved.resolvedPath ? { resolvedPath: path41.resolve(resolved.resolvedPath) } : {}
|
|
32379
33045
|
});
|
|
32380
33046
|
if (resolved.resolvedPath) {
|
|
32381
33047
|
if (includeContext.chain.includes(resolved.resolvedPath)) {
|
|
32382
33048
|
const cycle = [...includeContext.chain, resolved.resolvedPath].join(" -> ");
|
|
32383
33049
|
throw new Error(`Assertion template cycle detected in '${evalId}': ${cycle}`);
|
|
32384
33050
|
}
|
|
32385
|
-
const content = await
|
|
33051
|
+
const content = await readFile14(resolved.resolvedPath, "utf8");
|
|
32386
33052
|
const parsed = interpolateEnv(parseYamlValue(content), process.env);
|
|
32387
33053
|
if (isJsonObject2(parsed) && Array.isArray(parsed.assertions)) {
|
|
32388
|
-
const templateDir =
|
|
33054
|
+
const templateDir = path41.dirname(resolved.resolvedPath);
|
|
32389
33055
|
const nestedSearchRoots = [
|
|
32390
33056
|
templateDir,
|
|
32391
|
-
...searchRoots.filter((root) =>
|
|
33057
|
+
...searchRoots.filter((root) => path41.resolve(root) !== templateDir)
|
|
32392
33058
|
];
|
|
32393
33059
|
references.push(
|
|
32394
33060
|
...await collectAssertionTemplateReferencesFromValue(
|
|
@@ -32574,7 +33240,7 @@ async function parseGraderList(candidateEvaluators, searchRoots, evalId, default
|
|
|
32574
33240
|
if (cwd) {
|
|
32575
33241
|
const resolved = await resolveFileReference3(cwd, searchRoots);
|
|
32576
33242
|
if (resolved.resolvedPath) {
|
|
32577
|
-
resolvedCwd =
|
|
33243
|
+
resolvedCwd = path41.resolve(resolved.resolvedPath);
|
|
32578
33244
|
} else {
|
|
32579
33245
|
logWarning2(
|
|
32580
33246
|
`Code-grader evaluator '${name}' in '${evalId}': cwd not found (${resolved.displayPath})`,
|
|
@@ -32760,7 +33426,7 @@ async function parseGraderList(candidateEvaluators, searchRoots, evalId, default
|
|
|
32760
33426
|
aggregatorPrompt = fileRef;
|
|
32761
33427
|
const resolved = await resolveFileReference3(fileRef, searchRoots);
|
|
32762
33428
|
if (resolved.resolvedPath) {
|
|
32763
|
-
promptPath2 =
|
|
33429
|
+
promptPath2 = path41.resolve(resolved.resolvedPath);
|
|
32764
33430
|
} else {
|
|
32765
33431
|
throw new Error(
|
|
32766
33432
|
`Composite aggregator in '${evalId}': prompt file not found: ${resolved.displayPath}`
|
|
@@ -33440,7 +34106,7 @@ async function parseGraderList(candidateEvaluators, searchRoots, evalId, default
|
|
|
33440
34106
|
const commandPath = commandArray[commandArray.length - 1];
|
|
33441
34107
|
const resolved = await resolveFileReference3(commandPath, searchRoots);
|
|
33442
34108
|
if (resolved.resolvedPath) {
|
|
33443
|
-
resolvedPromptScript = [...commandArray.slice(0, -1),
|
|
34109
|
+
resolvedPromptScript = [...commandArray.slice(0, -1), path41.resolve(resolved.resolvedPath)];
|
|
33444
34110
|
} else {
|
|
33445
34111
|
throw new Error(
|
|
33446
34112
|
`Grader '${name}' in '${evalId}': prompt command file not found: ${resolved.displayPath}`
|
|
@@ -33455,7 +34121,7 @@ async function parseGraderList(candidateEvaluators, searchRoots, evalId, default
|
|
|
33455
34121
|
prompt = fileRef;
|
|
33456
34122
|
const resolved = await resolveFileReference3(fileRef, searchRoots);
|
|
33457
34123
|
if (resolved.resolvedPath) {
|
|
33458
|
-
promptPath =
|
|
34124
|
+
promptPath = path41.resolve(resolved.resolvedPath);
|
|
33459
34125
|
try {
|
|
33460
34126
|
await validateCustomPromptContent(promptPath);
|
|
33461
34127
|
} catch (error40) {
|
|
@@ -33613,7 +34279,7 @@ async function parsePreprocessors(rawValue, searchRoots, evaluatorName, evalId)
|
|
|
33613
34279
|
preprocessors.push({
|
|
33614
34280
|
type,
|
|
33615
34281
|
command,
|
|
33616
|
-
resolvedCommand: [...command.slice(0, -1),
|
|
34282
|
+
resolvedCommand: [...command.slice(0, -1), path41.resolve(resolved.resolvedPath)]
|
|
33617
34283
|
});
|
|
33618
34284
|
}
|
|
33619
34285
|
return preprocessors;
|
|
@@ -33708,10 +34374,10 @@ async function resolveOptionalCommandSource(command, searchRoots) {
|
|
|
33708
34374
|
return void 0;
|
|
33709
34375
|
}
|
|
33710
34376
|
const resolved = await resolveFileReference3(candidate, searchRoots);
|
|
33711
|
-
return resolved.resolvedPath ?
|
|
34377
|
+
return resolved.resolvedPath ? path41.resolve(resolved.resolvedPath) : void 0;
|
|
33712
34378
|
}
|
|
33713
34379
|
function looksLikeFilePath(value) {
|
|
33714
|
-
return
|
|
34380
|
+
return path41.isAbsolute(value) || value.startsWith(".") || value.includes("/") || value.includes("\\") || /\.[cm]?[jt]sx?$|\.py$|\.sh$|\.bash$|\.rb$|\.go$|\.rs$/i.test(value);
|
|
33715
34381
|
}
|
|
33716
34382
|
function parseCommandToArgv(command) {
|
|
33717
34383
|
if (process.platform === "win32") {
|
|
@@ -34099,7 +34765,7 @@ var IMAGE_MEDIA_TYPES = {
|
|
|
34099
34765
|
".bmp": "image/bmp"
|
|
34100
34766
|
};
|
|
34101
34767
|
function detectImageMediaType(filePath) {
|
|
34102
|
-
const ext =
|
|
34768
|
+
const ext = path422.extname(filePath).toLowerCase();
|
|
34103
34769
|
return IMAGE_MEDIA_TYPES[ext];
|
|
34104
34770
|
}
|
|
34105
34771
|
var ANSI_YELLOW5 = "\x1B[33m";
|
|
@@ -34158,12 +34824,12 @@ async function processMessages(options) {
|
|
|
34158
34824
|
continue;
|
|
34159
34825
|
}
|
|
34160
34826
|
try {
|
|
34161
|
-
const fileContent = (await
|
|
34827
|
+
const fileContent = (await readFile15(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
|
|
34162
34828
|
processedContent.push({
|
|
34163
34829
|
...cloneJsonObject(rawSegment),
|
|
34164
34830
|
path: displayPath,
|
|
34165
34831
|
text: fileContent,
|
|
34166
|
-
resolvedPath:
|
|
34832
|
+
resolvedPath: path422.resolve(resolvedPath)
|
|
34167
34833
|
});
|
|
34168
34834
|
if (verbose) {
|
|
34169
34835
|
const label = messageType === "input" ? "[File]" : "[Expected Output File]";
|
|
@@ -34199,7 +34865,7 @@ async function processMessages(options) {
|
|
|
34199
34865
|
continue;
|
|
34200
34866
|
}
|
|
34201
34867
|
try {
|
|
34202
|
-
const imageBuffer = await
|
|
34868
|
+
const imageBuffer = await readFile15(resolvedPath);
|
|
34203
34869
|
const base643 = imageBuffer.toString("base64");
|
|
34204
34870
|
processedContent.push({
|
|
34205
34871
|
type: "image",
|
|
@@ -34282,12 +34948,12 @@ async function processExpectedMessages(options) {
|
|
|
34282
34948
|
continue;
|
|
34283
34949
|
}
|
|
34284
34950
|
try {
|
|
34285
|
-
const fileContent = (await
|
|
34951
|
+
const fileContent = (await readFile15(resolvedPath, "utf8")).replace(/\r\n/g, "\n");
|
|
34286
34952
|
processedContent.push({
|
|
34287
34953
|
type: "file",
|
|
34288
34954
|
path: displayPath,
|
|
34289
34955
|
text: fileContent,
|
|
34290
|
-
resolvedPath:
|
|
34956
|
+
resolvedPath: path422.resolve(resolvedPath)
|
|
34291
34957
|
});
|
|
34292
34958
|
if (verbose) {
|
|
34293
34959
|
console.log(` [Expected Output File] Found: ${displayPath}`);
|
|
@@ -34322,7 +34988,7 @@ async function processExpectedMessages(options) {
|
|
|
34322
34988
|
continue;
|
|
34323
34989
|
}
|
|
34324
34990
|
try {
|
|
34325
|
-
const imageBuffer = await
|
|
34991
|
+
const imageBuffer = await readFile15(resolvedPath);
|
|
34326
34992
|
const base643 = imageBuffer.toString("base64");
|
|
34327
34993
|
processedContent.push({
|
|
34328
34994
|
type: "image",
|
|
@@ -34433,7 +35099,7 @@ function matchesFilter(id, filter) {
|
|
|
34433
35099
|
return typeof filter === "string" ? micromatch.isMatch(id, filter) : filter.some((pattern) => micromatch.isMatch(id, pattern));
|
|
34434
35100
|
}
|
|
34435
35101
|
function detectFormat(filePath) {
|
|
34436
|
-
const ext =
|
|
35102
|
+
const ext = path43.extname(filePath).toLowerCase();
|
|
34437
35103
|
if (ext === ".jsonl") return "jsonl";
|
|
34438
35104
|
if (ext === ".yaml" || ext === ".yml") return "yaml";
|
|
34439
35105
|
if (ext === ".json") return "agent-skills-json";
|
|
@@ -34443,9 +35109,9 @@ function detectFormat(filePath) {
|
|
|
34443
35109
|
);
|
|
34444
35110
|
}
|
|
34445
35111
|
async function loadSidecarMetadata(jsonlPath, verbose) {
|
|
34446
|
-
const dir =
|
|
34447
|
-
const base =
|
|
34448
|
-
const sidecarPath =
|
|
35112
|
+
const dir = path43.dirname(jsonlPath);
|
|
35113
|
+
const base = path43.basename(jsonlPath, ".jsonl");
|
|
35114
|
+
const sidecarPath = path43.join(dir, `${base}.yaml`);
|
|
34449
35115
|
if (!await fileExists3(sidecarPath)) {
|
|
34450
35116
|
if (verbose) {
|
|
34451
35117
|
logWarning4(`Sidecar metadata file not found: ${sidecarPath} (using defaults)`);
|
|
@@ -34453,7 +35119,7 @@ async function loadSidecarMetadata(jsonlPath, verbose) {
|
|
|
34453
35119
|
return {};
|
|
34454
35120
|
}
|
|
34455
35121
|
try {
|
|
34456
|
-
const content = await
|
|
35122
|
+
const content = await readFile16(sidecarPath, "utf8");
|
|
34457
35123
|
const parsed = interpolateEnv(parseYamlValue(content), process.env);
|
|
34458
35124
|
if (!isJsonObject(parsed)) {
|
|
34459
35125
|
logWarning4(`Invalid sidecar metadata format in ${sidecarPath}`);
|
|
@@ -34494,13 +35160,13 @@ function parseJsonlContent(content, filePath) {
|
|
|
34494
35160
|
async function loadTestsFromJsonl(evalFilePath, repoRoot, options) {
|
|
34495
35161
|
const verbose = options?.verbose ?? false;
|
|
34496
35162
|
const filterPattern = options?.filter;
|
|
34497
|
-
const absoluteTestPath =
|
|
35163
|
+
const absoluteTestPath = path43.resolve(evalFilePath);
|
|
34498
35164
|
const repoRootPath = resolveToAbsolutePath(repoRoot);
|
|
34499
35165
|
const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
|
|
34500
35166
|
const sidecar = await loadSidecarMetadata(absoluteTestPath, verbose);
|
|
34501
|
-
const rawFile = await
|
|
35167
|
+
const rawFile = await readFile16(absoluteTestPath, "utf8");
|
|
34502
35168
|
const rawCases = parseJsonlContent(rawFile, evalFilePath);
|
|
34503
|
-
const fallbackSuiteName =
|
|
35169
|
+
const fallbackSuiteName = path43.basename(absoluteTestPath, ".jsonl") || "eval";
|
|
34504
35170
|
const suiteName = sidecar.name && sidecar.name.trim().length > 0 ? sidecar.name : fallbackSuiteName;
|
|
34505
35171
|
const globalEvaluator = coerceEvaluator(sidecar.evaluator, "sidecar") ?? "llm-grader";
|
|
34506
35172
|
const globalExecution = sidecar.execution;
|
|
@@ -34906,8 +35572,8 @@ function interpolateRawEvalCase(raw, vars) {
|
|
|
34906
35572
|
}
|
|
34907
35573
|
async function readTestSuiteMetadata(testFilePath) {
|
|
34908
35574
|
try {
|
|
34909
|
-
const absolutePath =
|
|
34910
|
-
const content = await
|
|
35575
|
+
const absolutePath = path44.resolve(testFilePath);
|
|
35576
|
+
const content = await readFile17(absolutePath, "utf8");
|
|
34911
35577
|
const parsed = interpolateEnv(parseYamlValue(content), process.env);
|
|
34912
35578
|
if (!isJsonObject(parsed)) {
|
|
34913
35579
|
return {};
|
|
@@ -34931,7 +35597,7 @@ async function loadTestSuite(evalFilePath, repoRoot, options) {
|
|
|
34931
35597
|
return { tests: await loadTestsFromAgentSkills(evalFilePath) };
|
|
34932
35598
|
}
|
|
34933
35599
|
if (format === "typescript") {
|
|
34934
|
-
const { loadTsEvalSuite: loadTsEvalSuite2 } = await import("./ts-eval-loader-
|
|
35600
|
+
const { loadTsEvalSuite: loadTsEvalSuite2 } = await import("./ts-eval-loader-4DU65XGW-YM47FFG2.js");
|
|
34935
35601
|
return loadTsEvalSuite2(evalFilePath, resolveToAbsolutePath(repoRoot), options);
|
|
34936
35602
|
}
|
|
34937
35603
|
const { tests, parsed, suiteWorkspacePath } = await loadTestsFromYaml(
|
|
@@ -34966,7 +35632,7 @@ async function loadTests(evalFilePath, repoRoot, options) {
|
|
|
34966
35632
|
return loadTestsFromAgentSkills(evalFilePath);
|
|
34967
35633
|
}
|
|
34968
35634
|
if (format === "typescript") {
|
|
34969
|
-
const { loadTsEvalSuite: loadTsEvalSuite2 } = await import("./ts-eval-loader-
|
|
35635
|
+
const { loadTsEvalSuite: loadTsEvalSuite2 } = await import("./ts-eval-loader-4DU65XGW-YM47FFG2.js");
|
|
34970
35636
|
const suite = await loadTsEvalSuite2(evalFilePath, resolveToAbsolutePath(repoRoot), options);
|
|
34971
35637
|
return suite.tests;
|
|
34972
35638
|
}
|
|
@@ -34977,11 +35643,11 @@ var loadEvalCases = loadTests;
|
|
|
34977
35643
|
async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
34978
35644
|
const verbose = options?.verbose ?? false;
|
|
34979
35645
|
const filterPattern = options?.filter;
|
|
34980
|
-
const absoluteTestPath =
|
|
35646
|
+
const absoluteTestPath = path44.resolve(evalFilePath);
|
|
34981
35647
|
const repoRootPath = resolveToAbsolutePath(repoRoot);
|
|
34982
35648
|
const searchRoots = buildSearchRoots2(absoluteTestPath, repoRootPath);
|
|
34983
35649
|
const config2 = await loadConfig(absoluteTestPath, repoRootPath);
|
|
34984
|
-
const rawFile = await
|
|
35650
|
+
const rawFile = await readFile17(absoluteTestPath, "utf8");
|
|
34985
35651
|
const rawParsed = parseYamlValue(rawFile);
|
|
34986
35652
|
const rawCaseSnapshots = buildRawInlineTestSnapshots(rawParsed);
|
|
34987
35653
|
const interpolated = interpolateEnv(rawParsed, process.env);
|
|
@@ -34990,7 +35656,7 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
34990
35656
|
}
|
|
34991
35657
|
const suite = interpolated;
|
|
34992
35658
|
const suiteNameFromFile = asString5(suite.name)?.trim();
|
|
34993
|
-
const fallbackSuiteName =
|
|
35659
|
+
const fallbackSuiteName = path44.basename(absoluteTestPath).replace(/\.eval\.ya?ml$/i, "").replace(/\.ya?ml$/i, "") || "eval";
|
|
34994
35660
|
const suiteName = suiteNameFromFile && suiteNameFromFile.length > 0 ? suiteNameFromFile : fallbackSuiteName;
|
|
34995
35661
|
const rawTestCases = resolveTests(suite);
|
|
34996
35662
|
const globalEvaluator = coerceEvaluator(suite.evaluator, "global") ?? "llm-grader";
|
|
@@ -35000,10 +35666,10 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
35000
35666
|
"<suite>",
|
|
35001
35667
|
absoluteTestPath
|
|
35002
35668
|
);
|
|
35003
|
-
const evalFileDir =
|
|
35669
|
+
const evalFileDir = path44.dirname(absoluteTestPath);
|
|
35004
35670
|
let expandedTestCases;
|
|
35005
35671
|
if (typeof rawTestCases === "string") {
|
|
35006
|
-
const externalPath =
|
|
35672
|
+
const externalPath = path44.resolve(evalFileDir, rawTestCases);
|
|
35007
35673
|
let isDir = false;
|
|
35008
35674
|
try {
|
|
35009
35675
|
const pathStat = await stat8(externalPath);
|
|
@@ -35307,7 +35973,7 @@ function collectInputSourceReferences(inputMessages) {
|
|
|
35307
35973
|
references.push({
|
|
35308
35974
|
kind: "input_file",
|
|
35309
35975
|
displayPath,
|
|
35310
|
-
...typeof segment.resolvedPath === "string" ? { resolvedPath:
|
|
35976
|
+
...typeof segment.resolvedPath === "string" ? { resolvedPath: path44.resolve(segment.resolvedPath) } : {}
|
|
35311
35977
|
});
|
|
35312
35978
|
}
|
|
35313
35979
|
}
|
|
@@ -35380,7 +36046,7 @@ function collectSingleGraderSourceReferences(evaluator) {
|
|
|
35380
36046
|
references.push({
|
|
35381
36047
|
kind: "code_grader_command",
|
|
35382
36048
|
displayPath: evaluator.aggregator.path,
|
|
35383
|
-
resolvedPath:
|
|
36049
|
+
resolvedPath: path44.resolve(evaluator.aggregator.cwd ?? "", evaluator.aggregator.path),
|
|
35384
36050
|
graderName: evaluator.name
|
|
35385
36051
|
});
|
|
35386
36052
|
} else if (evaluator.aggregator.type === "llm-grader" && evaluator.aggregator.promptPath) {
|
|
@@ -35413,9 +36079,9 @@ function dedupeSourceReferences(references) {
|
|
|
35413
36079
|
return deduped;
|
|
35414
36080
|
}
|
|
35415
36081
|
function toPortableRelativePath(root, candidate) {
|
|
35416
|
-
const relative =
|
|
35417
|
-
if (relative && !relative.startsWith("..") && !
|
|
35418
|
-
return relative.split(
|
|
36082
|
+
const relative = path44.relative(root, candidate);
|
|
36083
|
+
if (relative && !relative.startsWith("..") && !path44.isAbsolute(relative)) {
|
|
36084
|
+
return relative.split(path44.sep).join("/");
|
|
35419
36085
|
}
|
|
35420
36086
|
return void 0;
|
|
35421
36087
|
}
|
|
@@ -35469,8 +36135,8 @@ function parseWorkspaceScriptConfig(raw, evalFileDir) {
|
|
|
35469
36135
|
if (!command) return void 0;
|
|
35470
36136
|
const timeoutMs = typeof obj.timeout_ms === "number" ? obj.timeout_ms : void 0;
|
|
35471
36137
|
let cwd = typeof obj.cwd === "string" ? obj.cwd : void 0;
|
|
35472
|
-
if (cwd && !
|
|
35473
|
-
cwd =
|
|
36138
|
+
if (cwd && !path44.isAbsolute(cwd)) {
|
|
36139
|
+
cwd = path44.resolve(evalFileDir, cwd);
|
|
35474
36140
|
}
|
|
35475
36141
|
const config2 = { command };
|
|
35476
36142
|
if (timeoutMs !== void 0) {
|
|
@@ -35508,10 +36174,10 @@ function parseWorkspaceHooksConfig(raw, evalFileDir) {
|
|
|
35508
36174
|
}
|
|
35509
36175
|
async function resolveWorkspaceConfig(raw, evalFileDir) {
|
|
35510
36176
|
if (typeof raw === "string") {
|
|
35511
|
-
const workspaceFilePath =
|
|
36177
|
+
const workspaceFilePath = path44.resolve(evalFileDir, raw);
|
|
35512
36178
|
let content;
|
|
35513
36179
|
try {
|
|
35514
|
-
content = await
|
|
36180
|
+
content = await readFile17(workspaceFilePath, "utf8");
|
|
35515
36181
|
} catch {
|
|
35516
36182
|
throw new Error(`Workspace file not found: ${raw} (resolved to ${workspaceFilePath})`);
|
|
35517
36183
|
}
|
|
@@ -35521,7 +36187,7 @@ async function resolveWorkspaceConfig(raw, evalFileDir) {
|
|
|
35521
36187
|
`Invalid workspace file format: ${workspaceFilePath} (expected a YAML object)`
|
|
35522
36188
|
);
|
|
35523
36189
|
}
|
|
35524
|
-
const workspaceFileDir =
|
|
36190
|
+
const workspaceFileDir = path44.dirname(workspaceFilePath);
|
|
35525
36191
|
const resolvedWorkspace = parseWorkspaceConfig(parsed, workspaceFileDir);
|
|
35526
36192
|
if (resolvedWorkspace) {
|
|
35527
36193
|
return { ...resolvedWorkspace, workspaceFileDir };
|
|
@@ -35555,8 +36221,8 @@ function parseWorkspaceConfig(raw, evalFileDir) {
|
|
|
35555
36221
|
throw new Error("workspace.static has been removed. Use workspace.mode='static'.");
|
|
35556
36222
|
}
|
|
35557
36223
|
let template = typeof obj.template === "string" ? obj.template : void 0;
|
|
35558
|
-
if (template && !
|
|
35559
|
-
template =
|
|
36224
|
+
if (template && !path44.isAbsolute(template)) {
|
|
36225
|
+
template = path44.resolve(evalFileDir, template);
|
|
35560
36226
|
}
|
|
35561
36227
|
const isolation = obj.isolation === "shared" || obj.isolation === "per_test" ? obj.isolation : void 0;
|
|
35562
36228
|
const repos = Array.isArray(obj.repos) ? obj.repos.map(parseRepoConfig).filter(Boolean) : void 0;
|
|
@@ -35695,6 +36361,9 @@ ${detailBlock}${ANSI_RESET8}`);
|
|
|
35695
36361
|
}
|
|
35696
36362
|
var execFileAsync3 = promisify7(execFile3);
|
|
35697
36363
|
var WORKSPACE_GIT_TIMEOUT_MS = 3e5;
|
|
36364
|
+
function pathFromRoot(root) {
|
|
36365
|
+
return root instanceof URL ? fileURLToPath5(root) : String(root);
|
|
36366
|
+
}
|
|
35698
36367
|
function classifyQualityStatus(score, threshold = DEFAULT_THRESHOLD) {
|
|
35699
36368
|
return score >= threshold ? "ok" : "quality_failure";
|
|
35700
36369
|
}
|
|
@@ -35747,7 +36416,7 @@ function workspaceGitEnv() {
|
|
|
35747
36416
|
};
|
|
35748
36417
|
}
|
|
35749
36418
|
async function resetWorkspaceRoot(workspacePath, resetMode, baselineRef) {
|
|
35750
|
-
if (!existsSync5(
|
|
36419
|
+
if (!existsSync5(path45.join(workspacePath, ".git"))) {
|
|
35751
36420
|
return false;
|
|
35752
36421
|
}
|
|
35753
36422
|
const cleanFlag = resetMode === "strict" ? "-fdx" : "-fd";
|
|
@@ -35790,18 +36459,18 @@ function validateDependencyGraph(tests) {
|
|
|
35790
36459
|
}
|
|
35791
36460
|
const visited = /* @__PURE__ */ new Set();
|
|
35792
36461
|
const visiting = /* @__PURE__ */ new Set();
|
|
35793
|
-
function visit(id,
|
|
36462
|
+
function visit(id, path48) {
|
|
35794
36463
|
if (visiting.has(id)) {
|
|
35795
|
-
const cycle = [...
|
|
36464
|
+
const cycle = [...path48.slice(path48.indexOf(id)), id];
|
|
35796
36465
|
throw new Error(`Circular dependency detected: ${cycle.join(" \u2192 ")}`);
|
|
35797
36466
|
}
|
|
35798
36467
|
if (visited.has(id)) return;
|
|
35799
36468
|
visiting.add(id);
|
|
35800
|
-
|
|
36469
|
+
path48.push(id);
|
|
35801
36470
|
for (const dep of depMap.get(id) ?? []) {
|
|
35802
|
-
visit(dep,
|
|
36471
|
+
visit(dep, path48);
|
|
35803
36472
|
}
|
|
35804
|
-
|
|
36473
|
+
path48.pop();
|
|
35805
36474
|
visiting.delete(id);
|
|
35806
36475
|
visited.add(id);
|
|
35807
36476
|
}
|
|
@@ -35890,8 +36559,10 @@ async function runEvaluation(options) {
|
|
|
35890
36559
|
retainOnFailure,
|
|
35891
36560
|
graderTarget: cliGraderTarget,
|
|
35892
36561
|
model: cliModel,
|
|
35893
|
-
threshold: scoreThreshold
|
|
36562
|
+
threshold: scoreThreshold,
|
|
36563
|
+
replayRecording
|
|
35894
36564
|
} = options;
|
|
36565
|
+
const repoRootPath = pathFromRoot(repoRoot);
|
|
35895
36566
|
let useCache = options.useCache;
|
|
35896
36567
|
if (trials && trials.count > 1 && useCache) {
|
|
35897
36568
|
console.warn(
|
|
@@ -35981,7 +36652,7 @@ async function runEvaluation(options) {
|
|
|
35981
36652
|
];
|
|
35982
36653
|
const evaluatorRegistry = buildEvaluatorRegistry(evaluators, resolveGraderProvider);
|
|
35983
36654
|
const typeRegistry = createBuiltinRegistry();
|
|
35984
|
-
const discoveryBaseDir = evalFilePath ?
|
|
36655
|
+
const discoveryBaseDir = evalFilePath ? path45.dirname(path45.resolve(evalFilePath)) : process.cwd();
|
|
35985
36656
|
const evalDir = discoveryBaseDir;
|
|
35986
36657
|
await discoverAssertions(typeRegistry, discoveryBaseDir);
|
|
35987
36658
|
await discoverGraders(typeRegistry, discoveryBaseDir);
|
|
@@ -36023,7 +36694,10 @@ async function runEvaluation(options) {
|
|
|
36023
36694
|
agentTimeoutMs,
|
|
36024
36695
|
targetResolver,
|
|
36025
36696
|
availableTargets,
|
|
36026
|
-
threshold: scoreThreshold
|
|
36697
|
+
threshold: scoreThreshold,
|
|
36698
|
+
replayRecording,
|
|
36699
|
+
evalFilePath,
|
|
36700
|
+
repoRoot: repoRootPath
|
|
36027
36701
|
});
|
|
36028
36702
|
} catch (error40) {
|
|
36029
36703
|
if (verbose) {
|
|
@@ -36135,7 +36809,7 @@ async function runEvaluation(options) {
|
|
|
36135
36809
|
const isEmpty = dirExists ? (await readdir8(configuredStaticPath)).length === 0 : false;
|
|
36136
36810
|
if (isYamlConfiguredPath && (!dirExists || isEmpty)) {
|
|
36137
36811
|
if (!dirExists) {
|
|
36138
|
-
await
|
|
36812
|
+
await mkdir16(configuredStaticPath, { recursive: true });
|
|
36139
36813
|
}
|
|
36140
36814
|
if (workspaceTemplate) {
|
|
36141
36815
|
await copyDirectoryRecursive(workspaceTemplate, configuredStaticPath);
|
|
@@ -36180,15 +36854,14 @@ async function runEvaluation(options) {
|
|
|
36180
36854
|
}
|
|
36181
36855
|
} else if (!isPerTestIsolation && (suiteWorkspace?.hooks || suiteWorkspace?.repos?.length)) {
|
|
36182
36856
|
sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
|
|
36183
|
-
await
|
|
36857
|
+
await mkdir16(sharedWorkspacePath, { recursive: true });
|
|
36184
36858
|
setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
|
|
36185
36859
|
}
|
|
36186
36860
|
try {
|
|
36187
36861
|
let toDependencyResult2 = function(r) {
|
|
36188
|
-
const outputText = extractLastAssistantContent(r.output);
|
|
36189
36862
|
return {
|
|
36190
36863
|
score: r.score,
|
|
36191
|
-
output:
|
|
36864
|
+
output: r.output,
|
|
36192
36865
|
workspace_path: r.workspacePath,
|
|
36193
36866
|
details: r.scores ? Object.fromEntries(
|
|
36194
36867
|
r.scores.map((s) => [s.name, { score: s.score, verdict: s.verdict }])
|
|
@@ -36222,7 +36895,7 @@ async function runEvaluation(options) {
|
|
|
36222
36895
|
};
|
|
36223
36896
|
var toDependencyResult = toDependencyResult2, checkDependencies = checkDependencies2, extractEvaluationCostUsd = extractEvaluationCostUsd2;
|
|
36224
36897
|
if (suiteWorkspaceFile && sharedWorkspacePath) {
|
|
36225
|
-
const copiedWorkspaceFile =
|
|
36898
|
+
const copiedWorkspaceFile = path45.join(sharedWorkspacePath, path45.basename(suiteWorkspaceFile));
|
|
36226
36899
|
try {
|
|
36227
36900
|
await stat9(copiedWorkspaceFile);
|
|
36228
36901
|
suiteWorkspaceFile = copiedWorkspaceFile;
|
|
@@ -36238,7 +36911,7 @@ async function runEvaluation(options) {
|
|
|
36238
36911
|
if (needsPerRepoCheck) {
|
|
36239
36912
|
for (const repo of suiteWorkspace.repos) {
|
|
36240
36913
|
if (!repo.path || !repo.source) continue;
|
|
36241
|
-
const targetDir =
|
|
36914
|
+
const targetDir = path45.join(sharedWorkspacePath, repo.path);
|
|
36242
36915
|
if (existsSync5(targetDir)) {
|
|
36243
36916
|
setupLog(`reusing existing repo at: ${targetDir}`);
|
|
36244
36917
|
continue;
|
|
@@ -36427,6 +37100,7 @@ async function runEvaluation(options) {
|
|
|
36427
37100
|
const workerId = nextWorkerId++;
|
|
36428
37101
|
workerIdByEvalId.set(evalCase.id, workerId);
|
|
36429
37102
|
if (runBudgetTracker?.isExceeded()) {
|
|
37103
|
+
const errorMessage = `Run budget exceeded ($${runBudgetTracker.currentCostUsd.toFixed(4)} / $${runBudgetTracker.budgetCapUsd.toFixed(4)})`;
|
|
36430
37104
|
const budgetResult = {
|
|
36431
37105
|
timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
36432
37106
|
testId: evalCase.id,
|
|
@@ -36434,15 +37108,24 @@ async function runEvaluation(options) {
|
|
|
36434
37108
|
category: evalCase.category,
|
|
36435
37109
|
score: 0,
|
|
36436
37110
|
assertions: [],
|
|
36437
|
-
output:
|
|
37111
|
+
output: errorMessage,
|
|
37112
|
+
trace: buildTraceFromMessages({
|
|
37113
|
+
input: evalCase.input,
|
|
37114
|
+
output: [{ role: "assistant", content: errorMessage }],
|
|
37115
|
+
finalOutput: errorMessage,
|
|
37116
|
+
target: target.name,
|
|
37117
|
+
testId: evalCase.id,
|
|
37118
|
+
conversationId: evalCase.conversation_id,
|
|
37119
|
+
error: errorMessage
|
|
37120
|
+
}),
|
|
36438
37121
|
target: target.name,
|
|
36439
|
-
error:
|
|
37122
|
+
error: errorMessage,
|
|
36440
37123
|
budgetExceeded: true,
|
|
36441
37124
|
executionStatus: "execution_error",
|
|
36442
37125
|
failureStage: "setup",
|
|
36443
37126
|
failureReasonCode: "budget_exceeded",
|
|
36444
37127
|
executionError: {
|
|
36445
|
-
message:
|
|
37128
|
+
message: errorMessage,
|
|
36446
37129
|
stage: "setup"
|
|
36447
37130
|
}
|
|
36448
37131
|
};
|
|
@@ -36463,6 +37146,7 @@ async function runEvaluation(options) {
|
|
|
36463
37146
|
return budgetResult;
|
|
36464
37147
|
}
|
|
36465
37148
|
if (budgetUsd !== void 0 && budgetExhausted) {
|
|
37149
|
+
const errorMessage = `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${budgetUsd.toFixed(4)})`;
|
|
36466
37150
|
const budgetResult = {
|
|
36467
37151
|
timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
36468
37152
|
testId: evalCase.id,
|
|
@@ -36470,15 +37154,24 @@ async function runEvaluation(options) {
|
|
|
36470
37154
|
category: evalCase.category,
|
|
36471
37155
|
score: 0,
|
|
36472
37156
|
assertions: [],
|
|
36473
|
-
output:
|
|
37157
|
+
output: errorMessage,
|
|
37158
|
+
trace: buildTraceFromMessages({
|
|
37159
|
+
input: evalCase.input,
|
|
37160
|
+
output: [{ role: "assistant", content: errorMessage }],
|
|
37161
|
+
finalOutput: errorMessage,
|
|
37162
|
+
target: target.name,
|
|
37163
|
+
testId: evalCase.id,
|
|
37164
|
+
conversationId: evalCase.conversation_id,
|
|
37165
|
+
error: errorMessage
|
|
37166
|
+
}),
|
|
36474
37167
|
target: target.name,
|
|
36475
|
-
error:
|
|
37168
|
+
error: errorMessage,
|
|
36476
37169
|
budgetExceeded: true,
|
|
36477
37170
|
executionStatus: "execution_error",
|
|
36478
37171
|
failureStage: "setup",
|
|
36479
37172
|
failureReasonCode: "budget_exceeded",
|
|
36480
37173
|
executionError: {
|
|
36481
|
-
message:
|
|
37174
|
+
message: errorMessage,
|
|
36482
37175
|
stage: "setup"
|
|
36483
37176
|
}
|
|
36484
37177
|
};
|
|
@@ -36507,7 +37200,16 @@ async function runEvaluation(options) {
|
|
|
36507
37200
|
category: evalCase.category,
|
|
36508
37201
|
score: 0,
|
|
36509
37202
|
assertions: [],
|
|
36510
|
-
output:
|
|
37203
|
+
output: errorMsg,
|
|
37204
|
+
trace: buildTraceFromMessages({
|
|
37205
|
+
input: evalCase.input,
|
|
37206
|
+
output: [{ role: "assistant", content: errorMsg }],
|
|
37207
|
+
finalOutput: errorMsg,
|
|
37208
|
+
target: target.name,
|
|
37209
|
+
testId: evalCase.id,
|
|
37210
|
+
conversationId: evalCase.conversation_id,
|
|
37211
|
+
error: errorMsg
|
|
37212
|
+
}),
|
|
36511
37213
|
target: target.name,
|
|
36512
37214
|
error: errorMsg,
|
|
36513
37215
|
executionStatus: "execution_error",
|
|
@@ -36572,6 +37274,9 @@ async function runEvaluation(options) {
|
|
|
36572
37274
|
verbose,
|
|
36573
37275
|
threshold: scoreThreshold,
|
|
36574
37276
|
targetHooks: options.targetHooks,
|
|
37277
|
+
replayRecording,
|
|
37278
|
+
evalFilePath,
|
|
37279
|
+
repoRoot: repoRootPath,
|
|
36575
37280
|
...depResults && Object.keys(depResults).length > 0 ? { dependencyResults: depResults } : {}
|
|
36576
37281
|
};
|
|
36577
37282
|
let result = trials && trials.count > 1 ? await runEvalCaseWithTrials(runCaseOptions, trials) : await runEvalCase(runCaseOptions);
|
|
@@ -36653,7 +37358,16 @@ async function runEvaluation(options) {
|
|
|
36653
37358
|
category: evalCase.category,
|
|
36654
37359
|
score: 0,
|
|
36655
37360
|
assertions: [],
|
|
36656
|
-
output:
|
|
37361
|
+
output: errorMsg,
|
|
37362
|
+
trace: buildTraceFromMessages({
|
|
37363
|
+
input: evalCase.input,
|
|
37364
|
+
output: [{ role: "assistant", content: errorMsg }],
|
|
37365
|
+
finalOutput: errorMsg,
|
|
37366
|
+
target: target.name,
|
|
37367
|
+
testId: evalCase.id,
|
|
37368
|
+
conversationId: evalCase.conversation_id,
|
|
37369
|
+
error: errorMsg
|
|
37370
|
+
}),
|
|
36657
37371
|
target: target.name,
|
|
36658
37372
|
error: errorMsg,
|
|
36659
37373
|
executionStatus: "execution_error",
|
|
@@ -36803,7 +37517,10 @@ async function runBatchEvaluation(options) {
|
|
|
36803
37517
|
agentTimeoutMs,
|
|
36804
37518
|
targetResolver,
|
|
36805
37519
|
availableTargets,
|
|
36806
|
-
threshold: batchThreshold
|
|
37520
|
+
threshold: batchThreshold,
|
|
37521
|
+
replayRecording,
|
|
37522
|
+
evalFilePath,
|
|
37523
|
+
repoRoot
|
|
36807
37524
|
} = options;
|
|
36808
37525
|
const promptInputsList = [];
|
|
36809
37526
|
const formattingMode = usesFileReferencePrompt(provider) ? "agent" : "lm";
|
|
@@ -36817,7 +37534,9 @@ async function runBatchEvaluation(options) {
|
|
|
36817
37534
|
question: promptInputs.question,
|
|
36818
37535
|
systemPrompt: promptInputs.systemMessage,
|
|
36819
37536
|
inputFiles: evalCase.file_paths,
|
|
36820
|
-
evalCaseId: evalCase.id
|
|
37537
|
+
evalCaseId: evalCase.id,
|
|
37538
|
+
suite: evalCase.suite,
|
|
37539
|
+
evalFilePath
|
|
36821
37540
|
};
|
|
36822
37541
|
});
|
|
36823
37542
|
const batchResponse = await provider.invokeBatch?.(batchRequests);
|
|
@@ -36845,6 +37564,16 @@ async function runBatchEvaluation(options) {
|
|
|
36845
37564
|
const evalCase = evalCases[i];
|
|
36846
37565
|
const promptInputs = promptInputsList[i];
|
|
36847
37566
|
const providerResponse = batchResponse[i];
|
|
37567
|
+
await maybeRecordReplayFixture({
|
|
37568
|
+
replayRecording,
|
|
37569
|
+
evalCase,
|
|
37570
|
+
evalFilePath,
|
|
37571
|
+
repoRoot,
|
|
37572
|
+
target,
|
|
37573
|
+
attempt: 0,
|
|
37574
|
+
response: providerResponse,
|
|
37575
|
+
nowFn
|
|
37576
|
+
});
|
|
36848
37577
|
const output = providerResponse.output;
|
|
36849
37578
|
const hasExecutionMetrics = providerResponse.tokenUsage !== void 0 || providerResponse.costUsd !== void 0 || providerResponse.durationMs !== void 0;
|
|
36850
37579
|
const computed = output ? computeTraceSummary(output) : hasExecutionMetrics ? { trace: { eventCount: 0, toolCalls: {}, errorCount: 0 } } : void 0;
|
|
@@ -36892,6 +37621,10 @@ async function runBatchEvaluation(options) {
|
|
|
36892
37621
|
if (providerError) {
|
|
36893
37622
|
result = {
|
|
36894
37623
|
...result,
|
|
37624
|
+
trace: appendErrorEventToTrace(result.trace, providerError, {
|
|
37625
|
+
failure_stage: "agent",
|
|
37626
|
+
failure_reason_code: "provider_error"
|
|
37627
|
+
}),
|
|
36895
37628
|
error: providerError,
|
|
36896
37629
|
executionStatus: "execution_error",
|
|
36897
37630
|
failureStage: "agent",
|
|
@@ -36950,6 +37683,27 @@ async function runBatchEvaluation(options) {
|
|
|
36950
37683
|
}
|
|
36951
37684
|
return results;
|
|
36952
37685
|
}
|
|
37686
|
+
async function maybeRecordReplayFixture(options) {
|
|
37687
|
+
const { replayRecording, evalCase, evalFilePath, repoRoot, target, attempt, response, nowFn } = options;
|
|
37688
|
+
if (!replayRecording || target.kind === "replay") {
|
|
37689
|
+
return;
|
|
37690
|
+
}
|
|
37691
|
+
if (!evalFilePath || !repoRoot) {
|
|
37692
|
+
throw new Error("Replay recording requires evalFilePath and repoRoot");
|
|
37693
|
+
}
|
|
37694
|
+
const record2 = buildReplayFixtureRecord({
|
|
37695
|
+
evalCase,
|
|
37696
|
+
evalFilePath,
|
|
37697
|
+
repoRoot,
|
|
37698
|
+
target,
|
|
37699
|
+
sourceTarget: replayRecording.sourceTarget,
|
|
37700
|
+
attempt,
|
|
37701
|
+
variant: replayRecording.variant,
|
|
37702
|
+
response,
|
|
37703
|
+
now: nowFn
|
|
37704
|
+
});
|
|
37705
|
+
await appendReplayFixtureRecord(replayRecording.fixturesPath, record2);
|
|
37706
|
+
}
|
|
36953
37707
|
async function runEvalCase(options) {
|
|
36954
37708
|
const {
|
|
36955
37709
|
evalCase,
|
|
@@ -36978,7 +37732,10 @@ async function runEvalCase(options) {
|
|
|
36978
37732
|
evalDir,
|
|
36979
37733
|
verbose,
|
|
36980
37734
|
threshold: caseThreshold,
|
|
36981
|
-
dependencyResults
|
|
37735
|
+
dependencyResults,
|
|
37736
|
+
replayRecording,
|
|
37737
|
+
evalFilePath,
|
|
37738
|
+
repoRoot
|
|
36982
37739
|
} = options;
|
|
36983
37740
|
const setupDebug = process.env.AGENTV_SETUP_DEBUG === "1";
|
|
36984
37741
|
const formattingMode = usesFileReferencePrompt(provider) ? "agent" : "lm";
|
|
@@ -37020,7 +37777,7 @@ async function runEvalCase(options) {
|
|
|
37020
37777
|
);
|
|
37021
37778
|
}
|
|
37022
37779
|
if (caseWorkspaceFile && workspacePath) {
|
|
37023
|
-
const copiedFile =
|
|
37780
|
+
const copiedFile = path45.join(workspacePath, path45.basename(caseWorkspaceFile));
|
|
37024
37781
|
try {
|
|
37025
37782
|
await stat9(copiedFile);
|
|
37026
37783
|
caseWorkspaceFile = copiedFile;
|
|
@@ -37030,7 +37787,7 @@ async function runEvalCase(options) {
|
|
|
37030
37787
|
}
|
|
37031
37788
|
if (!workspacePath && (evalCase.workspace?.hooks || evalCase.workspace?.repos?.length) && evalRunId) {
|
|
37032
37789
|
workspacePath = getWorkspacePath(evalRunId, evalCase.id);
|
|
37033
|
-
await
|
|
37790
|
+
await mkdir16(workspacePath, { recursive: true });
|
|
37034
37791
|
}
|
|
37035
37792
|
if (evalCase.workspace?.repos?.length && workspacePath) {
|
|
37036
37793
|
const localPathErrors = RepoManager.validateLocalPaths(evalCase.workspace.repos);
|
|
@@ -37082,10 +37839,10 @@ async function runEvalCase(options) {
|
|
|
37082
37839
|
const files = evalCase.metadata.agent_skills_files;
|
|
37083
37840
|
if (baseDir && files.length > 0) {
|
|
37084
37841
|
for (const relPath of files) {
|
|
37085
|
-
const srcPath =
|
|
37086
|
-
const destPath =
|
|
37842
|
+
const srcPath = path45.resolve(baseDir, relPath);
|
|
37843
|
+
const destPath = path45.resolve(workspacePath, relPath);
|
|
37087
37844
|
try {
|
|
37088
|
-
await
|
|
37845
|
+
await mkdir16(path45.dirname(destPath), { recursive: true });
|
|
37089
37846
|
await copyFile2(srcPath, destPath);
|
|
37090
37847
|
} catch (error40) {
|
|
37091
37848
|
const message = error40 instanceof Error ? error40.message : String(error40);
|
|
@@ -37277,7 +38034,8 @@ async function runEvalCase(options) {
|
|
|
37277
38034
|
verbose,
|
|
37278
38035
|
threshold: evalCase.threshold ?? caseThreshold,
|
|
37279
38036
|
targetResolver,
|
|
37280
|
-
availableTargets
|
|
38037
|
+
availableTargets,
|
|
38038
|
+
evalFilePath
|
|
37281
38039
|
});
|
|
37282
38040
|
if (workspacePath && !isSharedWorkspace) {
|
|
37283
38041
|
const shouldRetain = conversationResult.executionStatus === "ok" ? retainOnSuccess === "keep" || keepWorkspaces : retainOnFailure === "keep" || !forceCleanup && !keepWorkspaces;
|
|
@@ -37301,6 +38059,7 @@ async function runEvalCase(options) {
|
|
|
37301
38059
|
target,
|
|
37302
38060
|
promptInputs,
|
|
37303
38061
|
attempt,
|
|
38062
|
+
evalFilePath,
|
|
37304
38063
|
agentTimeoutMs,
|
|
37305
38064
|
signal,
|
|
37306
38065
|
cwd: workspacePath,
|
|
@@ -37331,6 +38090,7 @@ async function runEvalCase(options) {
|
|
|
37331
38090
|
target,
|
|
37332
38091
|
promptInputs,
|
|
37333
38092
|
attempt: 0,
|
|
38093
|
+
evalFilePath,
|
|
37334
38094
|
agentTimeoutMs,
|
|
37335
38095
|
signal,
|
|
37336
38096
|
cwd: workspacePath,
|
|
@@ -37366,6 +38126,19 @@ async function runEvalCase(options) {
|
|
|
37366
38126
|
}
|
|
37367
38127
|
return errorResult;
|
|
37368
38128
|
}
|
|
38129
|
+
const responseWasCached = cachedResponse !== void 0 && providerResponse === cachedResponse;
|
|
38130
|
+
if (!responseWasCached) {
|
|
38131
|
+
await maybeRecordReplayFixture({
|
|
38132
|
+
replayRecording,
|
|
38133
|
+
evalCase,
|
|
38134
|
+
evalFilePath,
|
|
38135
|
+
repoRoot,
|
|
38136
|
+
target: targetUsed ? { ...target, name: targetUsed } : target,
|
|
38137
|
+
attempt,
|
|
38138
|
+
response: providerResponse,
|
|
38139
|
+
nowFn
|
|
38140
|
+
});
|
|
38141
|
+
}
|
|
37369
38142
|
if (cacheKey && cache && !cachedResponse) {
|
|
37370
38143
|
await cache.set(cacheKey, providerResponse);
|
|
37371
38144
|
}
|
|
@@ -37512,6 +38285,10 @@ ${providerFileChanges}` : providerFileChanges;
|
|
|
37512
38285
|
...result,
|
|
37513
38286
|
...targetUsedField,
|
|
37514
38287
|
evalRun,
|
|
38288
|
+
trace: appendErrorEventToTrace(result.trace, providerError, {
|
|
38289
|
+
failure_stage: "agent",
|
|
38290
|
+
failure_reason_code: "provider_error"
|
|
38291
|
+
}),
|
|
37515
38292
|
error: providerError,
|
|
37516
38293
|
executionStatus,
|
|
37517
38294
|
failureStage: "agent",
|
|
@@ -37525,6 +38302,10 @@ ${providerFileChanges}` : providerFileChanges;
|
|
|
37525
38302
|
...targetUsedField,
|
|
37526
38303
|
score: 0,
|
|
37527
38304
|
evalRun,
|
|
38305
|
+
trace: appendErrorEventToTrace(result.trace, skippedEvaluatorError, {
|
|
38306
|
+
failure_stage: "evaluator",
|
|
38307
|
+
failure_reason_code: "evaluator_error"
|
|
38308
|
+
}),
|
|
37528
38309
|
error: skippedEvaluatorError,
|
|
37529
38310
|
executionStatus,
|
|
37530
38311
|
failureStage: "evaluator",
|
|
@@ -37689,6 +38470,23 @@ async function evaluateCandidate(options) {
|
|
|
37689
38470
|
threshold: evalThreshold,
|
|
37690
38471
|
dependencyResults
|
|
37691
38472
|
} = options;
|
|
38473
|
+
const input = buildResultInput(promptInputs);
|
|
38474
|
+
const outputMessages = output ?? [{ role: "assistant", content: candidate }];
|
|
38475
|
+
const evaluationTrace = buildTraceFromMessages({
|
|
38476
|
+
input,
|
|
38477
|
+
output: outputMessages,
|
|
38478
|
+
summary: trace,
|
|
38479
|
+
finalOutput: candidate,
|
|
38480
|
+
tokenUsage,
|
|
38481
|
+
costUsd,
|
|
38482
|
+
durationMs,
|
|
38483
|
+
startTime,
|
|
38484
|
+
endTime,
|
|
38485
|
+
provider: provider.kind,
|
|
38486
|
+
target: target.name,
|
|
38487
|
+
testId: evalCase.id,
|
|
38488
|
+
conversationId: evalCase.conversation_id
|
|
38489
|
+
});
|
|
37692
38490
|
const gradeTimestamp = nowFn();
|
|
37693
38491
|
const { score, scores } = await runEvaluatorsForCase({
|
|
37694
38492
|
evalCase,
|
|
@@ -37703,7 +38501,7 @@ async function evaluateCandidate(options) {
|
|
|
37703
38501
|
graderProvider,
|
|
37704
38502
|
agentTimeoutMs,
|
|
37705
38503
|
output,
|
|
37706
|
-
trace,
|
|
38504
|
+
trace: evaluationTrace,
|
|
37707
38505
|
costUsd,
|
|
37708
38506
|
durationMs,
|
|
37709
38507
|
tokenUsage,
|
|
@@ -37743,7 +38541,6 @@ async function evaluateCandidate(options) {
|
|
|
37743
38541
|
...lmRequest ? { lm: lmRequest } : {},
|
|
37744
38542
|
...evaluatorRequest ? { evaluator: evaluatorRequest } : {}
|
|
37745
38543
|
} : void 0;
|
|
37746
|
-
const input = buildResultInput(promptInputs);
|
|
37747
38544
|
return {
|
|
37748
38545
|
timestamp: completedAt.toISOString(),
|
|
37749
38546
|
testId: evalCase.id,
|
|
@@ -37760,9 +38557,9 @@ async function evaluateCandidate(options) {
|
|
|
37760
38557
|
endTime,
|
|
37761
38558
|
requests,
|
|
37762
38559
|
input,
|
|
37763
|
-
output:
|
|
38560
|
+
output: candidate,
|
|
37764
38561
|
scores,
|
|
37765
|
-
trace,
|
|
38562
|
+
trace: evaluationTrace,
|
|
37766
38563
|
fileChanges,
|
|
37767
38564
|
executionStatus: classifyQualityStatus(score.score, evalThreshold)
|
|
37768
38565
|
};
|
|
@@ -37925,7 +38722,7 @@ async function runEvaluatorList(options) {
|
|
|
37925
38722
|
dockerConfig,
|
|
37926
38723
|
dependencyResults
|
|
37927
38724
|
};
|
|
37928
|
-
const evalFileDir = evalCase.file_paths[0] ?
|
|
38725
|
+
const evalFileDir = evalCase.file_paths[0] ? path45.dirname(evalCase.file_paths[0]) : process.cwd();
|
|
37929
38726
|
const dispatchContext = {
|
|
37930
38727
|
graderProvider,
|
|
37931
38728
|
targetResolver,
|
|
@@ -38082,7 +38879,8 @@ async function runConversationMode(options) {
|
|
|
38082
38879
|
verbose,
|
|
38083
38880
|
threshold,
|
|
38084
38881
|
targetResolver,
|
|
38085
|
-
availableTargets
|
|
38882
|
+
availableTargets,
|
|
38883
|
+
evalFilePath
|
|
38086
38884
|
} = options;
|
|
38087
38885
|
const turns = evalCase.turns;
|
|
38088
38886
|
const aggregation = evalCase.aggregation ?? "mean";
|
|
@@ -38120,6 +38918,8 @@ async function runConversationMode(options) {
|
|
|
38120
38918
|
question: userContent,
|
|
38121
38919
|
chatPrompt: chatPromptForProvider,
|
|
38122
38920
|
evalCaseId: `${evalCase.id}/turn-${turnIndex}`,
|
|
38921
|
+
suite: evalCase.suite,
|
|
38922
|
+
evalFilePath,
|
|
38123
38923
|
signal,
|
|
38124
38924
|
cwd: workspacePath,
|
|
38125
38925
|
workspaceFile: caseWorkspaceFile,
|
|
@@ -38257,8 +39057,19 @@ async function runConversationMode(options) {
|
|
|
38257
39057
|
role: m.role,
|
|
38258
39058
|
content: m.content
|
|
38259
39059
|
}));
|
|
38260
|
-
const flatAssertions = allResultScores.flatMap((s) => [...s.assertions]);
|
|
38261
39060
|
const totalDurationMs = Date.now() - caseStartMs;
|
|
39061
|
+
const finalOutput = extractLastAssistantContent(outputMessages);
|
|
39062
|
+
const trace = buildTraceFromMessages({
|
|
39063
|
+
input: evalCase.input,
|
|
39064
|
+
output: outputMessages,
|
|
39065
|
+
finalOutput,
|
|
39066
|
+
durationMs: totalDurationMs,
|
|
39067
|
+
provider: provider.kind,
|
|
39068
|
+
target: target.name,
|
|
39069
|
+
testId: evalCase.id,
|
|
39070
|
+
conversationId: evalCase.conversation_id
|
|
39071
|
+
});
|
|
39072
|
+
const flatAssertions = allResultScores.flatMap((s) => [...s.assertions]);
|
|
38262
39073
|
return {
|
|
38263
39074
|
timestamp: nowFn().toISOString(),
|
|
38264
39075
|
testId: evalCase.id,
|
|
@@ -38267,7 +39078,8 @@ async function runConversationMode(options) {
|
|
|
38267
39078
|
score: finalScore,
|
|
38268
39079
|
assertions: flatAssertions,
|
|
38269
39080
|
target: target.name,
|
|
38270
|
-
output:
|
|
39081
|
+
output: finalOutput,
|
|
39082
|
+
trace,
|
|
38271
39083
|
scores: allResultScores,
|
|
38272
39084
|
executionStatus: classifyQualityStatus(finalScore, threshold ?? DEFAULT_THRESHOLD),
|
|
38273
39085
|
input: evalCase.input.map((m) => ({
|
|
@@ -38339,6 +39151,7 @@ async function invokeProvider(provider, options) {
|
|
|
38339
39151
|
evalCase,
|
|
38340
39152
|
promptInputs,
|
|
38341
39153
|
attempt,
|
|
39154
|
+
evalFilePath,
|
|
38342
39155
|
agentTimeoutMs,
|
|
38343
39156
|
signal,
|
|
38344
39157
|
cwd,
|
|
@@ -38359,6 +39172,8 @@ async function invokeProvider(provider, options) {
|
|
|
38359
39172
|
chatPrompt: promptInputs.chatPrompt,
|
|
38360
39173
|
inputFiles: evalCase.file_paths,
|
|
38361
39174
|
evalCaseId: evalCase.id,
|
|
39175
|
+
suite: evalCase.suite,
|
|
39176
|
+
evalFilePath,
|
|
38362
39177
|
attempt,
|
|
38363
39178
|
signal: controller.signal,
|
|
38364
39179
|
cwd,
|
|
@@ -38400,6 +39215,16 @@ function buildErrorResult(evalCase, targetName, timestamp, error40, promptInputs
|
|
|
38400
39215
|
...lmRequest ? { lm: lmRequest } : {}
|
|
38401
39216
|
} : void 0;
|
|
38402
39217
|
const input = buildResultInput(promptInputs);
|
|
39218
|
+
const output = `Error occurred: ${message}`;
|
|
39219
|
+
const trace = buildTraceFromMessages({
|
|
39220
|
+
input,
|
|
39221
|
+
output: [{ role: "assistant", content: output }],
|
|
39222
|
+
finalOutput: output,
|
|
39223
|
+
target: targetName,
|
|
39224
|
+
testId: evalCase.id,
|
|
39225
|
+
conversationId: evalCase.conversation_id,
|
|
39226
|
+
error: message
|
|
39227
|
+
});
|
|
38403
39228
|
return {
|
|
38404
39229
|
timestamp: timestamp.toISOString(),
|
|
38405
39230
|
testId: evalCase.id,
|
|
@@ -38411,7 +39236,8 @@ function buildErrorResult(evalCase, targetName, timestamp, error40, promptInputs
|
|
|
38411
39236
|
target: targetName,
|
|
38412
39237
|
requests,
|
|
38413
39238
|
input,
|
|
38414
|
-
output
|
|
39239
|
+
output,
|
|
39240
|
+
trace,
|
|
38415
39241
|
error: message,
|
|
38416
39242
|
executionStatus: "execution_error",
|
|
38417
39243
|
failureStage,
|
|
@@ -38432,7 +39258,7 @@ function extractProviderError(response) {
|
|
|
38432
39258
|
return trimmed.length > 0 ? trimmed : void 0;
|
|
38433
39259
|
}
|
|
38434
39260
|
function createCacheKey(provider, target, evalCase, promptInputs) {
|
|
38435
|
-
const hash =
|
|
39261
|
+
const hash = createHash3("sha256");
|
|
38436
39262
|
hash.update(provider.id);
|
|
38437
39263
|
hash.update(target.name);
|
|
38438
39264
|
hash.update(evalCase.id);
|
|
@@ -38654,7 +39480,7 @@ async function evaluate(config2) {
|
|
|
38654
39480
|
cliNoCache: false,
|
|
38655
39481
|
yamlCache: config2.cache === void 0 ? materialized.cache : void 0
|
|
38656
39482
|
});
|
|
38657
|
-
const cache = cacheEnabled ? new ResponseCache(materialized.cachePath ?
|
|
39483
|
+
const cache = cacheEnabled ? new ResponseCache(materialized.cachePath ? path46.resolve(materialized.cachePath) : void 0) : void 0;
|
|
38658
39484
|
const results = await runEvaluation({
|
|
38659
39485
|
testFilePath,
|
|
38660
39486
|
repoRoot,
|
|
@@ -38685,7 +39511,7 @@ async function evaluate(config2) {
|
|
|
38685
39511
|
async function materializeEvalConfig(config2, options) {
|
|
38686
39512
|
const baseDir = options?.baseDir ?? process.cwd();
|
|
38687
39513
|
const repoRoot = options?.repoRoot ?? await findGitRoot(baseDir) ?? baseDir;
|
|
38688
|
-
const testFilePath = config2.specFile ?
|
|
39514
|
+
const testFilePath = config2.specFile ? path46.resolve(baseDir, config2.specFile) : path46.join(baseDir, "__programmatic__.yaml");
|
|
38689
39515
|
const effectiveFilter = options?.filter ?? config2.filter;
|
|
38690
39516
|
if (config2.specFile) {
|
|
38691
39517
|
const suite = await loadTestSuite(testFilePath, repoRoot, {
|
|
@@ -38762,7 +39588,7 @@ function convertAssertions(entries) {
|
|
|
38762
39588
|
}
|
|
38763
39589
|
function buildInlineEvalTests(config2, options) {
|
|
38764
39590
|
const suiteWorkspace = config2.beforeAll ? { hooks: { before_all: toBeforeAllHook(config2.beforeAll) } } : void 0;
|
|
38765
|
-
const derivedSuiteName =
|
|
39591
|
+
const derivedSuiteName = path46.basename(options.testFilePath).replace(/\.eval\.[cm]?ts$/i, "").replace(/\.[cm]?ts$/i, "");
|
|
38766
39592
|
const suiteName = config2.metadata?.name ?? (derivedSuiteName || "eval");
|
|
38767
39593
|
return (config2.tests ?? []).filter((test) => !options.filter || matchesFilter4(test.id, options.filter)).map((test) => {
|
|
38768
39594
|
const isConversation = test.mode === "conversation" || test.turns && test.turns.length > 0;
|
|
@@ -38858,10 +39684,10 @@ function computeSummary(results, durationMs, threshold = DEFAULT_THRESHOLD) {
|
|
|
38858
39684
|
var TARGET_FILE_CANDIDATES = [".agentv/targets.yaml", ".agentv/targets.yml"];
|
|
38859
39685
|
async function discoverDefaultTarget(repoRoot) {
|
|
38860
39686
|
const cwd = process.cwd();
|
|
38861
|
-
const chain = buildDirectoryChain(
|
|
39687
|
+
const chain = buildDirectoryChain(path46.join(cwd, "_placeholder"), repoRoot);
|
|
38862
39688
|
for (const dir of chain) {
|
|
38863
39689
|
for (const candidate of TARGET_FILE_CANDIDATES) {
|
|
38864
|
-
const targetsPath =
|
|
39690
|
+
const targetsPath = path46.join(dir, candidate);
|
|
38865
39691
|
if (!existsSync6(targetsPath)) continue;
|
|
38866
39692
|
try {
|
|
38867
39693
|
const definitions = await readTargetDefinitions(targetsPath);
|
|
@@ -38878,7 +39704,7 @@ async function loadEnvHierarchy(repoRoot, startPath) {
|
|
|
38878
39704
|
const chain = buildDirectoryChain(startPath, repoRoot);
|
|
38879
39705
|
const envFiles = [];
|
|
38880
39706
|
for (const dir of chain) {
|
|
38881
|
-
const envPath =
|
|
39707
|
+
const envPath = path46.join(dir, ".env");
|
|
38882
39708
|
if (existsSync6(envPath)) envFiles.push(envPath);
|
|
38883
39709
|
}
|
|
38884
39710
|
for (let i = 0; i < envFiles.length; i++) {
|
|
@@ -38904,7 +39730,7 @@ async function loadEnvHierarchy(repoRoot, startPath) {
|
|
|
38904
39730
|
}
|
|
38905
39731
|
var EXPORT_NAMES = ["default", "config", "evalConfig"];
|
|
38906
39732
|
async function loadTsEvalFile(filePath) {
|
|
38907
|
-
const absolutePath =
|
|
39733
|
+
const absolutePath = path47.resolve(filePath);
|
|
38908
39734
|
const moduleUrl = pathToFileURL2(absolutePath).href;
|
|
38909
39735
|
const module = await import(moduleUrl);
|
|
38910
39736
|
let config2;
|
|
@@ -38926,7 +39752,7 @@ async function loadTsEvalSuite(filePath, repoRoot, options) {
|
|
|
38926
39752
|
const { config: config2, filePath: absolutePath } = await loadTsEvalFile(filePath);
|
|
38927
39753
|
const materialized = await materializeEvalConfig(config2, {
|
|
38928
39754
|
repoRoot,
|
|
38929
|
-
baseDir:
|
|
39755
|
+
baseDir: path47.dirname(absolutePath),
|
|
38930
39756
|
filter: options?.filter,
|
|
38931
39757
|
category: options?.category
|
|
38932
39758
|
});
|
|
@@ -39034,6 +39860,11 @@ export {
|
|
|
39034
39860
|
NORMALIZED_TRACE_EVENT_TYPES,
|
|
39035
39861
|
NORMALIZED_TOOL_STATUSES,
|
|
39036
39862
|
NORMALIZED_REDACTION_LEVELS,
|
|
39863
|
+
TRACE_SCHEMA_VERSION,
|
|
39864
|
+
TRACE_SOURCE_KINDS,
|
|
39865
|
+
TRACE_EVENT_TYPES,
|
|
39866
|
+
TRACE_TOOL_STATUSES,
|
|
39867
|
+
TRACE_REDACTION_LEVELS,
|
|
39037
39868
|
NormalizedRedactionStateWireSchema,
|
|
39038
39869
|
NormalizedTraceErrorWireSchema,
|
|
39039
39870
|
NormalizedTraceSourceWireSchema,
|
|
@@ -39046,8 +39877,24 @@ export {
|
|
|
39046
39877
|
NormalizedTraceToolWireSchema,
|
|
39047
39878
|
NormalizedTraceEventWireSchema,
|
|
39048
39879
|
NormalizedTrajectoryWireSchema,
|
|
39880
|
+
TraceRedactionStateWireSchema,
|
|
39881
|
+
TraceErrorWireSchema,
|
|
39882
|
+
TraceSourceWireSchema,
|
|
39883
|
+
TraceSessionWireSchema,
|
|
39884
|
+
TraceBranchWireSchema,
|
|
39885
|
+
TraceSourceRefWireSchema,
|
|
39886
|
+
TraceRawEvidenceWireSchema,
|
|
39887
|
+
TraceMessageWireSchema,
|
|
39888
|
+
TraceModelWireSchema,
|
|
39889
|
+
TraceToolWireSchema,
|
|
39890
|
+
TraceEventWireSchema,
|
|
39891
|
+
TraceArtifactWireSchema,
|
|
39049
39892
|
toNormalizedTrajectoryWire,
|
|
39050
39893
|
fromNormalizedTrajectoryWire,
|
|
39894
|
+
toTraceArtifactWire,
|
|
39895
|
+
fromTraceArtifactWire,
|
|
39896
|
+
buildTraceFromMessages,
|
|
39897
|
+
appendErrorEventToTrace,
|
|
39051
39898
|
computeTraceSummary,
|
|
39052
39899
|
getSelectedTrajectoryEvents,
|
|
39053
39900
|
computeTraceSummaryFromTrajectory,
|
|
@@ -39094,6 +39941,14 @@ export {
|
|
|
39094
39941
|
consumePiLogEntries,
|
|
39095
39942
|
subscribeToPiLogEntries,
|
|
39096
39943
|
ProviderRegistry,
|
|
39944
|
+
REPLAY_FIXTURE_SCHEMA_VERSION,
|
|
39945
|
+
readReplayFixtureRecords,
|
|
39946
|
+
serializeReplayFixtureRecord,
|
|
39947
|
+
appendReplayFixtureRecord,
|
|
39948
|
+
findReplayFixtureRecord,
|
|
39949
|
+
replayFixtureRecordToProviderResponse,
|
|
39950
|
+
buildReplayFixtureRecord,
|
|
39951
|
+
ReplayProvider,
|
|
39097
39952
|
ensureVSCodeSubagents,
|
|
39098
39953
|
readTargetDefinitions,
|
|
39099
39954
|
listTargetNames,
|
|
@@ -39150,4 +40005,4 @@ export {
|
|
|
39150
40005
|
loadTsEvalFile,
|
|
39151
40006
|
loadTsEvalSuite
|
|
39152
40007
|
};
|
|
39153
|
-
//# sourceMappingURL=chunk-
|
|
40008
|
+
//# sourceMappingURL=chunk-TUTURE2B.js.map
|