@agentv/core 4.7.0 → 4.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-75RFVESM.js → chunk-VCVVKCC4.js} +54 -2
- package/dist/chunk-VCVVKCC4.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +2 -1
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +3 -2
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +337 -153
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +46 -1
- package/dist/index.d.ts +46 -1
- package/dist/index.js +262 -133
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-75RFVESM.js.map +0 -1
package/dist/index.js
CHANGED
|
@@ -25,7 +25,7 @@ import {
|
|
|
25
25
|
resolveDelegatedTargetDefinition,
|
|
26
26
|
resolveFileReference,
|
|
27
27
|
resolveTargetDefinition
|
|
28
|
-
} from "./chunk-
|
|
28
|
+
} from "./chunk-VCVVKCC4.js";
|
|
29
29
|
import {
|
|
30
30
|
AgentvProvider
|
|
31
31
|
} from "./chunk-PRNXHNLF.js";
|
|
@@ -2914,6 +2914,60 @@ function parseMetadata(suite) {
|
|
|
2914
2914
|
});
|
|
2915
2915
|
}
|
|
2916
2916
|
|
|
2917
|
+
// src/evaluation/workspace/repo-config-parser.ts
|
|
2918
|
+
/**
 * Parse the `source` section of a repo entry.
 *
 * @param {unknown} raw - Untrusted value read from an eval/workspace file.
 * @returns {{type: "git", url: string} | {type: "local", path: string} | undefined}
 *   A normalized source descriptor containing only the recognized fields, or
 *   `undefined` when `raw` is not a JSON object, has an unknown `type`, or is
 *   missing the string field required by its `type`.
 */
function parseRepoSource(raw) {
  if (!isJsonObject(raw)) {
    return undefined;
  }
  const { type, url, path: localPath } = raw;
  switch (type) {
    case "git":
      // A git source is only valid with a string URL.
      return typeof url === "string" ? { type: "git", url } : undefined;
    case "local":
      // A local source is only valid with a string path.
      return typeof localPath === "string" ? { type: "local", path: localPath } : undefined;
    default:
      return undefined;
  }
}
|
|
2929
|
+
/**
 * Parse the `checkout` section of a repo entry.
 *
 * Keeps `ref` when it is a string, `resolve` when it is exactly "remote" or
 * "local", and `ancestor` when it is a number; every other field is dropped.
 *
 * @param {unknown} raw - Untrusted value read from an eval/workspace file.
 * @returns {{ref?: string, resolve?: "remote" | "local", ancestor?: number} | undefined}
 *   The recognized fields, or `undefined` when `raw` is not a JSON object or
 *   carries nothing usable.
 */
function parseRepoCheckout(raw) {
  if (!isJsonObject(raw)) {
    return undefined;
  }
  const checkout = {};
  if (typeof raw.ref === "string") {
    checkout.ref = raw.ref;
  }
  if (raw.resolve === "remote" || raw.resolve === "local") {
    checkout.resolve = raw.resolve;
  }
  if (typeof raw.ancestor === "number") {
    checkout.ancestor = raw.ancestor;
  }
  // An empty-string ref on its own does not count as content, but it is still
  // passed through when another field is present.
  const hasContent =
    Boolean(checkout.ref) || checkout.resolve !== undefined || checkout.ancestor !== undefined;
  return hasContent ? checkout : undefined;
}
|
|
2942
|
+
/**
 * Parse the `clone` section of a repo entry.
 *
 * Keeps `depth` when it is a number, `filter` when it is a string, and
 * `sparse` when it is an array (non-string members are discarded).
 *
 * @param {unknown} raw - Untrusted value read from an eval/workspace file.
 * @returns {{depth?: number, filter?: string, sparse?: string[]} | undefined}
 *   The recognized fields, or `undefined` when `raw` is not a JSON object or
 *   carries nothing usable.
 */
function parseRepoClone(raw) {
  if (!isJsonObject(raw)) {
    return undefined;
  }
  const { depth, filter, sparse } = raw;
  const clone = {};
  if (typeof depth === "number") {
    clone.depth = depth;
  }
  if (typeof filter === "string") {
    clone.filter = filter;
  }
  if (Array.isArray(sparse)) {
    clone.sparse = sparse.filter((entry) => typeof entry === "string");
  }
  // An empty-string filter alone is treated as "nothing configured"; an empty
  // sparse array still counts as configured.
  const isEmpty = clone.depth === undefined && !clone.filter && !clone.sparse;
  return isEmpty ? undefined : clone;
}
|
|
2955
|
+
/**
 * Parse one repo entry into a normalized config object.
 *
 * @param {unknown} raw - Untrusted value read from an eval/workspace file.
 * @returns {{path: string, source: object, checkout?: object, clone?: object} | undefined}
 *   The normalized entry, or `undefined` when `raw` is not a JSON object, has
 *   no non-empty string `path`, or has no valid `source`. `checkout` and
 *   `clone` are present only when their parsers return a value.
 */
function parseRepoConfig(raw) {
  if (!isJsonObject(raw)) {
    return undefined;
  }
  const repoPath = typeof raw.path === "string" ? raw.path : undefined;
  const source = parseRepoSource(raw.source);
  // Both a destination path and a valid source are mandatory.
  if (!repoPath || !source) {
    return undefined;
  }
  const config = { path: repoPath, source };
  const checkout = parseRepoCheckout(raw.checkout);
  if (checkout !== undefined) {
    config.checkout = checkout;
  }
  const clone = parseRepoClone(raw.clone);
  if (clone !== undefined) {
    config.clone = clone;
  }
  return config;
}
|
|
2970
|
+
|
|
2917
2971
|
// src/evaluation/formatting/prompt-builder.ts
|
|
2918
2972
|
async function buildPromptInputs(testCase, mode = "lm") {
|
|
2919
2973
|
const segmentsByMessage = testCase.input.map(
|
|
@@ -3308,58 +3362,6 @@ function parseWorkspaceScriptConfig(raw, evalFileDir) {
|
|
|
3308
3362
|
}
|
|
3309
3363
|
return cwd ? { ...config, cwd } : config;
|
|
3310
3364
|
}
|
|
3311
|
-
function parseRepoSource(raw) {
|
|
3312
|
-
if (!isJsonObject(raw)) return void 0;
|
|
3313
|
-
const obj = raw;
|
|
3314
|
-
if (obj.type === "git" && typeof obj.url === "string") {
|
|
3315
|
-
return { type: "git", url: obj.url };
|
|
3316
|
-
}
|
|
3317
|
-
if (obj.type === "local" && typeof obj.path === "string") {
|
|
3318
|
-
return { type: "local", path: obj.path };
|
|
3319
|
-
}
|
|
3320
|
-
return void 0;
|
|
3321
|
-
}
|
|
3322
|
-
function parseRepoCheckout(raw) {
|
|
3323
|
-
if (!isJsonObject(raw)) return void 0;
|
|
3324
|
-
const obj = raw;
|
|
3325
|
-
const ref = typeof obj.ref === "string" ? obj.ref : void 0;
|
|
3326
|
-
const resolve = obj.resolve === "remote" || obj.resolve === "local" ? obj.resolve : void 0;
|
|
3327
|
-
const ancestor = typeof obj.ancestor === "number" ? obj.ancestor : void 0;
|
|
3328
|
-
if (!ref && !resolve && ancestor === void 0) return void 0;
|
|
3329
|
-
return {
|
|
3330
|
-
...ref !== void 0 && { ref },
|
|
3331
|
-
...resolve !== void 0 && { resolve },
|
|
3332
|
-
...ancestor !== void 0 && { ancestor }
|
|
3333
|
-
};
|
|
3334
|
-
}
|
|
3335
|
-
function parseRepoClone(raw) {
|
|
3336
|
-
if (!isJsonObject(raw)) return void 0;
|
|
3337
|
-
const obj = raw;
|
|
3338
|
-
const depth = typeof obj.depth === "number" ? obj.depth : void 0;
|
|
3339
|
-
const filter = typeof obj.filter === "string" ? obj.filter : void 0;
|
|
3340
|
-
const sparse = Array.isArray(obj.sparse) ? obj.sparse.filter((s) => typeof s === "string") : void 0;
|
|
3341
|
-
if (depth === void 0 && !filter && !sparse) return void 0;
|
|
3342
|
-
return {
|
|
3343
|
-
...depth !== void 0 && { depth },
|
|
3344
|
-
...filter !== void 0 && { filter },
|
|
3345
|
-
...sparse !== void 0 && { sparse }
|
|
3346
|
-
};
|
|
3347
|
-
}
|
|
3348
|
-
function parseRepoConfig(raw) {
|
|
3349
|
-
if (!isJsonObject(raw)) return void 0;
|
|
3350
|
-
const obj = raw;
|
|
3351
|
-
const repoPath = typeof obj.path === "string" ? obj.path : void 0;
|
|
3352
|
-
const source = parseRepoSource(obj.source);
|
|
3353
|
-
if (!repoPath || !source) return void 0;
|
|
3354
|
-
const checkout = parseRepoCheckout(obj.checkout);
|
|
3355
|
-
const clone = parseRepoClone(obj.clone);
|
|
3356
|
-
return {
|
|
3357
|
-
path: repoPath,
|
|
3358
|
-
source,
|
|
3359
|
-
...checkout !== void 0 && { checkout },
|
|
3360
|
-
...clone !== void 0 && { clone }
|
|
3361
|
-
};
|
|
3362
|
-
}
|
|
3363
3365
|
function parseWorkspaceHookConfig(raw, evalFileDir) {
|
|
3364
3366
|
if (!isJsonObject(raw)) return void 0;
|
|
3365
3367
|
const script = parseWorkspaceScriptConfig(raw, evalFileDir);
|
|
@@ -7134,6 +7136,25 @@ var CopilotSdkProvider = class {
|
|
|
7134
7136
|
content: systemPrompt
|
|
7135
7137
|
};
|
|
7136
7138
|
}
|
|
7139
|
+
if (this.config.byokBaseUrl) {
|
|
7140
|
+
const byokType = this.config.byokType ?? "openai";
|
|
7141
|
+
const provider = {
|
|
7142
|
+
type: byokType,
|
|
7143
|
+
baseUrl: normalizeByokBaseUrl(this.config.byokBaseUrl, byokType)
|
|
7144
|
+
};
|
|
7145
|
+
if (this.config.byokBearerToken) {
|
|
7146
|
+
provider.bearerToken = this.config.byokBearerToken;
|
|
7147
|
+
} else if (this.config.byokApiKey) {
|
|
7148
|
+
provider.apiKey = this.config.byokApiKey;
|
|
7149
|
+
}
|
|
7150
|
+
if (this.config.byokWireApi) {
|
|
7151
|
+
provider.wireApi = this.config.byokWireApi;
|
|
7152
|
+
}
|
|
7153
|
+
if (this.config.byokType === "azure" && this.config.byokApiVersion) {
|
|
7154
|
+
provider.azure = { apiVersion: this.config.byokApiVersion };
|
|
7155
|
+
}
|
|
7156
|
+
sessionOptions.provider = provider;
|
|
7157
|
+
}
|
|
7137
7158
|
let session;
|
|
7138
7159
|
try {
|
|
7139
7160
|
session = await client.createSession(sessionOptions);
|
|
@@ -7365,6 +7386,16 @@ function resolveSkillDirectories(cwd) {
|
|
|
7365
7386
|
];
|
|
7366
7387
|
return candidates.filter((dir) => existsSync2(dir));
|
|
7367
7388
|
}
|
|
7389
|
+
/**
 * Normalize a BYOK provider base URL.
 *
 * Surrounding whitespace and trailing slashes are removed. A value that
 * already starts with `http://` or `https://` (case-insensitive) is returned
 * as-is after that cleanup. Otherwise, for the "azure" type the value is
 * wrapped as `https://<value>.openai.azure.com`; for any other type the
 * cleaned value is returned unchanged.
 *
 * @param {string} baseUrl - Configured base URL or, for azure, a bare name.
 * @param {string} type - Provider type; only "azure" triggers expansion.
 * @returns {string} The normalized base URL.
 */
function normalizeByokBaseUrl(baseUrl, type) {
  const cleaned = baseUrl.trim().replace(/\/+$/, "");
  const hasHttpScheme = /^https?:\/\//i.test(cleaned);
  const needsAzureExpansion = !hasHttpScheme && type === "azure";
  return needsAzureExpansion ? `https://${cleaned}.openai.azure.com` : cleaned;
}
|
|
7368
7399
|
function summarizeSdkEvent(eventType, data) {
|
|
7369
7400
|
if (!data || typeof data !== "object") {
|
|
7370
7401
|
return eventType;
|
|
@@ -10794,15 +10825,15 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
|
|
|
10794
10825
|
});
|
|
10795
10826
|
}
|
|
10796
10827
|
async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
10797
|
-
const { mkdir: mkdir16, readFile:
|
|
10828
|
+
const { mkdir: mkdir16, readFile: readFile16, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
|
|
10798
10829
|
const { tmpdir: tmpdir3 } = await import("node:os");
|
|
10799
|
-
const
|
|
10830
|
+
const path51 = await import("node:path");
|
|
10800
10831
|
const { randomUUID: randomUUID10 } = await import("node:crypto");
|
|
10801
|
-
const dir =
|
|
10832
|
+
const dir = path51.join(tmpdir3(), `agentv-exec-${randomUUID10()}`);
|
|
10802
10833
|
await mkdir16(dir, { recursive: true });
|
|
10803
|
-
const stdinPath =
|
|
10804
|
-
const stdoutPath =
|
|
10805
|
-
const stderrPath =
|
|
10834
|
+
const stdinPath = path51.join(dir, "stdin.txt");
|
|
10835
|
+
const stdoutPath = path51.join(dir, "stdout.txt");
|
|
10836
|
+
const stderrPath = path51.join(dir, "stderr.txt");
|
|
10806
10837
|
await writeFile9(stdinPath, stdinPayload, "utf8");
|
|
10807
10838
|
const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
|
|
10808
10839
|
const { spawn: spawn5 } = await import("node:child_process");
|
|
@@ -10832,8 +10863,8 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
|
10832
10863
|
resolve(code ?? 0);
|
|
10833
10864
|
});
|
|
10834
10865
|
});
|
|
10835
|
-
const stdout = (await
|
|
10836
|
-
const stderr = (await
|
|
10866
|
+
const stdout = (await readFile16(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
|
|
10867
|
+
const stderr = (await readFile16(stderrPath, "utf8")).replace(/\r\n/g, "\n");
|
|
10837
10868
|
return { stdout, stderr, exitCode };
|
|
10838
10869
|
} finally {
|
|
10839
10870
|
await rm6(dir, { recursive: true, force: true });
|
|
@@ -13092,115 +13123,115 @@ var FieldAccuracyEvaluator = class {
|
|
|
13092
13123
|
* Evaluate a single field against the expected value.
|
|
13093
13124
|
*/
|
|
13094
13125
|
evaluateField(fieldConfig, candidateData, expectedData) {
|
|
13095
|
-
const { path:
|
|
13096
|
-
const candidateValue = resolvePath(candidateData,
|
|
13097
|
-
const expectedValue = resolvePath(expectedData,
|
|
13126
|
+
const { path: path51, match, required = true, weight = 1 } = fieldConfig;
|
|
13127
|
+
const candidateValue = resolvePath(candidateData, path51);
|
|
13128
|
+
const expectedValue = resolvePath(expectedData, path51);
|
|
13098
13129
|
if (expectedValue === void 0) {
|
|
13099
13130
|
return {
|
|
13100
|
-
path:
|
|
13131
|
+
path: path51,
|
|
13101
13132
|
score: 1,
|
|
13102
13133
|
// No expected value means no comparison needed
|
|
13103
13134
|
weight,
|
|
13104
13135
|
hit: true,
|
|
13105
|
-
message: `${
|
|
13136
|
+
message: `${path51}: no expected value`
|
|
13106
13137
|
};
|
|
13107
13138
|
}
|
|
13108
13139
|
if (candidateValue === void 0) {
|
|
13109
13140
|
if (required) {
|
|
13110
13141
|
return {
|
|
13111
|
-
path:
|
|
13142
|
+
path: path51,
|
|
13112
13143
|
score: 0,
|
|
13113
13144
|
weight,
|
|
13114
13145
|
hit: false,
|
|
13115
|
-
message: `${
|
|
13146
|
+
message: `${path51} (required, missing)`
|
|
13116
13147
|
};
|
|
13117
13148
|
}
|
|
13118
13149
|
return {
|
|
13119
|
-
path:
|
|
13150
|
+
path: path51,
|
|
13120
13151
|
score: 1,
|
|
13121
13152
|
// Don't penalize missing optional fields
|
|
13122
13153
|
weight: 0,
|
|
13123
13154
|
// Zero weight means it won't affect the score
|
|
13124
13155
|
hit: true,
|
|
13125
|
-
message: `${
|
|
13156
|
+
message: `${path51}: optional field missing`
|
|
13126
13157
|
};
|
|
13127
13158
|
}
|
|
13128
13159
|
switch (match) {
|
|
13129
13160
|
case "exact":
|
|
13130
|
-
return this.compareExact(
|
|
13161
|
+
return this.compareExact(path51, candidateValue, expectedValue, weight);
|
|
13131
13162
|
case "numeric_tolerance":
|
|
13132
13163
|
return this.compareNumericTolerance(
|
|
13133
|
-
|
|
13164
|
+
path51,
|
|
13134
13165
|
candidateValue,
|
|
13135
13166
|
expectedValue,
|
|
13136
13167
|
fieldConfig,
|
|
13137
13168
|
weight
|
|
13138
13169
|
);
|
|
13139
13170
|
case "date":
|
|
13140
|
-
return this.compareDate(
|
|
13171
|
+
return this.compareDate(path51, candidateValue, expectedValue, fieldConfig, weight);
|
|
13141
13172
|
default:
|
|
13142
13173
|
return {
|
|
13143
|
-
path:
|
|
13174
|
+
path: path51,
|
|
13144
13175
|
score: 0,
|
|
13145
13176
|
weight,
|
|
13146
13177
|
hit: false,
|
|
13147
|
-
message: `${
|
|
13178
|
+
message: `${path51}: unknown match type "${match}"`
|
|
13148
13179
|
};
|
|
13149
13180
|
}
|
|
13150
13181
|
}
|
|
13151
13182
|
/**
|
|
13152
13183
|
* Exact equality comparison.
|
|
13153
13184
|
*/
|
|
13154
|
-
compareExact(
|
|
13185
|
+
compareExact(path51, candidateValue, expectedValue, weight) {
|
|
13155
13186
|
if (deepEqual(candidateValue, expectedValue)) {
|
|
13156
13187
|
return {
|
|
13157
|
-
path:
|
|
13188
|
+
path: path51,
|
|
13158
13189
|
score: 1,
|
|
13159
13190
|
weight,
|
|
13160
13191
|
hit: true,
|
|
13161
|
-
message:
|
|
13192
|
+
message: path51
|
|
13162
13193
|
};
|
|
13163
13194
|
}
|
|
13164
13195
|
if (typeof candidateValue !== typeof expectedValue) {
|
|
13165
13196
|
return {
|
|
13166
|
-
path:
|
|
13197
|
+
path: path51,
|
|
13167
13198
|
score: 0,
|
|
13168
13199
|
weight,
|
|
13169
13200
|
hit: false,
|
|
13170
|
-
message: `${
|
|
13201
|
+
message: `${path51} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
|
|
13171
13202
|
};
|
|
13172
13203
|
}
|
|
13173
13204
|
return {
|
|
13174
|
-
path:
|
|
13205
|
+
path: path51,
|
|
13175
13206
|
score: 0,
|
|
13176
13207
|
weight,
|
|
13177
13208
|
hit: false,
|
|
13178
|
-
message: `${
|
|
13209
|
+
message: `${path51} (value mismatch)`
|
|
13179
13210
|
};
|
|
13180
13211
|
}
|
|
13181
13212
|
/**
|
|
13182
13213
|
* Numeric comparison with absolute or relative tolerance.
|
|
13183
13214
|
*/
|
|
13184
|
-
compareNumericTolerance(
|
|
13215
|
+
compareNumericTolerance(path51, candidateValue, expectedValue, fieldConfig, weight) {
|
|
13185
13216
|
const { tolerance = 0, relative = false } = fieldConfig;
|
|
13186
13217
|
const candidateNum = toNumber(candidateValue);
|
|
13187
13218
|
const expectedNum = toNumber(expectedValue);
|
|
13188
13219
|
if (candidateNum === null || expectedNum === null) {
|
|
13189
13220
|
return {
|
|
13190
|
-
path:
|
|
13221
|
+
path: path51,
|
|
13191
13222
|
score: 0,
|
|
13192
13223
|
weight,
|
|
13193
13224
|
hit: false,
|
|
13194
|
-
message: `${
|
|
13225
|
+
message: `${path51} (non-numeric value)`
|
|
13195
13226
|
};
|
|
13196
13227
|
}
|
|
13197
13228
|
if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
|
|
13198
13229
|
return {
|
|
13199
|
-
path:
|
|
13230
|
+
path: path51,
|
|
13200
13231
|
score: 0,
|
|
13201
13232
|
weight,
|
|
13202
13233
|
hit: false,
|
|
13203
|
-
message: `${
|
|
13234
|
+
message: `${path51} (invalid numeric value)`
|
|
13204
13235
|
};
|
|
13205
13236
|
}
|
|
13206
13237
|
const diff = Math.abs(candidateNum - expectedNum);
|
|
@@ -13213,61 +13244,61 @@ var FieldAccuracyEvaluator = class {
|
|
|
13213
13244
|
}
|
|
13214
13245
|
if (withinTolerance) {
|
|
13215
13246
|
return {
|
|
13216
|
-
path:
|
|
13247
|
+
path: path51,
|
|
13217
13248
|
score: 1,
|
|
13218
13249
|
weight,
|
|
13219
13250
|
hit: true,
|
|
13220
|
-
message: `${
|
|
13251
|
+
message: `${path51} (within tolerance: diff=${diff.toFixed(2)})`
|
|
13221
13252
|
};
|
|
13222
13253
|
}
|
|
13223
13254
|
return {
|
|
13224
|
-
path:
|
|
13255
|
+
path: path51,
|
|
13225
13256
|
score: 0,
|
|
13226
13257
|
weight,
|
|
13227
13258
|
hit: false,
|
|
13228
|
-
message: `${
|
|
13259
|
+
message: `${path51} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
|
|
13229
13260
|
};
|
|
13230
13261
|
}
|
|
13231
13262
|
/**
|
|
13232
13263
|
* Date comparison with format normalization.
|
|
13233
13264
|
*/
|
|
13234
|
-
compareDate(
|
|
13265
|
+
compareDate(path51, candidateValue, expectedValue, fieldConfig, weight) {
|
|
13235
13266
|
const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
|
|
13236
13267
|
const candidateDate = parseDate(String(candidateValue), formats);
|
|
13237
13268
|
const expectedDate = parseDate(String(expectedValue), formats);
|
|
13238
13269
|
if (candidateDate === null) {
|
|
13239
13270
|
return {
|
|
13240
|
-
path:
|
|
13271
|
+
path: path51,
|
|
13241
13272
|
score: 0,
|
|
13242
13273
|
weight,
|
|
13243
13274
|
hit: false,
|
|
13244
|
-
message: `${
|
|
13275
|
+
message: `${path51} (unparseable candidate date)`
|
|
13245
13276
|
};
|
|
13246
13277
|
}
|
|
13247
13278
|
if (expectedDate === null) {
|
|
13248
13279
|
return {
|
|
13249
|
-
path:
|
|
13280
|
+
path: path51,
|
|
13250
13281
|
score: 0,
|
|
13251
13282
|
weight,
|
|
13252
13283
|
hit: false,
|
|
13253
|
-
message: `${
|
|
13284
|
+
message: `${path51} (unparseable expected date)`
|
|
13254
13285
|
};
|
|
13255
13286
|
}
|
|
13256
13287
|
if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
|
|
13257
13288
|
return {
|
|
13258
|
-
path:
|
|
13289
|
+
path: path51,
|
|
13259
13290
|
score: 1,
|
|
13260
13291
|
weight,
|
|
13261
13292
|
hit: true,
|
|
13262
|
-
message:
|
|
13293
|
+
message: path51
|
|
13263
13294
|
};
|
|
13264
13295
|
}
|
|
13265
13296
|
return {
|
|
13266
|
-
path:
|
|
13297
|
+
path: path51,
|
|
13267
13298
|
score: 0,
|
|
13268
13299
|
weight,
|
|
13269
13300
|
hit: false,
|
|
13270
|
-
message: `${
|
|
13301
|
+
message: `${path51} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
|
|
13271
13302
|
};
|
|
13272
13303
|
}
|
|
13273
13304
|
/**
|
|
@@ -13300,11 +13331,11 @@ var FieldAccuracyEvaluator = class {
|
|
|
13300
13331
|
};
|
|
13301
13332
|
}
|
|
13302
13333
|
};
|
|
13303
|
-
function resolvePath(obj,
|
|
13304
|
-
if (!
|
|
13334
|
+
function resolvePath(obj, path51) {
|
|
13335
|
+
if (!path51 || !obj) {
|
|
13305
13336
|
return void 0;
|
|
13306
13337
|
}
|
|
13307
|
-
const parts =
|
|
13338
|
+
const parts = path51.split(/\.|\[|\]/).filter((p) => p.length > 0);
|
|
13308
13339
|
let current = obj;
|
|
13309
13340
|
for (const part of parts) {
|
|
13310
13341
|
if (current === null || current === void 0) {
|
|
@@ -13796,8 +13827,8 @@ var TokenUsageEvaluator = class {
|
|
|
13796
13827
|
};
|
|
13797
13828
|
|
|
13798
13829
|
// src/evaluation/evaluators/tool-trajectory.ts
|
|
13799
|
-
function getNestedValue(obj,
|
|
13800
|
-
const parts =
|
|
13830
|
+
function getNestedValue(obj, path51) {
|
|
13831
|
+
const parts = path51.split(".");
|
|
13801
13832
|
let current = obj;
|
|
13802
13833
|
for (const part of parts) {
|
|
13803
13834
|
if (current === null || current === void 0 || typeof current !== "object") {
|
|
@@ -18295,9 +18326,106 @@ function buildPrompt(criteria, question, referenceAnswer) {
|
|
|
18295
18326
|
return parts.join("\n");
|
|
18296
18327
|
}
|
|
18297
18328
|
|
|
18298
|
-
// src/evaluation/
|
|
18299
|
-
import {
|
|
18329
|
+
// src/evaluation/workspace/deps-scanner.ts
|
|
18330
|
+
import { readFile as readFile13 } from "node:fs/promises";
|
|
18300
18331
|
import path46 from "node:path";
|
|
18332
|
+
import { parse as parse5 } from "yaml";
|
|
18333
|
+
/**
 * Normalize a git URL so that equivalent spellings compare equal.
 *
 * Surrounding whitespace, trailing slashes and a trailing `.git` suffix are
 * removed. Trailing slashes are stripped before the suffix so that
 * `…/repo.git/`, `…/repo.git` and `…/repo` all normalize to the same string.
 * When the value parses as a URL, its hostname is lowercased as well.
 *
 * @param {string} url - Git remote URL as written in an eval/workspace file.
 * @returns {string} The normalized URL.
 */
function normalizeGitUrl(url) {
  let normalized = url.trim().replace(/\/+$/, "").replace(/\.git$/, "");
  try {
    const parsed = new URL(normalized);
    parsed.hostname = parsed.hostname.toLowerCase();
    // Serialization adds a trailing "/" for a bare host; strip it again.
    normalized = parsed.toString().replace(/\/+$/, "");
  } catch {
    // Not parseable by the URL constructor (e.g. scp-style "git@host:org/repo");
    // keep the textually cleaned value.
  }
  return normalized;
}
|
|
18343
|
+
/**
 * Collect the distinct git repositories referenced by a set of eval files.
 *
 * Repos are deduplicated by normalized URL plus checkout ref. The first
 * occurrence supplies `url`, `clone` and the remaining checkout options;
 * later occurrences only add their file to `usedBy`. Each file is listed at
 * most once per repo, even if it references the repo several times.
 * Non-git sources are ignored. A file that fails to load is reported in
 * `errors` and does not abort the scan.
 *
 * @param {Iterable<string>} evalFilePaths - Paths of eval files to scan.
 * @returns {Promise<{repos: Array<{url: string, ref: (string|undefined), clone: (object|undefined), checkout: (object|undefined), usedBy: string[]}>, errors: Array<{file: string, message: string}>}>}
 */
async function scanRepoDeps(evalFilePaths) {
  const seen = new Map();
  const errors = [];
  for (const filePath of evalFilePaths) {
    try {
      const repos = await extractReposFromEvalFile(filePath);
      for (const repo of repos) {
        if (repo.source.type !== "git") continue;
        const ref = repo.checkout?.ref;
        // NUL cannot appear in a URL or ref, so it is a safe key separator.
        const key = `${normalizeGitUrl(repo.source.url)}\0${ref ?? ""}`;
        const existing = seen.get(key);
        if (existing) {
          // The same file may reference a repo more than once; record it once.
          if (!existing.usedBy.includes(filePath)) {
            existing.usedBy.push(filePath);
          }
          continue;
        }
        // `ref` is reported as its own field, so keep only the other checkout options.
        const { ref: _ref, ...checkoutRest } = repo.checkout ?? {};
        const hasCheckout = Object.keys(checkoutRest).length > 0;
        seen.set(key, {
          url: repo.source.url,
          ref,
          clone: repo.clone,
          checkout: hasCheckout ? checkoutRest : undefined,
          usedBy: [filePath]
        });
      }
    } catch (err) {
      errors.push({
        file: filePath,
        message: err instanceof Error ? err.message : String(err)
      });
    }
  }
  return { repos: [...seen.values()], errors };
}
|
|
18377
|
+
/**
 * Read an eval file and list every repo declared in its workspaces.
 *
 * The file is parsed as YAML and passed through `interpolateEnv` with
 * `process.env`. Repos come from the top-level `workspace` first, then from
 * each entry of `tests` that is a plain object, in order.
 *
 * @param {string} filePath - Path of the eval file.
 * @returns {Promise<object[]>} Parsed repo entries; empty when the document is
 *   not a plain object.
 */
async function extractReposFromEvalFile(filePath) {
  const text = await readFile13(filePath, "utf8");
  const doc = interpolateEnv(parse5(text), process.env);
  if (!doc || typeof doc !== "object" || Array.isArray(doc)) {
    return [];
  }
  // Workspace references given as strings are resolved relative to the eval file.
  const evalFileDir = path46.dirname(path46.resolve(filePath));
  const collected = [...(await extractReposFromWorkspaceRaw(doc.workspace, evalFileDir))];
  const testEntries = Array.isArray(doc.tests) ? doc.tests : [];
  for (const entry of testEntries) {
    const isPlainObject = entry && typeof entry === "object" && !Array.isArray(entry);
    if (!isPlainObject) continue;
    collected.push(...(await extractReposFromWorkspaceRaw(entry.workspace, evalFileDir)));
  }
  return collected;
}
|
|
18396
|
+
/**
 * Extract repos from a raw `workspace` value.
 *
 * A plain object is treated as an inline workspace. A string is treated as a
 * path (resolved against `evalFileDir`) to a YAML workspace file, which is
 * read, parsed and passed through `interpolateEnv` with `process.env`. Any
 * other value yields no repos.
 *
 * @param {unknown} raw - The `workspace` value from an eval file or test.
 * @param {string} evalFileDir - Directory used to resolve string references.
 * @returns {Promise<object[]>} Parsed repo entries.
 */
async function extractReposFromWorkspaceRaw(raw, evalFileDir) {
  const isPlainObject = (value) =>
    Boolean(value) && typeof value === "object" && !Array.isArray(value);

  if (isPlainObject(raw)) {
    return extractReposFromObject(raw);
  }
  if (typeof raw !== "string") {
    return [];
  }
  const workspaceFilePath = path46.resolve(evalFileDir, raw);
  const text = await readFile13(workspaceFilePath, "utf8");
  const workspace = interpolateEnv(parse5(text), process.env);
  return isPlainObject(workspace) ? extractReposFromObject(workspace) : [];
}
|
|
18409
|
+
/**
 * Extract repo entries from a parsed workspace object.
 *
 * Entries of `obj.repos` that are not plain objects, or whose `source` does
 * not parse, are skipped. `checkout` and `clone` are always present on the
 * result and are `undefined` when their parsers return nothing.
 *
 * @param {object} obj - Parsed workspace object.
 * @returns {Array<{source: object, checkout: (object|undefined), clone: (object|undefined)}>}
 */
function extractReposFromObject(obj) {
  if (!Array.isArray(obj.repos)) {
    return [];
  }
  return obj.repos.flatMap((entry) => {
    if (!entry || typeof entry !== "object" || Array.isArray(entry)) {
      return [];
    }
    const source = parseRepoSource(entry.source);
    if (!source) {
      return [];
    }
    return [
      {
        source,
        checkout: parseRepoCheckout(entry.checkout),
        clone: parseRepoClone(entry.clone)
      }
    ];
  });
}
|
|
18425
|
+
|
|
18426
|
+
// src/evaluation/cache/response-cache.ts
|
|
18427
|
+
import { mkdir as mkdir15, readFile as readFile14, writeFile as writeFile8 } from "node:fs/promises";
|
|
18428
|
+
import path47 from "node:path";
|
|
18301
18429
|
var DEFAULT_CACHE_PATH = ".agentv/cache";
|
|
18302
18430
|
var ResponseCache = class {
|
|
18303
18431
|
cachePath;
|
|
@@ -18307,7 +18435,7 @@ var ResponseCache = class {
|
|
|
18307
18435
|
async get(key) {
|
|
18308
18436
|
const filePath = this.keyToPath(key);
|
|
18309
18437
|
try {
|
|
18310
|
-
const data = await
|
|
18438
|
+
const data = await readFile14(filePath, "utf8");
|
|
18311
18439
|
return JSON.parse(data);
|
|
18312
18440
|
} catch {
|
|
18313
18441
|
return void 0;
|
|
@@ -18315,13 +18443,13 @@ var ResponseCache = class {
|
|
|
18315
18443
|
}
|
|
18316
18444
|
async set(key, value) {
|
|
18317
18445
|
const filePath = this.keyToPath(key);
|
|
18318
|
-
const dir =
|
|
18446
|
+
const dir = path47.dirname(filePath);
|
|
18319
18447
|
await mkdir15(dir, { recursive: true });
|
|
18320
18448
|
await writeFile8(filePath, JSON.stringify(value, null, 2), "utf8");
|
|
18321
18449
|
}
|
|
18322
18450
|
keyToPath(key) {
|
|
18323
18451
|
const prefix = key.slice(0, 2);
|
|
18324
|
-
return
|
|
18452
|
+
return path47.join(this.cachePath, prefix, `${key}.json`);
|
|
18325
18453
|
}
|
|
18326
18454
|
};
|
|
18327
18455
|
function shouldEnableCache(params) {
|
|
@@ -18338,10 +18466,10 @@ function shouldSkipCacheForTemperature(targetConfig) {
|
|
|
18338
18466
|
|
|
18339
18467
|
// src/projects.ts
|
|
18340
18468
|
import { existsSync as existsSync6, mkdirSync as mkdirSync2, readFileSync as readFileSync3, readdirSync as readdirSync3, statSync as statSync2, writeFileSync } from "node:fs";
|
|
18341
|
-
import
|
|
18469
|
+
import path48 from "node:path";
|
|
18342
18470
|
import { parse as parseYaml3, stringify as stringifyYaml } from "yaml";
|
|
18343
18471
|
/**
 * Location of the projects registry file.
 *
 * @returns {string} Path of `projects.yaml` inside the directory returned by
 *   `getAgentvHome()`.
 */
function getProjectsRegistryPath() {
  const agentvHome = getAgentvHome();
  return path48.join(agentvHome, "projects.yaml");
}
|
|
18346
18474
|
function loadProjectRegistry() {
|
|
18347
18475
|
const registryPath = getProjectsRegistryPath();
|
|
@@ -18361,14 +18489,14 @@ function loadProjectRegistry() {
|
|
|
18361
18489
|
}
|
|
18362
18490
|
/**
 * Write the project registry to disk as YAML.
 *
 * The registry file's parent directory is created first when it does not
 * exist yet.
 *
 * @param {object} registry - Registry object to serialize with `stringifyYaml`.
 * @returns {void}
 */
function saveProjectRegistry(registry) {
  const registryFile = getProjectsRegistryPath();
  const parentDir = path48.dirname(registryFile);
  const parentMissing = !existsSync6(parentDir);
  if (parentMissing) {
    mkdirSync2(parentDir, { recursive: true });
  }
  const serialized = stringifyYaml(registry);
  writeFileSync(registryFile, serialized, "utf-8");
}
|
|
18370
18498
|
function deriveProjectId(dirPath, existingIds) {
|
|
18371
|
-
const base =
|
|
18499
|
+
const base = path48.basename(dirPath).toLowerCase().replace(/[^a-z0-9-]/g, "-").replace(/-+/g, "-").replace(/^-|-$/g, "");
|
|
18372
18500
|
let candidate = base || "project";
|
|
18373
18501
|
let suffix = 2;
|
|
18374
18502
|
while (existingIds.includes(candidate)) {
|
|
@@ -18378,11 +18506,11 @@ function deriveProjectId(dirPath, existingIds) {
|
|
|
18378
18506
|
return candidate;
|
|
18379
18507
|
}
|
|
18380
18508
|
function addProject(projectPath) {
|
|
18381
|
-
const absPath =
|
|
18509
|
+
const absPath = path48.resolve(projectPath);
|
|
18382
18510
|
if (!existsSync6(absPath)) {
|
|
18383
18511
|
throw new Error(`Directory not found: ${absPath}`);
|
|
18384
18512
|
}
|
|
18385
|
-
if (!existsSync6(
|
|
18513
|
+
if (!existsSync6(path48.join(absPath, ".agentv"))) {
|
|
18386
18514
|
throw new Error(`No .agentv/ directory found in ${absPath}. Run an evaluation first.`);
|
|
18387
18515
|
}
|
|
18388
18516
|
const registry = loadProjectRegistry();
|
|
@@ -18396,7 +18524,7 @@ function addProject(projectPath) {
|
|
|
18396
18524
|
absPath,
|
|
18397
18525
|
registry.projects.map((p) => p.id)
|
|
18398
18526
|
),
|
|
18399
|
-
name:
|
|
18527
|
+
name: path48.basename(absPath),
|
|
18400
18528
|
path: absPath,
|
|
18401
18529
|
addedAt: now,
|
|
18402
18530
|
lastOpenedAt: now
|
|
@@ -18425,14 +18553,14 @@ function touchProject(projectId) {
|
|
|
18425
18553
|
}
|
|
18426
18554
|
}
|
|
18427
18555
|
function discoverProjects(rootDir, maxDepth = 2) {
|
|
18428
|
-
const absRoot =
|
|
18556
|
+
const absRoot = path48.resolve(rootDir);
|
|
18429
18557
|
if (!existsSync6(absRoot) || !statSync2(absRoot).isDirectory()) {
|
|
18430
18558
|
return [];
|
|
18431
18559
|
}
|
|
18432
18560
|
const results = [];
|
|
18433
18561
|
function scan(dir, depth) {
|
|
18434
18562
|
if (depth > maxDepth) return;
|
|
18435
|
-
if (existsSync6(
|
|
18563
|
+
if (existsSync6(path48.join(dir, ".agentv"))) {
|
|
18436
18564
|
results.push(dir);
|
|
18437
18565
|
return;
|
|
18438
18566
|
}
|
|
@@ -18442,7 +18570,7 @@ function discoverProjects(rootDir, maxDepth = 2) {
|
|
|
18442
18570
|
for (const entry of entries) {
|
|
18443
18571
|
if (!entry.isDirectory()) continue;
|
|
18444
18572
|
if (entry.name.startsWith(".") || entry.name === "node_modules") continue;
|
|
18445
|
-
scan(
|
|
18573
|
+
scan(path48.join(dir, entry.name), depth + 1);
|
|
18446
18574
|
}
|
|
18447
18575
|
} catch {
|
|
18448
18576
|
}
|
|
@@ -19355,8 +19483,8 @@ function extractResponseItemContent(content) {
|
|
|
19355
19483
|
// src/import/codex-session-discovery.ts
|
|
19356
19484
|
import { readdir as readdir8, stat as stat9 } from "node:fs/promises";
|
|
19357
19485
|
import { homedir as homedir3 } from "node:os";
|
|
19358
|
-
import
|
|
19359
|
-
var DEFAULT_SESSIONS_DIR = () =>
|
|
19486
|
+
import path49 from "node:path";
|
|
19487
|
+
var DEFAULT_SESSIONS_DIR = () => path49.join(homedir3(), ".codex", "sessions");
|
|
19360
19488
|
async function discoverCodexSessions(opts) {
|
|
19361
19489
|
const sessionsDir = opts?.sessionsDir ?? DEFAULT_SESSIONS_DIR();
|
|
19362
19490
|
const limit = opts?.latest ? 1 : opts?.limit ?? 10;
|
|
@@ -19368,7 +19496,7 @@ async function discoverCodexSessions(opts) {
|
|
|
19368
19496
|
return [];
|
|
19369
19497
|
}
|
|
19370
19498
|
for (const year of yearDirs) {
|
|
19371
|
-
const yearPath =
|
|
19499
|
+
const yearPath = path49.join(sessionsDir, year);
|
|
19372
19500
|
let monthDirs;
|
|
19373
19501
|
try {
|
|
19374
19502
|
monthDirs = await readdir8(yearPath);
|
|
@@ -19376,7 +19504,7 @@ async function discoverCodexSessions(opts) {
|
|
|
19376
19504
|
continue;
|
|
19377
19505
|
}
|
|
19378
19506
|
for (const month of monthDirs) {
|
|
19379
|
-
const monthPath =
|
|
19507
|
+
const monthPath = path49.join(yearPath, month);
|
|
19380
19508
|
let dayDirs;
|
|
19381
19509
|
try {
|
|
19382
19510
|
dayDirs = await readdir8(monthPath);
|
|
@@ -19388,7 +19516,7 @@ async function discoverCodexSessions(opts) {
|
|
|
19388
19516
|
const dirDate = `${year}-${month}-${day}`;
|
|
19389
19517
|
if (dirDate !== opts.date) continue;
|
|
19390
19518
|
}
|
|
19391
|
-
const dayPath =
|
|
19519
|
+
const dayPath = path49.join(monthPath, day);
|
|
19392
19520
|
let files;
|
|
19393
19521
|
try {
|
|
19394
19522
|
files = await readdir8(dayPath);
|
|
@@ -19397,7 +19525,7 @@ async function discoverCodexSessions(opts) {
|
|
|
19397
19525
|
}
|
|
19398
19526
|
for (const file of files) {
|
|
19399
19527
|
if (!file.startsWith("rollout-") || !file.endsWith(".jsonl")) continue;
|
|
19400
|
-
const filePath =
|
|
19528
|
+
const filePath = path49.join(dayPath, file);
|
|
19401
19529
|
const nameWithoutExt = file.replace(/\.jsonl$/, "");
|
|
19402
19530
|
const parts = nameWithoutExt.split("-");
|
|
19403
19531
|
const sessionId = parts.length >= 6 ? parts.slice(-5).join("-") : nameWithoutExt;
|
|
@@ -19420,8 +19548,8 @@ async function discoverCodexSessions(opts) {
|
|
|
19420
19548
|
// src/import/session-discovery.ts
|
|
19421
19549
|
import { readdir as readdir9, stat as stat10 } from "node:fs/promises";
|
|
19422
19550
|
import { homedir as homedir4 } from "node:os";
|
|
19423
|
-
import
|
|
19424
|
-
var DEFAULT_PROJECTS_DIR = () =>
|
|
19551
|
+
import path50 from "node:path";
|
|
19552
|
+
var DEFAULT_PROJECTS_DIR = () => path50.join(homedir4(), ".claude", "projects");
|
|
19425
19553
|
/**
 * Encode a project path as a single directory name by replacing every `/`
 * with `-`.
 *
 * @param {string} projectPath - Path to encode.
 * @returns {string} The path with all forward slashes turned into dashes.
 */
function encodeProjectPath(projectPath) {
  const segments = projectPath.split("/");
  return segments.join("-");
}
|
|
@@ -19440,7 +19568,7 @@ async function discoverClaudeSessions(opts) {
|
|
|
19440
19568
|
}
|
|
19441
19569
|
const sessions = [];
|
|
19442
19570
|
for (const projectDir of projectDirs) {
|
|
19443
|
-
const dirPath =
|
|
19571
|
+
const dirPath = path50.join(projectsDir, projectDir);
|
|
19444
19572
|
let entries;
|
|
19445
19573
|
try {
|
|
19446
19574
|
entries = await readdir9(dirPath);
|
|
@@ -19451,7 +19579,7 @@ async function discoverClaudeSessions(opts) {
|
|
|
19451
19579
|
if (!entry.endsWith(".jsonl")) continue;
|
|
19452
19580
|
const sessionId = entry.replace(/\.jsonl$/, "");
|
|
19453
19581
|
if (opts?.sessionId && sessionId !== opts.sessionId) continue;
|
|
19454
|
-
const filePath =
|
|
19582
|
+
const filePath = path50.join(dirPath, entry);
|
|
19455
19583
|
let updatedAt;
|
|
19456
19584
|
try {
|
|
19457
19585
|
const fileStat = await stat10(filePath);
|
|
@@ -19472,7 +19600,7 @@ async function discoverClaudeSessions(opts) {
|
|
|
19472
19600
|
}
|
|
19473
19601
|
|
|
19474
19602
|
// src/import/types.ts
|
|
19475
|
-
import { readFile as
|
|
19603
|
+
import { readFile as readFile15 } from "node:fs/promises";
|
|
19476
19604
|
function toTranscriptJsonLine(entry) {
|
|
19477
19605
|
const firstUserMessage = entry.messages.find((m) => m.role === "user");
|
|
19478
19606
|
const input = typeof firstUserMessage?.content === "string" ? firstUserMessage.content : "";
|
|
@@ -19498,11 +19626,11 @@ function toTranscriptJsonLine(entry) {
|
|
|
19498
19626
|
};
|
|
19499
19627
|
}
|
|
19500
19628
|
/**
 * Read a JSONL transcript file.
 *
 * The file is read as UTF-8 and split on `\n`; lines that are empty or
 * whitespace-only are skipped and every remaining line is parsed as JSON.
 *
 * @param {string} filePath - Path of the `.jsonl` file.
 * @returns {Promise<unknown[]>} Parsed values, one per non-blank line.
 * @throws {SyntaxError} When a non-blank line is not valid JSON.
 */
async function readTranscriptJsonl(filePath) {
  const contents = await readFile15(filePath, "utf8");
  const records = [];
  for (const line of contents.split("\n")) {
    if (line.trim().length === 0) continue;
    records.push(JSON.parse(line));
  }
  return records;
}
|
|
19504
19632
|
/**
 * Read a transcript file as raw UTF-8 text.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {Promise<string>} The file contents.
 */
async function readTranscriptFile(filePath) {
  const contents = await readFile15(filePath, "utf8");
  return contents;
}
|
|
19507
19635
|
|
|
19508
19636
|
// src/import/transcript-provider.ts
|
|
@@ -19716,6 +19844,7 @@ export {
|
|
|
19716
19844
|
runRegexAssertion,
|
|
19717
19845
|
runStartsWithAssertion,
|
|
19718
19846
|
saveProjectRegistry,
|
|
19847
|
+
scanRepoDeps,
|
|
19719
19848
|
scoreToVerdict,
|
|
19720
19849
|
shouldEnableCache,
|
|
19721
19850
|
shouldSkipCacheForTemperature,
|