@agentv/core 4.8.0 → 4.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +255 -152
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +33 -1
- package/dist/index.d.ts +33 -1
- package/dist/index.js +232 -132
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -2914,6 +2914,60 @@ function parseMetadata(suite) {
|
|
|
2914
2914
|
});
|
|
2915
2915
|
}
|
|
2916
2916
|
|
|
2917
|
+
// src/evaluation/workspace/repo-config-parser.ts
|
|
2918
|
+
/**
 * Validate a raw repo `source` entry and return a normalized copy of it.
 *
 * Two shapes are accepted:
 *   - `{ type: "git", url: <string> }`
 *   - `{ type: "local", path: <string> }`
 * Any other keys on the input are dropped from the result.
 *
 * @param {unknown} raw - Untrusted value read from an eval/workspace file.
 * @returns {{type: "git", url: string} | {type: "local", path: string} | undefined}
 *   The normalized source, or `undefined` when `raw` matches neither shape.
 */
function parseRepoSource(raw) {
  if (!isJsonObject(raw)) {
    return undefined;
  }
  switch (raw.type) {
    case "git":
      return typeof raw.url === "string" ? { type: "git", url: raw.url } : undefined;
    case "local":
      return typeof raw.path === "string" ? { type: "local", path: raw.path } : undefined;
    default:
      return undefined;
  }
}
|
|
2929
|
+
/**
 * Extract the recognised checkout options from a raw `checkout` entry.
 *
 * Recognised keys (all optional):
 *   - `ref`      — kept when it is a string
 *   - `resolve`  — kept when it is exactly "remote" or "local"
 *   - `ancestor` — kept when it is a number
 *
 * @param {unknown} raw - Untrusted value read from an eval/workspace file.
 * @returns {{ref?: string, resolve?: "remote" | "local", ancestor?: number} | undefined}
 *   An object holding only the keys that validated, or `undefined` when `raw`
 *   is not an object or nothing usable was found.
 */
function parseRepoCheckout(raw) {
  if (!isJsonObject(raw)) {
    return undefined;
  }

  const checkout = {};
  if (typeof raw.ref === "string") {
    checkout.ref = raw.ref;
  }
  if (["remote", "local"].includes(raw.resolve)) {
    checkout.resolve = raw.resolve;
  }
  if (typeof raw.ancestor === "number") {
    checkout.ancestor = raw.ancestor;
  }

  // An empty-string ref on its own counts as "nothing specified"; ancestor is
  // compared against undefined so that 0 still counts as a value.
  const nothingSpecified = !checkout.ref && !checkout.resolve && checkout.ancestor === undefined;
  return nothingSpecified ? undefined : checkout;
}
|
|
2942
|
+
/**
 * Extract the recognised clone options from a raw `clone` entry.
 *
 * Recognised keys (all optional):
 *   - `depth`  — kept when it is a number
 *   - `filter` — kept when it is a string
 *   - `sparse` — kept when it is an array; non-string items are discarded
 *
 * @param {unknown} raw - Untrusted value read from an eval/workspace file.
 * @returns {{depth?: number, filter?: string, sparse?: string[]} | undefined}
 *   An object holding only the keys that validated, or `undefined` when `raw`
 *   is not an object or nothing usable was found.
 */
function parseRepoClone(raw) {
  if (!isJsonObject(raw)) {
    return undefined;
  }

  const clone = {};
  if (typeof raw.depth === "number") {
    clone.depth = raw.depth;
  }
  if (typeof raw.filter === "string") {
    clone.filter = raw.filter;
  }
  if (Array.isArray(raw.sparse)) {
    clone.sparse = raw.sparse.filter((item) => typeof item === "string");
  }

  // depth is compared against undefined so 0 counts as a value; an empty
  // filter string on its own counts as "nothing specified", while an empty
  // sparse array still counts as specified.
  const nothingSpecified = clone.depth === undefined && !clone.filter && !clone.sparse;
  return nothingSpecified ? undefined : clone;
}
|
|
2955
|
+
/**
 * Parse one raw repo entry into a normalized repo configuration.
 *
 * A non-empty string `path` and a valid `source` (see `parseRepoSource`) are
 * both required; `checkout` and `clone` are attached only when their parsers
 * return something.
 *
 * @param {unknown} raw - Untrusted value read from an eval/workspace file.
 * @returns {{path: string, source: object, checkout?: object, clone?: object} | undefined}
 *   The normalized configuration, or `undefined` when the entry is unusable.
 */
function parseRepoConfig(raw) {
  if (!isJsonObject(raw)) {
    return undefined;
  }

  const repoPath = typeof raw.path === "string" ? raw.path : undefined;
  const source = parseRepoSource(raw.source);
  const hasRequiredFields = Boolean(repoPath) && Boolean(source);
  if (!hasRequiredFields) {
    return undefined;
  }

  const config = { path: repoPath, source };

  const checkout = parseRepoCheckout(raw.checkout);
  const clone = parseRepoClone(raw.clone);
  if (checkout !== undefined) {
    config.checkout = checkout;
  }
  if (clone !== undefined) {
    config.clone = clone;
  }
  return config;
}
|
|
2970
|
+
|
|
2917
2971
|
// src/evaluation/formatting/prompt-builder.ts
|
|
2918
2972
|
async function buildPromptInputs(testCase, mode = "lm") {
|
|
2919
2973
|
const segmentsByMessage = testCase.input.map(
|
|
@@ -3308,58 +3362,6 @@ function parseWorkspaceScriptConfig(raw, evalFileDir) {
|
|
|
3308
3362
|
}
|
|
3309
3363
|
return cwd ? { ...config, cwd } : config;
|
|
3310
3364
|
}
|
|
3311
|
-
function parseRepoSource(raw) {
|
|
3312
|
-
if (!isJsonObject(raw)) return void 0;
|
|
3313
|
-
const obj = raw;
|
|
3314
|
-
if (obj.type === "git" && typeof obj.url === "string") {
|
|
3315
|
-
return { type: "git", url: obj.url };
|
|
3316
|
-
}
|
|
3317
|
-
if (obj.type === "local" && typeof obj.path === "string") {
|
|
3318
|
-
return { type: "local", path: obj.path };
|
|
3319
|
-
}
|
|
3320
|
-
return void 0;
|
|
3321
|
-
}
|
|
3322
|
-
function parseRepoCheckout(raw) {
|
|
3323
|
-
if (!isJsonObject(raw)) return void 0;
|
|
3324
|
-
const obj = raw;
|
|
3325
|
-
const ref = typeof obj.ref === "string" ? obj.ref : void 0;
|
|
3326
|
-
const resolve = obj.resolve === "remote" || obj.resolve === "local" ? obj.resolve : void 0;
|
|
3327
|
-
const ancestor = typeof obj.ancestor === "number" ? obj.ancestor : void 0;
|
|
3328
|
-
if (!ref && !resolve && ancestor === void 0) return void 0;
|
|
3329
|
-
return {
|
|
3330
|
-
...ref !== void 0 && { ref },
|
|
3331
|
-
...resolve !== void 0 && { resolve },
|
|
3332
|
-
...ancestor !== void 0 && { ancestor }
|
|
3333
|
-
};
|
|
3334
|
-
}
|
|
3335
|
-
function parseRepoClone(raw) {
|
|
3336
|
-
if (!isJsonObject(raw)) return void 0;
|
|
3337
|
-
const obj = raw;
|
|
3338
|
-
const depth = typeof obj.depth === "number" ? obj.depth : void 0;
|
|
3339
|
-
const filter = typeof obj.filter === "string" ? obj.filter : void 0;
|
|
3340
|
-
const sparse = Array.isArray(obj.sparse) ? obj.sparse.filter((s) => typeof s === "string") : void 0;
|
|
3341
|
-
if (depth === void 0 && !filter && !sparse) return void 0;
|
|
3342
|
-
return {
|
|
3343
|
-
...depth !== void 0 && { depth },
|
|
3344
|
-
...filter !== void 0 && { filter },
|
|
3345
|
-
...sparse !== void 0 && { sparse }
|
|
3346
|
-
};
|
|
3347
|
-
}
|
|
3348
|
-
function parseRepoConfig(raw) {
|
|
3349
|
-
if (!isJsonObject(raw)) return void 0;
|
|
3350
|
-
const obj = raw;
|
|
3351
|
-
const repoPath = typeof obj.path === "string" ? obj.path : void 0;
|
|
3352
|
-
const source = parseRepoSource(obj.source);
|
|
3353
|
-
if (!repoPath || !source) return void 0;
|
|
3354
|
-
const checkout = parseRepoCheckout(obj.checkout);
|
|
3355
|
-
const clone = parseRepoClone(obj.clone);
|
|
3356
|
-
return {
|
|
3357
|
-
path: repoPath,
|
|
3358
|
-
source,
|
|
3359
|
-
...checkout !== void 0 && { checkout },
|
|
3360
|
-
...clone !== void 0 && { clone }
|
|
3361
|
-
};
|
|
3362
|
-
}
|
|
3363
3365
|
function parseWorkspaceHookConfig(raw, evalFileDir) {
|
|
3364
3366
|
if (!isJsonObject(raw)) return void 0;
|
|
3365
3367
|
const script = parseWorkspaceScriptConfig(raw, evalFileDir);
|
|
@@ -10823,15 +10825,15 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
|
|
|
10823
10825
|
});
|
|
10824
10826
|
}
|
|
10825
10827
|
async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
10826
|
-
const { mkdir: mkdir16, readFile:
|
|
10828
|
+
const { mkdir: mkdir16, readFile: readFile16, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
|
|
10827
10829
|
const { tmpdir: tmpdir3 } = await import("node:os");
|
|
10828
|
-
const
|
|
10830
|
+
const path51 = await import("node:path");
|
|
10829
10831
|
const { randomUUID: randomUUID10 } = await import("node:crypto");
|
|
10830
|
-
const dir =
|
|
10832
|
+
const dir = path51.join(tmpdir3(), `agentv-exec-${randomUUID10()}`);
|
|
10831
10833
|
await mkdir16(dir, { recursive: true });
|
|
10832
|
-
const stdinPath =
|
|
10833
|
-
const stdoutPath =
|
|
10834
|
-
const stderrPath =
|
|
10834
|
+
const stdinPath = path51.join(dir, "stdin.txt");
|
|
10835
|
+
const stdoutPath = path51.join(dir, "stdout.txt");
|
|
10836
|
+
const stderrPath = path51.join(dir, "stderr.txt");
|
|
10835
10837
|
await writeFile9(stdinPath, stdinPayload, "utf8");
|
|
10836
10838
|
const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
|
|
10837
10839
|
const { spawn: spawn5 } = await import("node:child_process");
|
|
@@ -10861,8 +10863,8 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
|
10861
10863
|
resolve(code ?? 0);
|
|
10862
10864
|
});
|
|
10863
10865
|
});
|
|
10864
|
-
const stdout = (await
|
|
10865
|
-
const stderr = (await
|
|
10866
|
+
const stdout = (await readFile16(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
|
|
10867
|
+
const stderr = (await readFile16(stderrPath, "utf8")).replace(/\r\n/g, "\n");
|
|
10866
10868
|
return { stdout, stderr, exitCode };
|
|
10867
10869
|
} finally {
|
|
10868
10870
|
await rm6(dir, { recursive: true, force: true });
|
|
@@ -13121,115 +13123,115 @@ var FieldAccuracyEvaluator = class {
|
|
|
13121
13123
|
* Evaluate a single field against the expected value.
|
|
13122
13124
|
*/
|
|
13123
13125
|
evaluateField(fieldConfig, candidateData, expectedData) {
|
|
13124
|
-
const { path:
|
|
13125
|
-
const candidateValue = resolvePath(candidateData,
|
|
13126
|
-
const expectedValue = resolvePath(expectedData,
|
|
13126
|
+
const { path: path51, match, required = true, weight = 1 } = fieldConfig;
|
|
13127
|
+
const candidateValue = resolvePath(candidateData, path51);
|
|
13128
|
+
const expectedValue = resolvePath(expectedData, path51);
|
|
13127
13129
|
if (expectedValue === void 0) {
|
|
13128
13130
|
return {
|
|
13129
|
-
path:
|
|
13131
|
+
path: path51,
|
|
13130
13132
|
score: 1,
|
|
13131
13133
|
// No expected value means no comparison needed
|
|
13132
13134
|
weight,
|
|
13133
13135
|
hit: true,
|
|
13134
|
-
message: `${
|
|
13136
|
+
message: `${path51}: no expected value`
|
|
13135
13137
|
};
|
|
13136
13138
|
}
|
|
13137
13139
|
if (candidateValue === void 0) {
|
|
13138
13140
|
if (required) {
|
|
13139
13141
|
return {
|
|
13140
|
-
path:
|
|
13142
|
+
path: path51,
|
|
13141
13143
|
score: 0,
|
|
13142
13144
|
weight,
|
|
13143
13145
|
hit: false,
|
|
13144
|
-
message: `${
|
|
13146
|
+
message: `${path51} (required, missing)`
|
|
13145
13147
|
};
|
|
13146
13148
|
}
|
|
13147
13149
|
return {
|
|
13148
|
-
path:
|
|
13150
|
+
path: path51,
|
|
13149
13151
|
score: 1,
|
|
13150
13152
|
// Don't penalize missing optional fields
|
|
13151
13153
|
weight: 0,
|
|
13152
13154
|
// Zero weight means it won't affect the score
|
|
13153
13155
|
hit: true,
|
|
13154
|
-
message: `${
|
|
13156
|
+
message: `${path51}: optional field missing`
|
|
13155
13157
|
};
|
|
13156
13158
|
}
|
|
13157
13159
|
switch (match) {
|
|
13158
13160
|
case "exact":
|
|
13159
|
-
return this.compareExact(
|
|
13161
|
+
return this.compareExact(path51, candidateValue, expectedValue, weight);
|
|
13160
13162
|
case "numeric_tolerance":
|
|
13161
13163
|
return this.compareNumericTolerance(
|
|
13162
|
-
|
|
13164
|
+
path51,
|
|
13163
13165
|
candidateValue,
|
|
13164
13166
|
expectedValue,
|
|
13165
13167
|
fieldConfig,
|
|
13166
13168
|
weight
|
|
13167
13169
|
);
|
|
13168
13170
|
case "date":
|
|
13169
|
-
return this.compareDate(
|
|
13171
|
+
return this.compareDate(path51, candidateValue, expectedValue, fieldConfig, weight);
|
|
13170
13172
|
default:
|
|
13171
13173
|
return {
|
|
13172
|
-
path:
|
|
13174
|
+
path: path51,
|
|
13173
13175
|
score: 0,
|
|
13174
13176
|
weight,
|
|
13175
13177
|
hit: false,
|
|
13176
|
-
message: `${
|
|
13178
|
+
message: `${path51}: unknown match type "${match}"`
|
|
13177
13179
|
};
|
|
13178
13180
|
}
|
|
13179
13181
|
}
|
|
13180
13182
|
/**
|
|
13181
13183
|
* Exact equality comparison.
|
|
13182
13184
|
*/
|
|
13183
|
-
compareExact(
|
|
13185
|
+
compareExact(path51, candidateValue, expectedValue, weight) {
|
|
13184
13186
|
if (deepEqual(candidateValue, expectedValue)) {
|
|
13185
13187
|
return {
|
|
13186
|
-
path:
|
|
13188
|
+
path: path51,
|
|
13187
13189
|
score: 1,
|
|
13188
13190
|
weight,
|
|
13189
13191
|
hit: true,
|
|
13190
|
-
message:
|
|
13192
|
+
message: path51
|
|
13191
13193
|
};
|
|
13192
13194
|
}
|
|
13193
13195
|
if (typeof candidateValue !== typeof expectedValue) {
|
|
13194
13196
|
return {
|
|
13195
|
-
path:
|
|
13197
|
+
path: path51,
|
|
13196
13198
|
score: 0,
|
|
13197
13199
|
weight,
|
|
13198
13200
|
hit: false,
|
|
13199
|
-
message: `${
|
|
13201
|
+
message: `${path51} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
|
|
13200
13202
|
};
|
|
13201
13203
|
}
|
|
13202
13204
|
return {
|
|
13203
|
-
path:
|
|
13205
|
+
path: path51,
|
|
13204
13206
|
score: 0,
|
|
13205
13207
|
weight,
|
|
13206
13208
|
hit: false,
|
|
13207
|
-
message: `${
|
|
13209
|
+
message: `${path51} (value mismatch)`
|
|
13208
13210
|
};
|
|
13209
13211
|
}
|
|
13210
13212
|
/**
|
|
13211
13213
|
* Numeric comparison with absolute or relative tolerance.
|
|
13212
13214
|
*/
|
|
13213
|
-
compareNumericTolerance(
|
|
13215
|
+
compareNumericTolerance(path51, candidateValue, expectedValue, fieldConfig, weight) {
|
|
13214
13216
|
const { tolerance = 0, relative = false } = fieldConfig;
|
|
13215
13217
|
const candidateNum = toNumber(candidateValue);
|
|
13216
13218
|
const expectedNum = toNumber(expectedValue);
|
|
13217
13219
|
if (candidateNum === null || expectedNum === null) {
|
|
13218
13220
|
return {
|
|
13219
|
-
path:
|
|
13221
|
+
path: path51,
|
|
13220
13222
|
score: 0,
|
|
13221
13223
|
weight,
|
|
13222
13224
|
hit: false,
|
|
13223
|
-
message: `${
|
|
13225
|
+
message: `${path51} (non-numeric value)`
|
|
13224
13226
|
};
|
|
13225
13227
|
}
|
|
13226
13228
|
if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
|
|
13227
13229
|
return {
|
|
13228
|
-
path:
|
|
13230
|
+
path: path51,
|
|
13229
13231
|
score: 0,
|
|
13230
13232
|
weight,
|
|
13231
13233
|
hit: false,
|
|
13232
|
-
message: `${
|
|
13234
|
+
message: `${path51} (invalid numeric value)`
|
|
13233
13235
|
};
|
|
13234
13236
|
}
|
|
13235
13237
|
const diff = Math.abs(candidateNum - expectedNum);
|
|
@@ -13242,61 +13244,61 @@ var FieldAccuracyEvaluator = class {
|
|
|
13242
13244
|
}
|
|
13243
13245
|
if (withinTolerance) {
|
|
13244
13246
|
return {
|
|
13245
|
-
path:
|
|
13247
|
+
path: path51,
|
|
13246
13248
|
score: 1,
|
|
13247
13249
|
weight,
|
|
13248
13250
|
hit: true,
|
|
13249
|
-
message: `${
|
|
13251
|
+
message: `${path51} (within tolerance: diff=${diff.toFixed(2)})`
|
|
13250
13252
|
};
|
|
13251
13253
|
}
|
|
13252
13254
|
return {
|
|
13253
|
-
path:
|
|
13255
|
+
path: path51,
|
|
13254
13256
|
score: 0,
|
|
13255
13257
|
weight,
|
|
13256
13258
|
hit: false,
|
|
13257
|
-
message: `${
|
|
13259
|
+
message: `${path51} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
|
|
13258
13260
|
};
|
|
13259
13261
|
}
|
|
13260
13262
|
/**
|
|
13261
13263
|
* Date comparison with format normalization.
|
|
13262
13264
|
*/
|
|
13263
|
-
compareDate(
|
|
13265
|
+
compareDate(path51, candidateValue, expectedValue, fieldConfig, weight) {
|
|
13264
13266
|
const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
|
|
13265
13267
|
const candidateDate = parseDate(String(candidateValue), formats);
|
|
13266
13268
|
const expectedDate = parseDate(String(expectedValue), formats);
|
|
13267
13269
|
if (candidateDate === null) {
|
|
13268
13270
|
return {
|
|
13269
|
-
path:
|
|
13271
|
+
path: path51,
|
|
13270
13272
|
score: 0,
|
|
13271
13273
|
weight,
|
|
13272
13274
|
hit: false,
|
|
13273
|
-
message: `${
|
|
13275
|
+
message: `${path51} (unparseable candidate date)`
|
|
13274
13276
|
};
|
|
13275
13277
|
}
|
|
13276
13278
|
if (expectedDate === null) {
|
|
13277
13279
|
return {
|
|
13278
|
-
path:
|
|
13280
|
+
path: path51,
|
|
13279
13281
|
score: 0,
|
|
13280
13282
|
weight,
|
|
13281
13283
|
hit: false,
|
|
13282
|
-
message: `${
|
|
13284
|
+
message: `${path51} (unparseable expected date)`
|
|
13283
13285
|
};
|
|
13284
13286
|
}
|
|
13285
13287
|
if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
|
|
13286
13288
|
return {
|
|
13287
|
-
path:
|
|
13289
|
+
path: path51,
|
|
13288
13290
|
score: 1,
|
|
13289
13291
|
weight,
|
|
13290
13292
|
hit: true,
|
|
13291
|
-
message:
|
|
13293
|
+
message: path51
|
|
13292
13294
|
};
|
|
13293
13295
|
}
|
|
13294
13296
|
return {
|
|
13295
|
-
path:
|
|
13297
|
+
path: path51,
|
|
13296
13298
|
score: 0,
|
|
13297
13299
|
weight,
|
|
13298
13300
|
hit: false,
|
|
13299
|
-
message: `${
|
|
13301
|
+
message: `${path51} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
|
|
13300
13302
|
};
|
|
13301
13303
|
}
|
|
13302
13304
|
/**
|
|
@@ -13329,11 +13331,11 @@ var FieldAccuracyEvaluator = class {
|
|
|
13329
13331
|
};
|
|
13330
13332
|
}
|
|
13331
13333
|
};
|
|
13332
|
-
function resolvePath(obj,
|
|
13333
|
-
if (!
|
|
13334
|
+
function resolvePath(obj, path51) {
|
|
13335
|
+
if (!path51 || !obj) {
|
|
13334
13336
|
return void 0;
|
|
13335
13337
|
}
|
|
13336
|
-
const parts =
|
|
13338
|
+
const parts = path51.split(/\.|\[|\]/).filter((p) => p.length > 0);
|
|
13337
13339
|
let current = obj;
|
|
13338
13340
|
for (const part of parts) {
|
|
13339
13341
|
if (current === null || current === void 0) {
|
|
@@ -13825,8 +13827,8 @@ var TokenUsageEvaluator = class {
|
|
|
13825
13827
|
};
|
|
13826
13828
|
|
|
13827
13829
|
// src/evaluation/evaluators/tool-trajectory.ts
|
|
13828
|
-
function getNestedValue(obj,
|
|
13829
|
-
const parts =
|
|
13830
|
+
function getNestedValue(obj, path51) {
|
|
13831
|
+
const parts = path51.split(".");
|
|
13830
13832
|
let current = obj;
|
|
13831
13833
|
for (const part of parts) {
|
|
13832
13834
|
if (current === null || current === void 0 || typeof current !== "object") {
|
|
@@ -18324,9 +18326,106 @@ function buildPrompt(criteria, question, referenceAnswer) {
|
|
|
18324
18326
|
return parts.join("\n");
|
|
18325
18327
|
}
|
|
18326
18328
|
|
|
18327
|
-
// src/evaluation/
|
|
18328
|
-
import {
|
|
18329
|
+
// src/evaluation/workspace/deps-scanner.ts
|
|
18330
|
+
import { readFile as readFile13 } from "node:fs/promises";
|
|
18329
18331
|
import path46 from "node:path";
|
|
18332
|
+
import { parse as parse5 } from "yaml";
|
|
18333
|
+
/**
 * Canonicalize a git remote URL so equivalent spellings compare equal.
 *
 * Normalization steps:
 *   1. strip trailing slashes, then a trailing `.git` suffix (in that order,
 *      so `https://host/org/repo.git/` also loses its `.git`);
 *   2. when the result parses as a URL, lower-case the hostname and drop the
 *      single trailing slash that serialization may add.
 *
 * Inputs that `new URL()` cannot parse (e.g. scp-style
 * `git@host:org/repo.git`) keep only the textual normalization from step 1.
 *
 * @param {string} url - Remote URL as written in the eval/workspace file.
 * @returns {string} The normalized URL, intended for use as a comparison key.
 */
function normalizeGitUrl(url) {
  let normalized = url.replace(/\/+$/, "").replace(/\.git$/, "");
  try {
    const parsed = new URL(normalized);
    parsed.hostname = parsed.hostname.toLowerCase();
    normalized = parsed.toString().replace(/\/$/, "");
  } catch {
    // Deliberate best-effort: not a parseable URL, keep the textual form.
  }
  return normalized;
}
|
|
18343
|
+
/**
 * Collect the distinct git repositories referenced by a set of eval files.
 *
 * Files are processed one after another. Repos whose source type is not
 * "git" are ignored. Two references are treated as the same dependency when
 * their normalized URL and checkout ref are equal; the first occurrence
 * supplies `url`, `clone` and the remaining checkout options, and later
 * occurrences only contribute to `usedBy`.
 *
 * A failure while reading or parsing one file is recorded in `errors` and
 * does not stop the scan.
 *
 * @param {Iterable<string>} evalFilePaths - Paths of the eval files to scan.
 * @returns {Promise<{repos: Array<{url: string, ref: (string|undefined), clone: (object|undefined), checkout: (object|undefined), usedBy: string[]}>, errors: Array<{file: string, message: string}>}>}
 *   The de-duplicated dependencies (in first-seen order) and per-file errors.
 */
async function scanRepoDeps(evalFilePaths) {
  const seen = /* @__PURE__ */ new Map();
  const errors = [];
  for (const filePath of evalFilePaths) {
    try {
      const repos = await extractReposFromEvalFile(filePath);
      for (const repo of repos) {
        if (repo.source.type !== "git") continue;
        const ref = repo.checkout?.ref;
        // NUL cannot occur in a URL or ref, so it is a safe key separator.
        const key = `${normalizeGitUrl(repo.source.url)}\0${ref ?? ""}`;
        const existing = seen.get(key);
        if (existing) {
          // One file may reference the same repo several times (suite-level
          // plus per-test workspaces); list each file only once.
          if (!existing.usedBy.includes(filePath)) {
            existing.usedBy.push(filePath);
          }
          continue;
        }
        // `ref` is reported as its own field, so keep only the other
        // checkout options (if any) under `checkout`.
        const { ref: _ref, ...checkoutRest } = repo.checkout ?? {};
        const hasCheckout = Object.keys(checkoutRest).length > 0;
        seen.set(key, {
          url: repo.source.url,
          ref,
          clone: repo.clone,
          checkout: hasCheckout ? checkoutRest : void 0,
          usedBy: [filePath]
        });
      }
    } catch (err) {
      errors.push({
        file: filePath,
        message: err instanceof Error ? err.message : String(err)
      });
    }
  }
  return { repos: [...seen.values()], errors };
}
|
|
18377
|
+
/**
 * Read one eval file and gather every repo declared in its workspaces.
 *
 * The file is parsed as YAML and passed through `interpolateEnv` with
 * `process.env`. Repos are taken from the suite-level `workspace` first, then
 * from the `workspace` of each entry in `tests` (in order). Entries in
 * `tests` that are not plain objects are skipped.
 *
 * @param {string} filePath - Path of the eval file.
 * @returns {Promise<object[]>} Repo descriptors in declaration order; an empty
 *   array when the document root is not a plain object.
 */
async function extractReposFromEvalFile(filePath) {
  const text = await readFile13(filePath, "utf8");
  const doc = interpolateEnv(parse5(text), process.env);
  const rootIsPlainObject = Boolean(doc) && typeof doc === "object" && !Array.isArray(doc);
  if (!rootIsPlainObject) {
    return [];
  }

  // Workspace references given as strings are resolved against this directory.
  const evalFileDir = path46.dirname(path46.resolve(filePath));

  const collected = [];
  collected.push(...(await extractReposFromWorkspaceRaw(doc.workspace, evalFileDir)));

  const tests = Array.isArray(doc.tests) ? doc.tests : [];
  for (const entry of tests) {
    const entryIsPlainObject = Boolean(entry) && typeof entry === "object" && !Array.isArray(entry);
    if (!entryIsPlainObject) {
      continue;
    }
    collected.push(...(await extractReposFromWorkspaceRaw(entry.workspace, evalFileDir)));
  }
  return collected;
}
|
|
18396
|
+
/**
 * Resolve a raw `workspace` value to the repos it declares.
 *
 * A string is treated as a path (relative to `evalFileDir`) to a workspace
 * file, which is read, parsed as YAML and passed through `interpolateEnv`
 * with `process.env`. A plain object is used as the workspace directly.
 * Anything else declares no repos.
 *
 * @param {unknown} raw - The `workspace` value from an eval file or test.
 * @param {string} evalFileDir - Directory used to resolve workspace file paths.
 * @returns {Promise<object[]>} Repo descriptors, or an empty array when the
 *   workspace (inline or loaded) is not a plain object.
 */
async function extractReposFromWorkspaceRaw(raw, evalFileDir) {
  let workspace = raw;
  if (typeof raw === "string") {
    const workspaceFile = path46.resolve(evalFileDir, raw);
    const text = await readFile13(workspaceFile, "utf8");
    workspace = interpolateEnv(parse5(text), process.env);
  }

  const isPlainObject = Boolean(workspace) && typeof workspace === "object" && !Array.isArray(workspace);
  return isPlainObject ? extractReposFromObject(workspace) : [];
}
|
|
18409
|
+
/**
 * Pull the repo descriptors out of a workspace object's `repos` array.
 *
 * Entries that are not plain objects, or whose `source` does not validate via
 * `parseRepoSource`, are skipped. Unlike `parseRepoConfig`, a `path` is not
 * required here, and `checkout`/`clone` keys are always present on the result
 * (possibly `undefined`).
 *
 * @param {object} obj - A workspace object.
 * @returns {Array<{source: object, checkout: (object|undefined), clone: (object|undefined)}>}
 *   One descriptor per usable entry, in declaration order.
 */
function extractReposFromObject(obj) {
  if (!Array.isArray(obj.repos)) {
    return [];
  }
  return obj.repos.flatMap((entry) => {
    const isPlainObject = Boolean(entry) && typeof entry === "object" && !Array.isArray(entry);
    if (!isPlainObject) {
      return [];
    }
    const source = parseRepoSource(entry.source);
    if (!source) {
      return [];
    }
    return [
      {
        source,
        checkout: parseRepoCheckout(entry.checkout),
        clone: parseRepoClone(entry.clone)
      }
    ];
  });
}
|
|
18425
|
+
|
|
18426
|
+
// src/evaluation/cache/response-cache.ts
|
|
18427
|
+
import { mkdir as mkdir15, readFile as readFile14, writeFile as writeFile8 } from "node:fs/promises";
|
|
18428
|
+
import path47 from "node:path";
|
|
18330
18429
|
var DEFAULT_CACHE_PATH = ".agentv/cache";
|
|
18331
18430
|
var ResponseCache = class {
|
|
18332
18431
|
cachePath;
|
|
@@ -18336,7 +18435,7 @@ var ResponseCache = class {
|
|
|
18336
18435
|
async get(key) {
|
|
18337
18436
|
const filePath = this.keyToPath(key);
|
|
18338
18437
|
try {
|
|
18339
|
-
const data = await
|
|
18438
|
+
const data = await readFile14(filePath, "utf8");
|
|
18340
18439
|
return JSON.parse(data);
|
|
18341
18440
|
} catch {
|
|
18342
18441
|
return void 0;
|
|
@@ -18344,13 +18443,13 @@ var ResponseCache = class {
|
|
|
18344
18443
|
}
|
|
18345
18444
|
async set(key, value) {
|
|
18346
18445
|
const filePath = this.keyToPath(key);
|
|
18347
|
-
const dir =
|
|
18446
|
+
const dir = path47.dirname(filePath);
|
|
18348
18447
|
await mkdir15(dir, { recursive: true });
|
|
18349
18448
|
await writeFile8(filePath, JSON.stringify(value, null, 2), "utf8");
|
|
18350
18449
|
}
|
|
18351
18450
|
keyToPath(key) {
|
|
18352
18451
|
const prefix = key.slice(0, 2);
|
|
18353
|
-
return
|
|
18452
|
+
return path47.join(this.cachePath, prefix, `${key}.json`);
|
|
18354
18453
|
}
|
|
18355
18454
|
};
|
|
18356
18455
|
function shouldEnableCache(params) {
|
|
@@ -18367,10 +18466,10 @@ function shouldSkipCacheForTemperature(targetConfig) {
|
|
|
18367
18466
|
|
|
18368
18467
|
// src/projects.ts
|
|
18369
18468
|
import { existsSync as existsSync6, mkdirSync as mkdirSync2, readFileSync as readFileSync3, readdirSync as readdirSync3, statSync as statSync2, writeFileSync } from "node:fs";
|
|
18370
|
-
import
|
|
18469
|
+
import path48 from "node:path";
|
|
18371
18470
|
import { parse as parseYaml3, stringify as stringifyYaml } from "yaml";
|
|
18372
18471
|
/**
 * Build the path of the projects registry file: `projects.yaml` directly
 * inside the directory returned by `getAgentvHome()`.
 *
 * @returns {string} Path to the registry file.
 */
function getProjectsRegistryPath() {
  const agentvHome = getAgentvHome();
  const registryFileName = "projects.yaml";
  return path48.join(agentvHome, registryFileName);
}
|
|
18375
18474
|
function loadProjectRegistry() {
|
|
18376
18475
|
const registryPath = getProjectsRegistryPath();
|
|
@@ -18390,14 +18489,14 @@ function loadProjectRegistry() {
|
|
|
18390
18489
|
}
|
|
18391
18490
|
/**
 * Serialize the project registry to YAML and write it to the registry file,
 * creating the containing directory first when it does not exist yet.
 *
 * @param {object} registry - Registry object to persist.
 * @returns {void}
 */
function saveProjectRegistry(registry) {
  const registryFile = getProjectsRegistryPath();
  const parentDir = path48.dirname(registryFile);

  const parentMissing = !existsSync6(parentDir);
  if (parentMissing) {
    mkdirSync2(parentDir, { recursive: true });
  }

  const yamlText = stringifyYaml(registry);
  writeFileSync(registryFile, yamlText, "utf-8");
}
|
|
18399
18498
|
function deriveProjectId(dirPath, existingIds) {
|
|
18400
|
-
const base =
|
|
18499
|
+
const base = path48.basename(dirPath).toLowerCase().replace(/[^a-z0-9-]/g, "-").replace(/-+/g, "-").replace(/^-|-$/g, "");
|
|
18401
18500
|
let candidate = base || "project";
|
|
18402
18501
|
let suffix = 2;
|
|
18403
18502
|
while (existingIds.includes(candidate)) {
|
|
@@ -18407,11 +18506,11 @@ function deriveProjectId(dirPath, existingIds) {
|
|
|
18407
18506
|
return candidate;
|
|
18408
18507
|
}
|
|
18409
18508
|
function addProject(projectPath) {
|
|
18410
|
-
const absPath =
|
|
18509
|
+
const absPath = path48.resolve(projectPath);
|
|
18411
18510
|
if (!existsSync6(absPath)) {
|
|
18412
18511
|
throw new Error(`Directory not found: ${absPath}`);
|
|
18413
18512
|
}
|
|
18414
|
-
if (!existsSync6(
|
|
18513
|
+
if (!existsSync6(path48.join(absPath, ".agentv"))) {
|
|
18415
18514
|
throw new Error(`No .agentv/ directory found in ${absPath}. Run an evaluation first.`);
|
|
18416
18515
|
}
|
|
18417
18516
|
const registry = loadProjectRegistry();
|
|
@@ -18425,7 +18524,7 @@ function addProject(projectPath) {
|
|
|
18425
18524
|
absPath,
|
|
18426
18525
|
registry.projects.map((p) => p.id)
|
|
18427
18526
|
),
|
|
18428
|
-
name:
|
|
18527
|
+
name: path48.basename(absPath),
|
|
18429
18528
|
path: absPath,
|
|
18430
18529
|
addedAt: now,
|
|
18431
18530
|
lastOpenedAt: now
|
|
@@ -18454,14 +18553,14 @@ function touchProject(projectId) {
|
|
|
18454
18553
|
}
|
|
18455
18554
|
}
|
|
18456
18555
|
function discoverProjects(rootDir, maxDepth = 2) {
|
|
18457
|
-
const absRoot =
|
|
18556
|
+
const absRoot = path48.resolve(rootDir);
|
|
18458
18557
|
if (!existsSync6(absRoot) || !statSync2(absRoot).isDirectory()) {
|
|
18459
18558
|
return [];
|
|
18460
18559
|
}
|
|
18461
18560
|
const results = [];
|
|
18462
18561
|
function scan(dir, depth) {
|
|
18463
18562
|
if (depth > maxDepth) return;
|
|
18464
|
-
if (existsSync6(
|
|
18563
|
+
if (existsSync6(path48.join(dir, ".agentv"))) {
|
|
18465
18564
|
results.push(dir);
|
|
18466
18565
|
return;
|
|
18467
18566
|
}
|
|
@@ -18471,7 +18570,7 @@ function discoverProjects(rootDir, maxDepth = 2) {
|
|
|
18471
18570
|
for (const entry of entries) {
|
|
18472
18571
|
if (!entry.isDirectory()) continue;
|
|
18473
18572
|
if (entry.name.startsWith(".") || entry.name === "node_modules") continue;
|
|
18474
|
-
scan(
|
|
18573
|
+
scan(path48.join(dir, entry.name), depth + 1);
|
|
18475
18574
|
}
|
|
18476
18575
|
} catch {
|
|
18477
18576
|
}
|
|
@@ -19384,8 +19483,8 @@ function extractResponseItemContent(content) {
|
|
|
19384
19483
|
// src/import/codex-session-discovery.ts
|
|
19385
19484
|
import { readdir as readdir8, stat as stat9 } from "node:fs/promises";
|
|
19386
19485
|
import { homedir as homedir3 } from "node:os";
|
|
19387
|
-
import
|
|
19388
|
-
var DEFAULT_SESSIONS_DIR = () =>
|
|
19486
|
+
import path49 from "node:path";
|
|
19487
|
+
var DEFAULT_SESSIONS_DIR = () => path49.join(homedir3(), ".codex", "sessions");
|
|
19389
19488
|
async function discoverCodexSessions(opts) {
|
|
19390
19489
|
const sessionsDir = opts?.sessionsDir ?? DEFAULT_SESSIONS_DIR();
|
|
19391
19490
|
const limit = opts?.latest ? 1 : opts?.limit ?? 10;
|
|
@@ -19397,7 +19496,7 @@ async function discoverCodexSessions(opts) {
|
|
|
19397
19496
|
return [];
|
|
19398
19497
|
}
|
|
19399
19498
|
for (const year of yearDirs) {
|
|
19400
|
-
const yearPath =
|
|
19499
|
+
const yearPath = path49.join(sessionsDir, year);
|
|
19401
19500
|
let monthDirs;
|
|
19402
19501
|
try {
|
|
19403
19502
|
monthDirs = await readdir8(yearPath);
|
|
@@ -19405,7 +19504,7 @@ async function discoverCodexSessions(opts) {
|
|
|
19405
19504
|
continue;
|
|
19406
19505
|
}
|
|
19407
19506
|
for (const month of monthDirs) {
|
|
19408
|
-
const monthPath =
|
|
19507
|
+
const monthPath = path49.join(yearPath, month);
|
|
19409
19508
|
let dayDirs;
|
|
19410
19509
|
try {
|
|
19411
19510
|
dayDirs = await readdir8(monthPath);
|
|
@@ -19417,7 +19516,7 @@ async function discoverCodexSessions(opts) {
|
|
|
19417
19516
|
const dirDate = `${year}-${month}-${day}`;
|
|
19418
19517
|
if (dirDate !== opts.date) continue;
|
|
19419
19518
|
}
|
|
19420
|
-
const dayPath =
|
|
19519
|
+
const dayPath = path49.join(monthPath, day);
|
|
19421
19520
|
let files;
|
|
19422
19521
|
try {
|
|
19423
19522
|
files = await readdir8(dayPath);
|
|
@@ -19426,7 +19525,7 @@ async function discoverCodexSessions(opts) {
|
|
|
19426
19525
|
}
|
|
19427
19526
|
for (const file of files) {
|
|
19428
19527
|
if (!file.startsWith("rollout-") || !file.endsWith(".jsonl")) continue;
|
|
19429
|
-
const filePath =
|
|
19528
|
+
const filePath = path49.join(dayPath, file);
|
|
19430
19529
|
const nameWithoutExt = file.replace(/\.jsonl$/, "");
|
|
19431
19530
|
const parts = nameWithoutExt.split("-");
|
|
19432
19531
|
const sessionId = parts.length >= 6 ? parts.slice(-5).join("-") : nameWithoutExt;
|
|
@@ -19449,8 +19548,8 @@ async function discoverCodexSessions(opts) {
|
|
|
19449
19548
|
// src/import/session-discovery.ts
|
|
19450
19549
|
import { readdir as readdir9, stat as stat10 } from "node:fs/promises";
|
|
19451
19550
|
import { homedir as homedir4 } from "node:os";
|
|
19452
|
-
import
|
|
19453
|
-
var DEFAULT_PROJECTS_DIR = () =>
|
|
19551
|
+
import path50 from "node:path";
|
|
19552
|
+
var DEFAULT_PROJECTS_DIR = () => path50.join(homedir4(), ".claude", "projects");
|
|
19454
19553
|
/**
 * Encode a project path as a single name by replacing every forward slash
 * with a hyphen. No other character is altered.
 *
 * @param {string} projectPath - Project path to encode.
 * @returns {string} The path with each "/" replaced by "-".
 */
function encodeProjectPath(projectPath) {
  const segments = projectPath.split("/");
  return segments.join("-");
}
|
|
@@ -19469,7 +19568,7 @@ async function discoverClaudeSessions(opts) {
|
|
|
19469
19568
|
}
|
|
19470
19569
|
const sessions = [];
|
|
19471
19570
|
for (const projectDir of projectDirs) {
|
|
19472
|
-
const dirPath =
|
|
19571
|
+
const dirPath = path50.join(projectsDir, projectDir);
|
|
19473
19572
|
let entries;
|
|
19474
19573
|
try {
|
|
19475
19574
|
entries = await readdir9(dirPath);
|
|
@@ -19480,7 +19579,7 @@ async function discoverClaudeSessions(opts) {
|
|
|
19480
19579
|
if (!entry.endsWith(".jsonl")) continue;
|
|
19481
19580
|
const sessionId = entry.replace(/\.jsonl$/, "");
|
|
19482
19581
|
if (opts?.sessionId && sessionId !== opts.sessionId) continue;
|
|
19483
|
-
const filePath =
|
|
19582
|
+
const filePath = path50.join(dirPath, entry);
|
|
19484
19583
|
let updatedAt;
|
|
19485
19584
|
try {
|
|
19486
19585
|
const fileStat = await stat10(filePath);
|
|
@@ -19501,7 +19600,7 @@ async function discoverClaudeSessions(opts) {
|
|
|
19501
19600
|
}
|
|
19502
19601
|
|
|
19503
19602
|
// src/import/types.ts
|
|
19504
|
-
import { readFile as
|
|
19603
|
+
import { readFile as readFile15 } from "node:fs/promises";
|
|
19505
19604
|
function toTranscriptJsonLine(entry) {
|
|
19506
19605
|
const firstUserMessage = entry.messages.find((m) => m.role === "user");
|
|
19507
19606
|
const input = typeof firstUserMessage?.content === "string" ? firstUserMessage.content : "";
|
|
@@ -19527,11 +19626,11 @@ function toTranscriptJsonLine(entry) {
|
|
|
19527
19626
|
};
|
|
19528
19627
|
}
|
|
19529
19628
|
/**
 * Read a JSONL transcript file and parse each non-blank line as JSON.
 *
 * @param {string} filePath - Path of the JSONL file (read as UTF-8).
 * @returns {Promise<unknown[]>} Parsed values, one per non-blank line, in
 *   file order.
 * @throws {SyntaxError} When a non-blank line is not valid JSON.
 */
async function readTranscriptJsonl(filePath) {
  const text = await readFile15(filePath, "utf8");
  const entries = [];
  for (const line of text.split("\n")) {
    // Whitespace-only lines (including a trailing newline's empty tail) are skipped.
    if (line.trim().length === 0) {
      continue;
    }
    entries.push(JSON.parse(line));
  }
  return entries;
}
|
|
19533
19632
|
/**
 * Read a transcript file and return its full contents as a UTF-8 string.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {Promise<string>} The file contents.
 */
async function readTranscriptFile(filePath) {
  const contents = await readFile15(filePath, "utf8");
  return contents;
}
|
|
19536
19635
|
|
|
19537
19636
|
// src/import/transcript-provider.ts
|
|
@@ -19745,6 +19844,7 @@ export {
|
|
|
19745
19844
|
runRegexAssertion,
|
|
19746
19845
|
runStartsWithAssertion,
|
|
19747
19846
|
saveProjectRegistry,
|
|
19847
|
+
scanRepoDeps,
|
|
19748
19848
|
scoreToVerdict,
|
|
19749
19849
|
shouldEnableCache,
|
|
19750
19850
|
shouldSkipCacheForTemperature,
|