@agentv/core 0.7.0 → 0.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-L7I5UTJU.js → chunk-UQLHF3T7.js} +12 -3
- package/dist/chunk-UQLHF3T7.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +143 -2
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.d.cts +1 -1
- package/dist/evaluation/validation/index.d.ts +1 -1
- package/dist/evaluation/validation/index.js +143 -2
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +56 -86
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +46 -83
- package/dist/index.js.map +1 -1
- package/package.json +2 -2
- package/dist/chunk-L7I5UTJU.js.map +0 -1
package/dist/index.js
CHANGED
|
@@ -4,9 +4,10 @@ import {
|
|
|
4
4
|
buildSearchRoots,
|
|
5
5
|
fileExists,
|
|
6
6
|
findGitRoot,
|
|
7
|
+
isAgentProvider,
|
|
7
8
|
readTextFile,
|
|
8
9
|
resolveFileReference
|
|
9
|
-
} from "./chunk-L7I5UTJU.js";
|
|
10
|
+
} from "./chunk-UQLHF3T7.js";
|
|
10
11
|
|
|
11
12
|
// src/evaluation/types.ts
|
|
12
13
|
var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
|
|
@@ -1010,7 +1011,7 @@ function formatTimeoutSuffix(timeoutMs) {
|
|
|
1010
1011
|
import { exec as execCallback, spawn } from "node:child_process";
|
|
1011
1012
|
import { randomUUID } from "node:crypto";
|
|
1012
1013
|
import { constants as constants2, createWriteStream } from "node:fs";
|
|
1013
|
-
import { access as access2, copyFile, mkdtemp, mkdir, rm, writeFile } from "node:fs/promises";
|
|
1014
|
+
import { access as access2, mkdtemp, mkdir, rm, writeFile } from "node:fs/promises";
|
|
1014
1015
|
import { tmpdir } from "node:os";
|
|
1015
1016
|
import path4 from "node:path";
|
|
1016
1017
|
import { promisify as promisify2 } from "node:util";
|
|
@@ -1173,7 +1174,6 @@ function pathToFileUri(filePath) {
|
|
|
1173
1174
|
var execAsync2 = promisify2(execCallback);
|
|
1174
1175
|
var WORKSPACE_PREFIX = "agentv-codex-";
|
|
1175
1176
|
var PROMPT_FILENAME = "prompt.md";
|
|
1176
|
-
var FILES_DIR = "files";
|
|
1177
1177
|
var JSONL_TYPE_ITEM_COMPLETED = "item.completed";
|
|
1178
1178
|
var CodexProvider = class {
|
|
1179
1179
|
id;
|
|
@@ -1196,21 +1196,10 @@ var CodexProvider = class {
|
|
|
1196
1196
|
}
|
|
1197
1197
|
await this.ensureEnvironmentReady();
|
|
1198
1198
|
const inputFiles = normalizeInputFiles2(request.inputFiles);
|
|
1199
|
-
const originalGuidelines = new Set(
|
|
1200
|
-
collectGuidelineFiles(inputFiles, request.guideline_patterns).map((file) => path4.resolve(file))
|
|
1201
|
-
);
|
|
1202
1199
|
const workspaceRoot = await this.createWorkspace();
|
|
1203
1200
|
const logger = await this.createStreamLogger(request).catch(() => void 0);
|
|
1204
1201
|
try {
|
|
1205
|
-
const { mirroredInputFiles, guidelineMirrors } = await this.mirrorInputFiles(
|
|
1206
|
-
inputFiles,
|
|
1207
|
-
workspaceRoot,
|
|
1208
|
-
originalGuidelines
|
|
1209
|
-
);
|
|
1210
|
-
const promptContent = buildPromptDocument(request, mirroredInputFiles, {
|
|
1211
|
-
guidelinePatterns: request.guideline_patterns,
|
|
1212
|
-
guidelineOverrides: guidelineMirrors
|
|
1213
|
-
});
|
|
1202
|
+
const promptContent = buildPromptDocument(request, inputFiles);
|
|
1214
1203
|
const promptFile = path4.join(workspaceRoot, PROMPT_FILENAME);
|
|
1215
1204
|
await writeFile(promptFile, promptContent, "utf8");
|
|
1216
1205
|
const args = this.buildCodexArgs();
|
|
@@ -1239,7 +1228,7 @@ var CodexProvider = class {
|
|
|
1239
1228
|
executable: this.resolvedExecutable ?? this.config.executable,
|
|
1240
1229
|
promptFile,
|
|
1241
1230
|
workspace: workspaceRoot,
|
|
1242
|
-
inputFiles: mirroredInputFiles,
|
|
1231
|
+
inputFiles,
|
|
1243
1232
|
logFile: logger?.filePath
|
|
1244
1233
|
}
|
|
1245
1234
|
};
|
|
@@ -1294,37 +1283,6 @@ var CodexProvider = class {
|
|
|
1294
1283
|
throw error;
|
|
1295
1284
|
}
|
|
1296
1285
|
}
|
|
1297
|
-
async mirrorInputFiles(inputFiles, workspaceRoot, guidelineOriginals) {
|
|
1298
|
-
if (!inputFiles || inputFiles.length === 0) {
|
|
1299
|
-
return {
|
|
1300
|
-
mirroredInputFiles: void 0,
|
|
1301
|
-
guidelineMirrors: /* @__PURE__ */ new Set()
|
|
1302
|
-
};
|
|
1303
|
-
}
|
|
1304
|
-
const filesRoot = path4.join(workspaceRoot, FILES_DIR);
|
|
1305
|
-
await mkdir(filesRoot, { recursive: true });
|
|
1306
|
-
const mirrored = [];
|
|
1307
|
-
const guidelineMirrors = /* @__PURE__ */ new Set();
|
|
1308
|
-
const nameCounts = /* @__PURE__ */ new Map();
|
|
1309
|
-
for (const inputFile of inputFiles) {
|
|
1310
|
-
const absoluteSource = path4.resolve(inputFile);
|
|
1311
|
-
const baseName = path4.basename(absoluteSource);
|
|
1312
|
-
const count = nameCounts.get(baseName) ?? 0;
|
|
1313
|
-
nameCounts.set(baseName, count + 1);
|
|
1314
|
-
const finalName = count === 0 ? baseName : `${baseName}.${count}`;
|
|
1315
|
-
const destination = path4.join(filesRoot, finalName);
|
|
1316
|
-
await copyFile(absoluteSource, destination);
|
|
1317
|
-
const resolvedDestination = path4.resolve(destination);
|
|
1318
|
-
mirrored.push(resolvedDestination);
|
|
1319
|
-
if (guidelineOriginals.has(absoluteSource)) {
|
|
1320
|
-
guidelineMirrors.add(resolvedDestination);
|
|
1321
|
-
}
|
|
1322
|
-
}
|
|
1323
|
-
return {
|
|
1324
|
-
mirroredInputFiles: mirrored,
|
|
1325
|
-
guidelineMirrors
|
|
1326
|
-
};
|
|
1327
|
-
}
|
|
1328
1286
|
async createWorkspace() {
|
|
1329
1287
|
return await mkdtemp(path4.join(tmpdir(), WORKSPACE_PREFIX));
|
|
1330
1288
|
}
|
|
@@ -2295,23 +2253,25 @@ function resolveOptionalString(source, env, description, options) {
|
|
|
2295
2253
|
if (trimmed.length === 0) {
|
|
2296
2254
|
return void 0;
|
|
2297
2255
|
}
|
|
2298
|
-
const
|
|
2299
|
-
if (
|
|
2300
|
-
|
|
2301
|
-
|
|
2256
|
+
const envVarMatch = trimmed.match(/^\$\{\{\s*([A-Z0-9_]+)\s*\}\}$/i);
|
|
2257
|
+
if (envVarMatch) {
|
|
2258
|
+
const varName = envVarMatch[1];
|
|
2259
|
+
const envValue = env[varName];
|
|
2260
|
+
if (envValue !== void 0) {
|
|
2261
|
+
if (envValue.trim().length === 0) {
|
|
2262
|
+
throw new Error(`Environment variable '${varName}' for ${description} is empty`);
|
|
2263
|
+
}
|
|
2264
|
+
return envValue;
|
|
2302
2265
|
}
|
|
2303
|
-
|
|
2304
|
-
}
|
|
2305
|
-
const allowLiteral = options?.allowLiteral ?? false;
|
|
2306
|
-
const optionalEnv = options?.optionalEnv ?? false;
|
|
2307
|
-
const looksLikeEnv = isLikelyEnvReference(trimmed);
|
|
2308
|
-
if (looksLikeEnv) {
|
|
2266
|
+
const optionalEnv = options?.optionalEnv ?? false;
|
|
2309
2267
|
if (optionalEnv) {
|
|
2310
2268
|
return void 0;
|
|
2311
2269
|
}
|
|
2312
|
-
|
|
2313
|
-
|
|
2314
|
-
|
|
2270
|
+
throw new Error(`Environment variable '${varName}' required for ${description} is not set`);
|
|
2271
|
+
}
|
|
2272
|
+
const allowLiteral = options?.allowLiteral ?? false;
|
|
2273
|
+
if (!allowLiteral) {
|
|
2274
|
+
throw new Error(`${description} must use \${{ VARIABLE_NAME }} syntax for environment variables or be marked as allowing literals`);
|
|
2315
2275
|
}
|
|
2316
2276
|
return trimmed;
|
|
2317
2277
|
}
|
|
@@ -2358,9 +2318,6 @@ function resolveOptionalBoolean(source) {
|
|
|
2358
2318
|
}
|
|
2359
2319
|
throw new Error("expected boolean value");
|
|
2360
2320
|
}
|
|
2361
|
-
function isLikelyEnvReference(value) {
|
|
2362
|
-
return /^[A-Z0-9_]+$/.test(value);
|
|
2363
|
-
}
|
|
2364
2321
|
function resolveOptionalStringArray(source, env, description) {
|
|
2365
2322
|
if (source === void 0 || source === null) {
|
|
2366
2323
|
return void 0;
|
|
@@ -2381,21 +2338,25 @@ function resolveOptionalStringArray(source, env, description) {
|
|
|
2381
2338
|
if (trimmed.length === 0) {
|
|
2382
2339
|
throw new Error(`${description}[${i}] cannot be empty`);
|
|
2383
2340
|
}
|
|
2384
|
-
const
|
|
2385
|
-
if (
|
|
2386
|
-
|
|
2387
|
-
|
|
2341
|
+
const envVarMatch = trimmed.match(/^\$\{\{\s*([A-Z0-9_]+)\s*\}\}$/i);
|
|
2342
|
+
if (envVarMatch) {
|
|
2343
|
+
const varName = envVarMatch[1];
|
|
2344
|
+
const envValue = env[varName];
|
|
2345
|
+
if (envValue !== void 0) {
|
|
2346
|
+
if (envValue.trim().length === 0) {
|
|
2347
|
+
throw new Error(`Environment variable '${varName}' for ${description}[${i}] is empty`);
|
|
2348
|
+
}
|
|
2349
|
+
resolved.push(envValue);
|
|
2350
|
+
continue;
|
|
2388
2351
|
}
|
|
2389
|
-
|
|
2390
|
-
} else {
|
|
2391
|
-
resolved.push(trimmed);
|
|
2352
|
+
throw new Error(`Environment variable '${varName}' for ${description}[${i}] is not set`);
|
|
2392
2353
|
}
|
|
2354
|
+
resolved.push(trimmed);
|
|
2393
2355
|
}
|
|
2394
2356
|
return resolved.length > 0 ? resolved : void 0;
|
|
2395
2357
|
}
|
|
2396
2358
|
|
|
2397
2359
|
// src/evaluation/providers/vscode.ts
|
|
2398
|
-
import { readFile as readFile2 } from "node:fs/promises";
|
|
2399
2360
|
import path5 from "node:path";
|
|
2400
2361
|
import { dispatchAgentSession, dispatchBatchAgent, getSubagentRoot, provisionSubagents } from "subagent";
|
|
2401
2362
|
var VSCodeProvider = class {
|
|
@@ -2439,7 +2400,7 @@ var VSCodeProvider = class {
|
|
|
2439
2400
|
}
|
|
2440
2401
|
};
|
|
2441
2402
|
}
|
|
2442
|
-
const responseText = await readFile2(session.responseFile, "utf8");
|
|
2403
|
+
const responseText = await readTextFile(session.responseFile);
|
|
2443
2404
|
return {
|
|
2444
2405
|
text: responseText,
|
|
2445
2406
|
raw: {
|
|
@@ -2493,7 +2454,7 @@ var VSCodeProvider = class {
|
|
|
2493
2454
|
}
|
|
2494
2455
|
const responses = [];
|
|
2495
2456
|
for (const [index, responseFile] of session.responseFiles.entries()) {
|
|
2496
|
-
const responseText = await readFile2(responseFile, "utf8");
|
|
2457
|
+
const responseText = await readTextFile(responseFile);
|
|
2497
2458
|
responses.push({
|
|
2498
2459
|
text: responseText,
|
|
2499
2460
|
raw: {
|
|
@@ -2643,7 +2604,7 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
|
|
|
2643
2604
|
|
|
2644
2605
|
// src/evaluation/providers/targets-file.ts
|
|
2645
2606
|
import { constants as constants3 } from "node:fs";
|
|
2646
|
-
import { access as access3, readFile as readFile3 } from "node:fs/promises";
|
|
2607
|
+
import { access as access3, readFile as readFile2 } from "node:fs/promises";
|
|
2647
2608
|
import path6 from "node:path";
|
|
2648
2609
|
import { parse as parse2 } from "yaml";
|
|
2649
2610
|
function isRecord(value) {
|
|
@@ -2711,7 +2672,7 @@ async function readTargetDefinitions(filePath) {
|
|
|
2711
2672
|
if (!await fileExists3(absolutePath)) {
|
|
2712
2673
|
throw new Error(`targets.yaml not found at ${absolutePath}`);
|
|
2713
2674
|
}
|
|
2714
|
-
const raw = await readFile3(absolutePath, "utf8");
|
|
2675
|
+
const raw = await readFile2(absolutePath, "utf8");
|
|
2715
2676
|
const parsed = parse2(raw);
|
|
2716
2677
|
if (!isRecord(parsed)) {
|
|
2717
2678
|
throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with '$schema' and 'targets' fields`);
|
|
@@ -3376,7 +3337,8 @@ async function runEvaluation(options) {
|
|
|
3376
3337
|
target.name,
|
|
3377
3338
|
(now ?? (() => /* @__PURE__ */ new Date()))(),
|
|
3378
3339
|
outcome.reason,
|
|
3379
|
-
promptInputs
|
|
3340
|
+
promptInputs,
|
|
3341
|
+
primaryProvider
|
|
3380
3342
|
);
|
|
3381
3343
|
results.push(errorResult);
|
|
3382
3344
|
if (onResult) {
|
|
@@ -3460,7 +3422,7 @@ async function runBatchEvaluation(options) {
|
|
|
3460
3422
|
agentTimeoutMs
|
|
3461
3423
|
});
|
|
3462
3424
|
} catch (error) {
|
|
3463
|
-
const errorResult = buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
|
|
3425
|
+
const errorResult = buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs, provider);
|
|
3464
3426
|
results.push(errorResult);
|
|
3465
3427
|
if (onResult) {
|
|
3466
3428
|
await onResult(errorResult);
|
|
@@ -3537,7 +3499,7 @@ async function runEvalCase(options) {
|
|
|
3537
3499
|
attempt += 1;
|
|
3538
3500
|
continue;
|
|
3539
3501
|
}
|
|
3540
|
-
return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
|
|
3502
|
+
return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs, provider);
|
|
3541
3503
|
}
|
|
3542
3504
|
}
|
|
3543
3505
|
if (!providerResponse) {
|
|
@@ -3546,7 +3508,8 @@ async function runEvalCase(options) {
|
|
|
3546
3508
|
target.name,
|
|
3547
3509
|
nowFn(),
|
|
3548
3510
|
lastError ?? new Error("Provider did not return a response"),
|
|
3549
|
-
promptInputs
|
|
3511
|
+
promptInputs,
|
|
3512
|
+
provider
|
|
3550
3513
|
);
|
|
3551
3514
|
}
|
|
3552
3515
|
if (cacheKey && cache && !cachedResponse) {
|
|
@@ -3566,7 +3529,7 @@ async function runEvalCase(options) {
|
|
|
3566
3529
|
agentTimeoutMs
|
|
3567
3530
|
});
|
|
3568
3531
|
} catch (error) {
|
|
3569
|
-
return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
|
|
3532
|
+
return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs, provider);
|
|
3570
3533
|
}
|
|
3571
3534
|
}
|
|
3572
3535
|
async function evaluateCandidate(options) {
|
|
@@ -3598,7 +3561,7 @@ async function evaluateCandidate(options) {
|
|
|
3598
3561
|
const completedAt = nowFn();
|
|
3599
3562
|
const rawRequest = {
|
|
3600
3563
|
question: promptInputs.question,
|
|
3601
|
-
guidelines: promptInputs.guidelines,
|
|
3564
|
+
...isAgentProvider(provider) ? {} : { guidelines: promptInputs.guidelines },
|
|
3602
3565
|
guideline_paths: evalCase.guideline_paths,
|
|
3603
3566
|
system_message: promptInputs.systemMessage ?? ""
|
|
3604
3567
|
};
|
|
@@ -3858,11 +3821,11 @@ async function invokeProvider(provider, options) {
|
|
|
3858
3821
|
}
|
|
3859
3822
|
}
|
|
3860
3823
|
}
|
|
3861
|
-
function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs) {
|
|
3824
|
+
function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs, provider) {
|
|
3862
3825
|
const message = error instanceof Error ? error.message : String(error);
|
|
3863
3826
|
const rawRequest = {
|
|
3864
3827
|
question: promptInputs.question,
|
|
3865
|
-
guidelines: promptInputs.guidelines,
|
|
3828
|
+
...isAgentProvider(provider) ? {} : { guidelines: promptInputs.guidelines },
|
|
3866
3829
|
guideline_paths: evalCase.guideline_paths,
|
|
3867
3830
|
system_message: promptInputs.systemMessage ?? "",
|
|
3868
3831
|
error: message
|