@agentv/core 0.7.0 → 0.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-L7I5UTJU.js → chunk-UQLHF3T7.js} +12 -3
- package/dist/chunk-UQLHF3T7.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +143 -2
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.d.cts +1 -1
- package/dist/evaluation/validation/index.d.ts +1 -1
- package/dist/evaluation/validation/index.js +143 -2
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +56 -86
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +46 -83
- package/dist/index.js.map +1 -1
- package/package.json +2 -2
- package/dist/chunk-L7I5UTJU.js.map +0 -1
package/dist/index.cjs
CHANGED
|
@@ -1338,7 +1338,6 @@ function pathToFileUri(filePath) {
|
|
|
1338
1338
|
var execAsync2 = (0, import_node_util2.promisify)(import_node_child_process2.exec);
|
|
1339
1339
|
var WORKSPACE_PREFIX = "agentv-codex-";
|
|
1340
1340
|
var PROMPT_FILENAME = "prompt.md";
|
|
1341
|
-
var FILES_DIR = "files";
|
|
1342
1341
|
var JSONL_TYPE_ITEM_COMPLETED = "item.completed";
|
|
1343
1342
|
var CodexProvider = class {
|
|
1344
1343
|
id;
|
|
@@ -1361,21 +1360,10 @@ var CodexProvider = class {
|
|
|
1361
1360
|
}
|
|
1362
1361
|
await this.ensureEnvironmentReady();
|
|
1363
1362
|
const inputFiles = normalizeInputFiles2(request.inputFiles);
|
|
1364
|
-
const originalGuidelines = new Set(
|
|
1365
|
-
collectGuidelineFiles(inputFiles, request.guideline_patterns).map((file) => import_node_path5.default.resolve(file))
|
|
1366
|
-
);
|
|
1367
1363
|
const workspaceRoot = await this.createWorkspace();
|
|
1368
1364
|
const logger = await this.createStreamLogger(request).catch(() => void 0);
|
|
1369
1365
|
try {
|
|
1370
|
-
const
|
|
1371
|
-
inputFiles,
|
|
1372
|
-
workspaceRoot,
|
|
1373
|
-
originalGuidelines
|
|
1374
|
-
);
|
|
1375
|
-
const promptContent = buildPromptDocument(request, mirroredInputFiles, {
|
|
1376
|
-
guidelinePatterns: request.guideline_patterns,
|
|
1377
|
-
guidelineOverrides: guidelineMirrors
|
|
1378
|
-
});
|
|
1366
|
+
const promptContent = buildPromptDocument(request, inputFiles);
|
|
1379
1367
|
const promptFile = import_node_path5.default.join(workspaceRoot, PROMPT_FILENAME);
|
|
1380
1368
|
await (0, import_promises3.writeFile)(promptFile, promptContent, "utf8");
|
|
1381
1369
|
const args = this.buildCodexArgs();
|
|
@@ -1404,7 +1392,7 @@ var CodexProvider = class {
|
|
|
1404
1392
|
executable: this.resolvedExecutable ?? this.config.executable,
|
|
1405
1393
|
promptFile,
|
|
1406
1394
|
workspace: workspaceRoot,
|
|
1407
|
-
inputFiles
|
|
1395
|
+
inputFiles,
|
|
1408
1396
|
logFile: logger?.filePath
|
|
1409
1397
|
}
|
|
1410
1398
|
};
|
|
@@ -1459,37 +1447,6 @@ var CodexProvider = class {
|
|
|
1459
1447
|
throw error;
|
|
1460
1448
|
}
|
|
1461
1449
|
}
|
|
1462
|
-
async mirrorInputFiles(inputFiles, workspaceRoot, guidelineOriginals) {
|
|
1463
|
-
if (!inputFiles || inputFiles.length === 0) {
|
|
1464
|
-
return {
|
|
1465
|
-
mirroredInputFiles: void 0,
|
|
1466
|
-
guidelineMirrors: /* @__PURE__ */ new Set()
|
|
1467
|
-
};
|
|
1468
|
-
}
|
|
1469
|
-
const filesRoot = import_node_path5.default.join(workspaceRoot, FILES_DIR);
|
|
1470
|
-
await (0, import_promises3.mkdir)(filesRoot, { recursive: true });
|
|
1471
|
-
const mirrored = [];
|
|
1472
|
-
const guidelineMirrors = /* @__PURE__ */ new Set();
|
|
1473
|
-
const nameCounts = /* @__PURE__ */ new Map();
|
|
1474
|
-
for (const inputFile of inputFiles) {
|
|
1475
|
-
const absoluteSource = import_node_path5.default.resolve(inputFile);
|
|
1476
|
-
const baseName = import_node_path5.default.basename(absoluteSource);
|
|
1477
|
-
const count = nameCounts.get(baseName) ?? 0;
|
|
1478
|
-
nameCounts.set(baseName, count + 1);
|
|
1479
|
-
const finalName = count === 0 ? baseName : `${baseName}.${count}`;
|
|
1480
|
-
const destination = import_node_path5.default.join(filesRoot, finalName);
|
|
1481
|
-
await (0, import_promises3.copyFile)(absoluteSource, destination);
|
|
1482
|
-
const resolvedDestination = import_node_path5.default.resolve(destination);
|
|
1483
|
-
mirrored.push(resolvedDestination);
|
|
1484
|
-
if (guidelineOriginals.has(absoluteSource)) {
|
|
1485
|
-
guidelineMirrors.add(resolvedDestination);
|
|
1486
|
-
}
|
|
1487
|
-
}
|
|
1488
|
-
return {
|
|
1489
|
-
mirroredInputFiles: mirrored,
|
|
1490
|
-
guidelineMirrors
|
|
1491
|
-
};
|
|
1492
|
-
}
|
|
1493
1450
|
async createWorkspace() {
|
|
1494
1451
|
return await (0, import_promises3.mkdtemp)(import_node_path5.default.join((0, import_node_os.tmpdir)(), WORKSPACE_PREFIX));
|
|
1495
1452
|
}
|
|
@@ -2460,23 +2417,25 @@ function resolveOptionalString(source, env, description, options) {
|
|
|
2460
2417
|
if (trimmed.length === 0) {
|
|
2461
2418
|
return void 0;
|
|
2462
2419
|
}
|
|
2463
|
-
const
|
|
2464
|
-
if (
|
|
2465
|
-
|
|
2466
|
-
|
|
2420
|
+
const envVarMatch = trimmed.match(/^\$\{\{\s*([A-Z0-9_]+)\s*\}\}$/i);
|
|
2421
|
+
if (envVarMatch) {
|
|
2422
|
+
const varName = envVarMatch[1];
|
|
2423
|
+
const envValue = env[varName];
|
|
2424
|
+
if (envValue !== void 0) {
|
|
2425
|
+
if (envValue.trim().length === 0) {
|
|
2426
|
+
throw new Error(`Environment variable '${varName}' for ${description} is empty`);
|
|
2427
|
+
}
|
|
2428
|
+
return envValue;
|
|
2467
2429
|
}
|
|
2468
|
-
|
|
2469
|
-
}
|
|
2470
|
-
const allowLiteral = options?.allowLiteral ?? false;
|
|
2471
|
-
const optionalEnv = options?.optionalEnv ?? false;
|
|
2472
|
-
const looksLikeEnv = isLikelyEnvReference(trimmed);
|
|
2473
|
-
if (looksLikeEnv) {
|
|
2430
|
+
const optionalEnv = options?.optionalEnv ?? false;
|
|
2474
2431
|
if (optionalEnv) {
|
|
2475
2432
|
return void 0;
|
|
2476
2433
|
}
|
|
2477
|
-
|
|
2478
|
-
|
|
2479
|
-
|
|
2434
|
+
throw new Error(`Environment variable '${varName}' required for ${description} is not set`);
|
|
2435
|
+
}
|
|
2436
|
+
const allowLiteral = options?.allowLiteral ?? false;
|
|
2437
|
+
if (!allowLiteral) {
|
|
2438
|
+
throw new Error(`${description} must use \${{ VARIABLE_NAME }} syntax for environment variables or be marked as allowing literals`);
|
|
2480
2439
|
}
|
|
2481
2440
|
return trimmed;
|
|
2482
2441
|
}
|
|
@@ -2523,9 +2482,6 @@ function resolveOptionalBoolean(source) {
|
|
|
2523
2482
|
}
|
|
2524
2483
|
throw new Error("expected boolean value");
|
|
2525
2484
|
}
|
|
2526
|
-
function isLikelyEnvReference(value) {
|
|
2527
|
-
return /^[A-Z0-9_]+$/.test(value);
|
|
2528
|
-
}
|
|
2529
2485
|
function resolveOptionalStringArray(source, env, description) {
|
|
2530
2486
|
if (source === void 0 || source === null) {
|
|
2531
2487
|
return void 0;
|
|
@@ -2546,21 +2502,25 @@ function resolveOptionalStringArray(source, env, description) {
|
|
|
2546
2502
|
if (trimmed.length === 0) {
|
|
2547
2503
|
throw new Error(`${description}[${i}] cannot be empty`);
|
|
2548
2504
|
}
|
|
2549
|
-
const
|
|
2550
|
-
if (
|
|
2551
|
-
|
|
2552
|
-
|
|
2505
|
+
const envVarMatch = trimmed.match(/^\$\{\{\s*([A-Z0-9_]+)\s*\}\}$/i);
|
|
2506
|
+
if (envVarMatch) {
|
|
2507
|
+
const varName = envVarMatch[1];
|
|
2508
|
+
const envValue = env[varName];
|
|
2509
|
+
if (envValue !== void 0) {
|
|
2510
|
+
if (envValue.trim().length === 0) {
|
|
2511
|
+
throw new Error(`Environment variable '${varName}' for ${description}[${i}] is empty`);
|
|
2512
|
+
}
|
|
2513
|
+
resolved.push(envValue);
|
|
2514
|
+
continue;
|
|
2553
2515
|
}
|
|
2554
|
-
|
|
2555
|
-
} else {
|
|
2556
|
-
resolved.push(trimmed);
|
|
2516
|
+
throw new Error(`Environment variable '${varName}' for ${description}[${i}] is not set`);
|
|
2557
2517
|
}
|
|
2518
|
+
resolved.push(trimmed);
|
|
2558
2519
|
}
|
|
2559
2520
|
return resolved.length > 0 ? resolved : void 0;
|
|
2560
2521
|
}
|
|
2561
2522
|
|
|
2562
2523
|
// src/evaluation/providers/vscode.ts
|
|
2563
|
-
var import_promises4 = require("fs/promises");
|
|
2564
2524
|
var import_node_path6 = __toESM(require("path"), 1);
|
|
2565
2525
|
var import_subagent = require("subagent");
|
|
2566
2526
|
var VSCodeProvider = class {
|
|
@@ -2604,7 +2564,7 @@ var VSCodeProvider = class {
|
|
|
2604
2564
|
}
|
|
2605
2565
|
};
|
|
2606
2566
|
}
|
|
2607
|
-
const responseText = await (
|
|
2567
|
+
const responseText = await readTextFile(session.responseFile);
|
|
2608
2568
|
return {
|
|
2609
2569
|
text: responseText,
|
|
2610
2570
|
raw: {
|
|
@@ -2658,7 +2618,7 @@ var VSCodeProvider = class {
|
|
|
2658
2618
|
}
|
|
2659
2619
|
const responses = [];
|
|
2660
2620
|
for (const [index, responseFile] of session.responseFiles.entries()) {
|
|
2661
|
-
const responseText = await (
|
|
2621
|
+
const responseText = await readTextFile(responseFile);
|
|
2662
2622
|
responses.push({
|
|
2663
2623
|
text: responseText,
|
|
2664
2624
|
raw: {
|
|
@@ -2808,12 +2768,20 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
|
|
|
2808
2768
|
|
|
2809
2769
|
// src/evaluation/providers/targets-file.ts
|
|
2810
2770
|
var import_node_fs4 = require("fs");
|
|
2811
|
-
var
|
|
2771
|
+
var import_promises4 = require("fs/promises");
|
|
2812
2772
|
var import_node_path7 = __toESM(require("path"), 1);
|
|
2813
2773
|
var import_yaml2 = require("yaml");
|
|
2814
2774
|
|
|
2815
2775
|
// src/evaluation/providers/types.ts
|
|
2816
|
-
var
|
|
2776
|
+
var AGENT_PROVIDER_KINDS = [
|
|
2777
|
+
"codex",
|
|
2778
|
+
"vscode",
|
|
2779
|
+
"vscode-insiders"
|
|
2780
|
+
];
|
|
2781
|
+
var TARGETS_SCHEMA_V2 = "agentv-targets-v2.1";
|
|
2782
|
+
function isAgentProvider(provider) {
|
|
2783
|
+
return provider ? AGENT_PROVIDER_KINDS.includes(provider.kind) : false;
|
|
2784
|
+
}
|
|
2817
2785
|
|
|
2818
2786
|
// src/evaluation/providers/targets-file.ts
|
|
2819
2787
|
function isRecord(value) {
|
|
@@ -2870,7 +2838,7 @@ function assertTargetDefinition(value, index, filePath) {
|
|
|
2870
2838
|
}
|
|
2871
2839
|
async function fileExists3(filePath) {
|
|
2872
2840
|
try {
|
|
2873
|
-
await (0,
|
|
2841
|
+
await (0, import_promises4.access)(filePath, import_node_fs4.constants.F_OK);
|
|
2874
2842
|
return true;
|
|
2875
2843
|
} catch {
|
|
2876
2844
|
return false;
|
|
@@ -2881,7 +2849,7 @@ async function readTargetDefinitions(filePath) {
|
|
|
2881
2849
|
if (!await fileExists3(absolutePath)) {
|
|
2882
2850
|
throw new Error(`targets.yaml not found at ${absolutePath}`);
|
|
2883
2851
|
}
|
|
2884
|
-
const raw = await (0,
|
|
2852
|
+
const raw = await (0, import_promises4.readFile)(absolutePath, "utf8");
|
|
2885
2853
|
const parsed = (0, import_yaml2.parse)(raw);
|
|
2886
2854
|
if (!isRecord(parsed)) {
|
|
2887
2855
|
throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with '$schema' and 'targets' fields`);
|
|
@@ -3227,7 +3195,7 @@ function substituteVariables(template, variables) {
|
|
|
3227
3195
|
|
|
3228
3196
|
// src/evaluation/orchestrator.ts
|
|
3229
3197
|
var import_node_crypto3 = require("crypto");
|
|
3230
|
-
var
|
|
3198
|
+
var import_promises5 = require("fs/promises");
|
|
3231
3199
|
var import_node_path8 = __toESM(require("path"), 1);
|
|
3232
3200
|
|
|
3233
3201
|
// ../../node_modules/.pnpm/yocto-queue@1.2.1/node_modules/yocto-queue/index.js
|
|
@@ -3546,7 +3514,8 @@ async function runEvaluation(options) {
|
|
|
3546
3514
|
target.name,
|
|
3547
3515
|
(now ?? (() => /* @__PURE__ */ new Date()))(),
|
|
3548
3516
|
outcome.reason,
|
|
3549
|
-
promptInputs
|
|
3517
|
+
promptInputs,
|
|
3518
|
+
primaryProvider
|
|
3550
3519
|
);
|
|
3551
3520
|
results.push(errorResult);
|
|
3552
3521
|
if (onResult) {
|
|
@@ -3630,7 +3599,7 @@ async function runBatchEvaluation(options) {
|
|
|
3630
3599
|
agentTimeoutMs
|
|
3631
3600
|
});
|
|
3632
3601
|
} catch (error) {
|
|
3633
|
-
const errorResult = buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
|
|
3602
|
+
const errorResult = buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs, provider);
|
|
3634
3603
|
results.push(errorResult);
|
|
3635
3604
|
if (onResult) {
|
|
3636
3605
|
await onResult(errorResult);
|
|
@@ -3707,7 +3676,7 @@ async function runEvalCase(options) {
|
|
|
3707
3676
|
attempt += 1;
|
|
3708
3677
|
continue;
|
|
3709
3678
|
}
|
|
3710
|
-
return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
|
|
3679
|
+
return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs, provider);
|
|
3711
3680
|
}
|
|
3712
3681
|
}
|
|
3713
3682
|
if (!providerResponse) {
|
|
@@ -3716,7 +3685,8 @@ async function runEvalCase(options) {
|
|
|
3716
3685
|
target.name,
|
|
3717
3686
|
nowFn(),
|
|
3718
3687
|
lastError ?? new Error("Provider did not return a response"),
|
|
3719
|
-
promptInputs
|
|
3688
|
+
promptInputs,
|
|
3689
|
+
provider
|
|
3720
3690
|
);
|
|
3721
3691
|
}
|
|
3722
3692
|
if (cacheKey && cache && !cachedResponse) {
|
|
@@ -3736,7 +3706,7 @@ async function runEvalCase(options) {
|
|
|
3736
3706
|
agentTimeoutMs
|
|
3737
3707
|
});
|
|
3738
3708
|
} catch (error) {
|
|
3739
|
-
return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs);
|
|
3709
|
+
return buildErrorResult(evalCase, target.name, nowFn(), error, promptInputs, provider);
|
|
3740
3710
|
}
|
|
3741
3711
|
}
|
|
3742
3712
|
async function evaluateCandidate(options) {
|
|
@@ -3768,7 +3738,7 @@ async function evaluateCandidate(options) {
|
|
|
3768
3738
|
const completedAt = nowFn();
|
|
3769
3739
|
const rawRequest = {
|
|
3770
3740
|
question: promptInputs.question,
|
|
3771
|
-
guidelines: promptInputs.guidelines,
|
|
3741
|
+
...isAgentProvider(provider) ? {} : { guidelines: promptInputs.guidelines },
|
|
3772
3742
|
guideline_paths: evalCase.guideline_paths,
|
|
3773
3743
|
system_message: promptInputs.systemMessage ?? ""
|
|
3774
3744
|
};
|
|
@@ -3986,14 +3956,14 @@ async function dumpPrompt(directory, evalCase, promptInputs) {
|
|
|
3986
3956
|
const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
|
|
3987
3957
|
const filename = `${timestamp}_${sanitizeFilename(evalCase.id)}.json`;
|
|
3988
3958
|
const filePath = import_node_path8.default.resolve(directory, filename);
|
|
3989
|
-
await (0,
|
|
3959
|
+
await (0, import_promises5.mkdir)(import_node_path8.default.dirname(filePath), { recursive: true });
|
|
3990
3960
|
const payload = {
|
|
3991
3961
|
eval_id: evalCase.id,
|
|
3992
3962
|
question: promptInputs.question,
|
|
3993
3963
|
guidelines: promptInputs.guidelines,
|
|
3994
3964
|
guideline_paths: evalCase.guideline_paths
|
|
3995
3965
|
};
|
|
3996
|
-
await (0,
|
|
3966
|
+
await (0, import_promises5.writeFile)(filePath, JSON.stringify(payload, null, 2), "utf8");
|
|
3997
3967
|
}
|
|
3998
3968
|
function sanitizeFilename(value) {
|
|
3999
3969
|
if (!value) {
|
|
@@ -4028,11 +3998,11 @@ async function invokeProvider(provider, options) {
|
|
|
4028
3998
|
}
|
|
4029
3999
|
}
|
|
4030
4000
|
}
|
|
4031
|
-
function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs) {
|
|
4001
|
+
function buildErrorResult(evalCase, targetName, timestamp, error, promptInputs, provider) {
|
|
4032
4002
|
const message = error instanceof Error ? error.message : String(error);
|
|
4033
4003
|
const rawRequest = {
|
|
4034
4004
|
question: promptInputs.question,
|
|
4035
|
-
guidelines: promptInputs.guidelines,
|
|
4005
|
+
...isAgentProvider(provider) ? {} : { guidelines: promptInputs.guidelines },
|
|
4036
4006
|
guideline_paths: evalCase.guideline_paths,
|
|
4037
4007
|
system_message: promptInputs.systemMessage ?? "",
|
|
4038
4008
|
error: message
|