@agentv/core 4.15.9-next.1 → 4.16.0-next.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-HVEQNYTC.js → chunk-6VZY3B6M.js} +55 -165
- package/dist/chunk-6VZY3B6M.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +18 -17
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +13 -12
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +329 -257
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +71 -25
- package/dist/index.d.ts +71 -25
- package/dist/index.js +249 -59
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-HVEQNYTC.js.map +0 -1
package/dist/index.cjs
CHANGED
|
@@ -1857,6 +1857,7 @@ __export(index_exports, {
|
|
|
1857
1857
|
extractJsonBlob: () => extractJsonBlob,
|
|
1858
1858
|
extractLastAssistantContent: () => extractLastAssistantContent,
|
|
1859
1859
|
extractTargetFromSuite: () => extractTargetFromSuite,
|
|
1860
|
+
extractTargetRefsFromSuite: () => extractTargetRefsFromSuite,
|
|
1860
1861
|
extractTargetsFromSuite: () => extractTargetsFromSuite,
|
|
1861
1862
|
extractTargetsFromTestCase: () => extractTargetsFromTestCase,
|
|
1862
1863
|
extractThreshold: () => extractThreshold,
|
|
@@ -1866,6 +1867,7 @@ __export(index_exports, {
|
|
|
1866
1867
|
findGitRoot: () => findGitRoot,
|
|
1867
1868
|
freeformEvaluationSchema: () => freeformEvaluationSchema,
|
|
1868
1869
|
generateRubrics: () => generateRubrics,
|
|
1870
|
+
getAgentvConfigDir: () => getAgentvConfigDir,
|
|
1869
1871
|
getAgentvHome: () => getAgentvHome,
|
|
1870
1872
|
getBenchmark: () => getBenchmark,
|
|
1871
1873
|
getBenchmarksRegistryPath: () => getBenchmarksRegistryPath,
|
|
@@ -2655,17 +2657,76 @@ function extractTargetFromSuite(suite) {
|
|
|
2655
2657
|
}
|
|
2656
2658
|
return void 0;
|
|
2657
2659
|
}
|
|
2658
|
-
function
|
|
2660
|
+
function extractTargetRefsFromSuite(suite) {
|
|
2659
2661
|
const execution = suite.execution;
|
|
2660
2662
|
if (!execution || typeof execution !== "object" || Array.isArray(execution)) {
|
|
2661
2663
|
return void 0;
|
|
2662
2664
|
}
|
|
2663
2665
|
const targets = execution.targets;
|
|
2664
|
-
if (Array.isArray(targets)) {
|
|
2665
|
-
|
|
2666
|
-
return valid.length > 0 ? valid.map((t) => t.trim()) : void 0;
|
|
2666
|
+
if (!Array.isArray(targets)) {
|
|
2667
|
+
return void 0;
|
|
2667
2668
|
}
|
|
2668
|
-
|
|
2669
|
+
const refs = [];
|
|
2670
|
+
for (const t of targets) {
|
|
2671
|
+
if (typeof t === "string" && t.trim().length > 0) {
|
|
2672
|
+
refs.push({ name: t.trim() });
|
|
2673
|
+
} else if (t && typeof t === "object" && !Array.isArray(t) && "name" in t) {
|
|
2674
|
+
const obj = t;
|
|
2675
|
+
const name = typeof obj.name === "string" ? obj.name.trim() : "";
|
|
2676
|
+
if (name.length === 0) continue;
|
|
2677
|
+
const useTarget = typeof obj.use_target === "string" ? obj.use_target.trim() : void 0;
|
|
2678
|
+
const hooks = parseTargetHooks(obj.hooks);
|
|
2679
|
+
refs.push({
|
|
2680
|
+
name,
|
|
2681
|
+
...useTarget && { use_target: useTarget },
|
|
2682
|
+
...hooks && { hooks }
|
|
2683
|
+
});
|
|
2684
|
+
}
|
|
2685
|
+
}
|
|
2686
|
+
return refs.length > 0 ? refs : void 0;
|
|
2687
|
+
}
|
|
2688
|
+
function extractTargetsFromSuite(suite) {
|
|
2689
|
+
const refs = extractTargetRefsFromSuite(suite);
|
|
2690
|
+
if (!refs) return void 0;
|
|
2691
|
+
const names = refs.map((r) => r.name);
|
|
2692
|
+
return names.length > 0 ? names : void 0;
|
|
2693
|
+
}
|
|
2694
|
+
function parseHookConfig(raw) {
|
|
2695
|
+
if (!raw || typeof raw !== "object") return void 0;
|
|
2696
|
+
const obj = raw;
|
|
2697
|
+
let command;
|
|
2698
|
+
if (typeof obj.command === "string") {
|
|
2699
|
+
command = ["sh", "-c", obj.command];
|
|
2700
|
+
} else if (Array.isArray(obj.command)) {
|
|
2701
|
+
command = obj.command.filter((s) => typeof s === "string");
|
|
2702
|
+
} else if (typeof obj.script === "string") {
|
|
2703
|
+
command = ["sh", "-c", obj.script];
|
|
2704
|
+
} else if (Array.isArray(obj.script)) {
|
|
2705
|
+
command = obj.script.filter((s) => typeof s === "string");
|
|
2706
|
+
}
|
|
2707
|
+
if (!command || command.length === 0) return void 0;
|
|
2708
|
+
const timeoutMs = typeof obj.timeout_ms === "number" ? obj.timeout_ms : typeof obj.timeoutMs === "number" ? obj.timeoutMs : void 0;
|
|
2709
|
+
const cwd = typeof obj.cwd === "string" ? obj.cwd : void 0;
|
|
2710
|
+
return {
|
|
2711
|
+
command,
|
|
2712
|
+
...timeoutMs !== void 0 && { timeout_ms: timeoutMs },
|
|
2713
|
+
...cwd && { cwd }
|
|
2714
|
+
};
|
|
2715
|
+
}
|
|
2716
|
+
function parseTargetHooks(raw) {
|
|
2717
|
+
if (!raw || typeof raw !== "object") return void 0;
|
|
2718
|
+
const obj = raw;
|
|
2719
|
+
const beforeAll = parseHookConfig(obj.before_all);
|
|
2720
|
+
const beforeEach = parseHookConfig(obj.before_each);
|
|
2721
|
+
const afterEach = parseHookConfig(obj.after_each);
|
|
2722
|
+
const afterAll = parseHookConfig(obj.after_all);
|
|
2723
|
+
if (!beforeAll && !beforeEach && !afterEach && !afterAll) return void 0;
|
|
2724
|
+
return {
|
|
2725
|
+
...beforeAll && { before_all: beforeAll },
|
|
2726
|
+
...beforeEach && { before_each: beforeEach },
|
|
2727
|
+
...afterEach && { after_each: afterEach },
|
|
2728
|
+
...afterAll && { after_all: afterAll }
|
|
2729
|
+
};
|
|
2669
2730
|
}
|
|
2670
2731
|
function extractWorkersFromSuite(suite) {
|
|
2671
2732
|
const execution = suite.execution;
|
|
@@ -3337,7 +3398,11 @@ async function parseEvaluatorList(candidateEvaluators, searchRoots, evalId, defa
|
|
|
3337
3398
|
}
|
|
3338
3399
|
const placeholderIndex = result.indexOf(PLACEHOLDER);
|
|
3339
3400
|
if (strings.length > 0 && placeholderIndex !== -1) {
|
|
3340
|
-
result[placeholderIndex] = {
|
|
3401
|
+
result[placeholderIndex] = {
|
|
3402
|
+
type: "rubrics",
|
|
3403
|
+
criteria: strings,
|
|
3404
|
+
weight: strings.length
|
|
3405
|
+
};
|
|
3341
3406
|
} else if (placeholderIndex !== -1) {
|
|
3342
3407
|
result.splice(placeholderIndex, 1);
|
|
3343
3408
|
}
|
|
@@ -5739,6 +5804,7 @@ async function readTestSuiteMetadata(testFilePath) {
|
|
|
5739
5804
|
return {
|
|
5740
5805
|
target: extractTargetFromSuite(parsed),
|
|
5741
5806
|
targets: extractTargetsFromSuite(parsed),
|
|
5807
|
+
targetRefs: extractTargetRefsFromSuite(parsed),
|
|
5742
5808
|
trials: extractTrialsConfig(parsed)
|
|
5743
5809
|
};
|
|
5744
5810
|
} catch {
|
|
@@ -5765,6 +5831,7 @@ async function loadTestSuite(evalFilePath, repoRoot, options) {
|
|
|
5765
5831
|
tests,
|
|
5766
5832
|
trials: extractTrialsConfig(parsed),
|
|
5767
5833
|
targets: extractTargetsFromSuite(parsed),
|
|
5834
|
+
targetRefs: extractTargetRefsFromSuite(parsed),
|
|
5768
5835
|
workers: extractWorkersFromSuite(parsed),
|
|
5769
5836
|
cacheConfig: extractCacheConfig(parsed),
|
|
5770
5837
|
totalBudgetUsd: extractTotalBudgetUsd(parsed),
|
|
@@ -7519,7 +7586,7 @@ var ClaudeCliProvider = class {
|
|
|
7519
7586
|
if (options.cwd) {
|
|
7520
7587
|
spawnOptions.cwd = options.cwd;
|
|
7521
7588
|
}
|
|
7522
|
-
const child = (0, import_node_child_process.spawn)(
|
|
7589
|
+
const child = (0, import_node_child_process.spawn)(this.config.executable, options.args, spawnOptions);
|
|
7523
7590
|
let stdout = "";
|
|
7524
7591
|
let stderr = "";
|
|
7525
7592
|
let timedOut = false;
|
|
@@ -7578,7 +7645,7 @@ var ClaudeCliProvider = class {
|
|
|
7578
7645
|
if (err.code === "ENOENT") {
|
|
7579
7646
|
reject(
|
|
7580
7647
|
new Error(
|
|
7581
|
-
`Claude CLI executable '
|
|
7648
|
+
`Claude CLI executable '${this.config.executable}' was not found on PATH. Install claude-code or ensure it is in PATH.`
|
|
7582
7649
|
)
|
|
7583
7650
|
);
|
|
7584
7651
|
} else {
|
|
@@ -11671,6 +11738,9 @@ init_cjs_shims();
|
|
|
11671
11738
|
var import_node_os7 = __toESM(require("os"), 1);
|
|
11672
11739
|
var import_node_path24 = __toESM(require("path"), 1);
|
|
11673
11740
|
var logged = false;
|
|
11741
|
+
function getAgentvConfigDir() {
|
|
11742
|
+
return import_node_path24.default.join(import_node_os7.default.homedir(), ".agentv");
|
|
11743
|
+
}
|
|
11674
11744
|
function getAgentvHome() {
|
|
11675
11745
|
const envHome = process.env.AGENTV_HOME;
|
|
11676
11746
|
if (envHome && envHome !== "undefined") {
|
|
@@ -12343,6 +12413,8 @@ var ProviderRegistry = class {
|
|
|
12343
12413
|
|
|
12344
12414
|
// src/evaluation/providers/targets.ts
|
|
12345
12415
|
init_cjs_shims();
|
|
12416
|
+
var import_node_fs12 = require("fs");
|
|
12417
|
+
var import_node_os8 = require("os");
|
|
12346
12418
|
var import_node_path26 = __toESM(require("path"), 1);
|
|
12347
12419
|
var import_zod3 = require("zod");
|
|
12348
12420
|
var CliHealthcheckHttpInputSchema = import_zod3.z.object({
|
|
@@ -12368,8 +12440,6 @@ var CliTargetInputSchema = import_zod3.z.object({
|
|
|
12368
12440
|
attachments_format: import_zod3.z.string().optional(),
|
|
12369
12441
|
// Working directory - optional
|
|
12370
12442
|
cwd: import_zod3.z.string().optional(),
|
|
12371
|
-
// Workspace template directory - optional (mutually exclusive with cwd)
|
|
12372
|
-
workspace_template: import_zod3.z.string().optional(),
|
|
12373
12443
|
// Timeout in seconds - optional
|
|
12374
12444
|
timeout_seconds: import_zod3.z.number().positive().optional(),
|
|
12375
12445
|
// Healthcheck configuration - optional
|
|
@@ -12404,7 +12474,6 @@ var CliTargetConfigSchema = import_zod3.z.object({
|
|
|
12404
12474
|
command: import_zod3.z.string().min(1),
|
|
12405
12475
|
filesFormat: import_zod3.z.string().optional(),
|
|
12406
12476
|
cwd: import_zod3.z.string().optional(),
|
|
12407
|
-
workspaceTemplate: import_zod3.z.string().optional(),
|
|
12408
12477
|
timeoutMs: import_zod3.z.number().positive().optional(),
|
|
12409
12478
|
healthcheck: CliHealthcheckSchema.optional(),
|
|
12410
12479
|
verbose: import_zod3.z.boolean().optional(),
|
|
@@ -12447,19 +12516,6 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
|
|
|
12447
12516
|
const command = resolveString(input.command, env, `${targetName} CLI command`, true);
|
|
12448
12517
|
const filesFormatSource = input.files_format ?? input.attachments_format;
|
|
12449
12518
|
const filesFormat = resolveOptionalLiteralString(filesFormatSource);
|
|
12450
|
-
const workspaceTemplateSource = input.workspace_template;
|
|
12451
|
-
let workspaceTemplate = resolveOptionalString(
|
|
12452
|
-
workspaceTemplateSource,
|
|
12453
|
-
env,
|
|
12454
|
-
`${targetName} workspace template`,
|
|
12455
|
-
{
|
|
12456
|
-
allowLiteral: true,
|
|
12457
|
-
optionalEnv: true
|
|
12458
|
-
}
|
|
12459
|
-
);
|
|
12460
|
-
if (workspaceTemplate && evalFilePath && !import_node_path26.default.isAbsolute(workspaceTemplate)) {
|
|
12461
|
-
workspaceTemplate = import_node_path26.default.resolve(import_node_path26.default.dirname(import_node_path26.default.resolve(evalFilePath)), workspaceTemplate);
|
|
12462
|
-
}
|
|
12463
12519
|
let cwd = resolveOptionalString(input.cwd, env, `${targetName} working directory`, {
|
|
12464
12520
|
allowLiteral: true,
|
|
12465
12521
|
optionalEnv: true
|
|
@@ -12467,12 +12523,7 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
|
|
|
12467
12523
|
if (cwd && evalFilePath && !import_node_path26.default.isAbsolute(cwd)) {
|
|
12468
12524
|
cwd = import_node_path26.default.resolve(import_node_path26.default.dirname(import_node_path26.default.resolve(evalFilePath)), cwd);
|
|
12469
12525
|
}
|
|
12470
|
-
if (cwd &&
|
|
12471
|
-
throw new Error(
|
|
12472
|
-
`${targetName}: 'cwd' and 'workspace_template' are mutually exclusive. Use 'cwd' to run in an existing directory, or 'workspace_template' to copy a template to a temp location.`
|
|
12473
|
-
);
|
|
12474
|
-
}
|
|
12475
|
-
if (!cwd && !workspaceTemplate && evalFilePath) {
|
|
12526
|
+
if (!cwd && evalFilePath) {
|
|
12476
12527
|
cwd = import_node_path26.default.dirname(import_node_path26.default.resolve(evalFilePath));
|
|
12477
12528
|
}
|
|
12478
12529
|
const timeoutSeconds = input.timeout_seconds;
|
|
@@ -12484,7 +12535,6 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
|
|
|
12484
12535
|
command,
|
|
12485
12536
|
filesFormat,
|
|
12486
12537
|
cwd,
|
|
12487
|
-
workspaceTemplate,
|
|
12488
12538
|
timeoutMs,
|
|
12489
12539
|
healthcheck,
|
|
12490
12540
|
verbose,
|
|
@@ -12553,11 +12603,6 @@ function collectDeprecatedCamelCaseWarnings(value, location, aliases) {
|
|
|
12553
12603
|
return warnings;
|
|
12554
12604
|
}
|
|
12555
12605
|
function assertNoDeprecatedCamelCaseTargetFields(definition) {
|
|
12556
|
-
if (Object.prototype.hasOwnProperty.call(definition, "workspaceTemplate")) {
|
|
12557
|
-
throw new Error(
|
|
12558
|
-
`${definition.name}: target-level workspace_template has been removed. Use eval-level workspace.template.`
|
|
12559
|
-
);
|
|
12560
|
-
}
|
|
12561
12606
|
const warning = findDeprecatedCamelCaseTargetWarnings(
|
|
12562
12607
|
definition,
|
|
12563
12608
|
`target "${definition.name}"`
|
|
@@ -12607,7 +12652,6 @@ var BASE_TARGET_SCHEMA = import_zod3.z.object({
|
|
|
12607
12652
|
judge_target: import_zod3.z.string().optional(),
|
|
12608
12653
|
// backward compat
|
|
12609
12654
|
workers: import_zod3.z.number().int().min(1).optional(),
|
|
12610
|
-
workspace_template: import_zod3.z.string().optional(),
|
|
12611
12655
|
subagent_mode_allowed: import_zod3.z.boolean().optional(),
|
|
12612
12656
|
fallback_targets: import_zod3.z.array(import_zod3.z.string().min(1)).optional()
|
|
12613
12657
|
}).passthrough();
|
|
@@ -12704,11 +12748,6 @@ function resolveDelegatedTargetDefinition(name, definitions, env = process.env)
|
|
|
12704
12748
|
function resolveTargetDefinition(definition, env = process.env, evalFilePath, options) {
|
|
12705
12749
|
assertNoDeprecatedCamelCaseTargetFields(definition);
|
|
12706
12750
|
const parsed = BASE_TARGET_SCHEMA.parse(definition);
|
|
12707
|
-
if (parsed.workspace_template !== void 0) {
|
|
12708
|
-
throw new Error(
|
|
12709
|
-
`${parsed.name}: target-level workspace_template has been removed. Use eval-level workspace.template.`
|
|
12710
|
-
);
|
|
12711
|
-
}
|
|
12712
12751
|
if (!parsed.provider) {
|
|
12713
12752
|
throw new Error(
|
|
12714
12753
|
`${parsed.name}: 'provider' is required (targets with use_target must be resolved before calling resolveTargetDefinition)`
|
|
@@ -12805,6 +12844,20 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath, op
|
|
|
12805
12844
|
...base,
|
|
12806
12845
|
config: resolvePiCliConfig(parsed, env, evalFilePath)
|
|
12807
12846
|
};
|
|
12847
|
+
case "cc-mirror": {
|
|
12848
|
+
const variantName = resolveOptionalString(parsed.variant, env, `${parsed.name} cc-mirror variant`, {
|
|
12849
|
+
allowLiteral: true,
|
|
12850
|
+
optionalEnv: true
|
|
12851
|
+
}) ?? parsed.name;
|
|
12852
|
+
if (!parsed.executable) {
|
|
12853
|
+
parsed.executable = resolveCcMirrorBinaryPath(variantName);
|
|
12854
|
+
}
|
|
12855
|
+
return {
|
|
12856
|
+
kind: "claude-cli",
|
|
12857
|
+
...base,
|
|
12858
|
+
config: resolveClaudeConfig(parsed, env, evalFilePath)
|
|
12859
|
+
};
|
|
12860
|
+
}
|
|
12808
12861
|
case "claude":
|
|
12809
12862
|
case "claude-code":
|
|
12810
12863
|
case "claude-cli":
|
|
@@ -12993,12 +13046,11 @@ function resolveGeminiConfig(target, env) {
|
|
|
12993
13046
|
retry
|
|
12994
13047
|
};
|
|
12995
13048
|
}
|
|
12996
|
-
function resolveCodexConfig(target, env,
|
|
13049
|
+
function resolveCodexConfig(target, env, _evalFilePath) {
|
|
12997
13050
|
const modelSource = target.model;
|
|
12998
13051
|
const executableSource = target.executable ?? target.command ?? target.binary;
|
|
12999
13052
|
const argsSource = target.args ?? target.arguments;
|
|
13000
13053
|
const cwdSource = target.cwd;
|
|
13001
|
-
const workspaceTemplateSource = target.workspace_template;
|
|
13002
13054
|
const timeoutSource = target.timeout_seconds;
|
|
13003
13055
|
const logDirSource = target.log_dir ?? target.log_directory;
|
|
13004
13056
|
const logFormatSource = target.log_format ?? target.log_output_format ?? env.AGENTV_CODEX_LOG_FORMAT;
|
|
@@ -13021,23 +13073,6 @@ function resolveCodexConfig(target, env, evalFilePath) {
|
|
|
13021
13073
|
allowLiteral: true,
|
|
13022
13074
|
optionalEnv: true
|
|
13023
13075
|
});
|
|
13024
|
-
let workspaceTemplate = resolveOptionalString(
|
|
13025
|
-
workspaceTemplateSource,
|
|
13026
|
-
env,
|
|
13027
|
-
`${target.name} codex workspace template`,
|
|
13028
|
-
{
|
|
13029
|
-
allowLiteral: true,
|
|
13030
|
-
optionalEnv: true
|
|
13031
|
-
}
|
|
13032
|
-
);
|
|
13033
|
-
if (workspaceTemplate && evalFilePath && !import_node_path26.default.isAbsolute(workspaceTemplate)) {
|
|
13034
|
-
workspaceTemplate = import_node_path26.default.resolve(import_node_path26.default.dirname(import_node_path26.default.resolve(evalFilePath)), workspaceTemplate);
|
|
13035
|
-
}
|
|
13036
|
-
if (cwd && workspaceTemplate) {
|
|
13037
|
-
throw new Error(
|
|
13038
|
-
`${target.name}: 'cwd' and 'workspace_template' are mutually exclusive. Use 'cwd' to run in an existing directory, or 'workspace_template' to copy a template to a temp location.`
|
|
13039
|
-
);
|
|
13040
|
-
}
|
|
13041
13076
|
const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} codex timeout`);
|
|
13042
13077
|
const logDir = resolveOptionalString(logDirSource, env, `${target.name} codex log directory`, {
|
|
13043
13078
|
allowLiteral: true,
|
|
@@ -13050,7 +13085,6 @@ function resolveCodexConfig(target, env, evalFilePath) {
|
|
|
13050
13085
|
executable,
|
|
13051
13086
|
args,
|
|
13052
13087
|
cwd,
|
|
13053
|
-
workspaceTemplate,
|
|
13054
13088
|
timeoutMs,
|
|
13055
13089
|
logDir,
|
|
13056
13090
|
logFormat,
|
|
@@ -13103,13 +13137,12 @@ function resolveStreamLog(target, envFallback) {
|
|
|
13103
13137
|
deprecationWarning: `${target.name}: 'log_format' is deprecated and will be removed in v4.16. Use 'stream_log: ${streamLogEquivalent}' instead (log_format: '${normalized}' \u2192 stream_log: '${streamLogEquivalent}').`
|
|
13104
13138
|
};
|
|
13105
13139
|
}
|
|
13106
|
-
function resolveCopilotSdkConfig(target, env,
|
|
13140
|
+
function resolveCopilotSdkConfig(target, env, _evalFilePath) {
|
|
13107
13141
|
const cliUrlSource = target.cli_url;
|
|
13108
13142
|
const cliPathSource = target.cli_path;
|
|
13109
13143
|
const githubTokenSource = target.github_token;
|
|
13110
13144
|
const modelSource = target.model;
|
|
13111
13145
|
const cwdSource = target.cwd;
|
|
13112
|
-
const workspaceTemplateSource = target.workspace_template;
|
|
13113
13146
|
const timeoutSource = target.timeout_seconds;
|
|
13114
13147
|
const logDirSource = target.log_dir ?? target.log_directory;
|
|
13115
13148
|
const logFormatSource = target.log_format;
|
|
@@ -13144,23 +13177,6 @@ function resolveCopilotSdkConfig(target, env, evalFilePath) {
|
|
|
13144
13177
|
allowLiteral: true,
|
|
13145
13178
|
optionalEnv: true
|
|
13146
13179
|
});
|
|
13147
|
-
let workspaceTemplate = resolveOptionalString(
|
|
13148
|
-
workspaceTemplateSource,
|
|
13149
|
-
env,
|
|
13150
|
-
`${target.name} copilot-sdk workspace template`,
|
|
13151
|
-
{
|
|
13152
|
-
allowLiteral: true,
|
|
13153
|
-
optionalEnv: true
|
|
13154
|
-
}
|
|
13155
|
-
);
|
|
13156
|
-
if (workspaceTemplate && evalFilePath && !import_node_path26.default.isAbsolute(workspaceTemplate)) {
|
|
13157
|
-
workspaceTemplate = import_node_path26.default.resolve(import_node_path26.default.dirname(import_node_path26.default.resolve(evalFilePath)), workspaceTemplate);
|
|
13158
|
-
}
|
|
13159
|
-
if (cwd && workspaceTemplate) {
|
|
13160
|
-
throw new Error(
|
|
13161
|
-
`${target.name}: 'cwd' and 'workspace_template' are mutually exclusive. Use 'cwd' to run in an existing directory, or 'workspace_template' to copy a template to a temp location.`
|
|
13162
|
-
);
|
|
13163
|
-
}
|
|
13164
13180
|
const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} copilot-sdk timeout`);
|
|
13165
13181
|
const logDir = resolveOptionalString(
|
|
13166
13182
|
logDirSource,
|
|
@@ -13225,7 +13241,6 @@ function resolveCopilotSdkConfig(target, env, evalFilePath) {
|
|
|
13225
13241
|
githubToken,
|
|
13226
13242
|
model,
|
|
13227
13243
|
cwd,
|
|
13228
|
-
workspaceTemplate,
|
|
13229
13244
|
timeoutMs,
|
|
13230
13245
|
logDir,
|
|
13231
13246
|
logFormat,
|
|
@@ -13239,12 +13254,11 @@ function resolveCopilotSdkConfig(target, env, evalFilePath) {
|
|
|
13239
13254
|
byokWireApi
|
|
13240
13255
|
};
|
|
13241
13256
|
}
|
|
13242
|
-
function resolveCopilotCliConfig(target, env,
|
|
13257
|
+
function resolveCopilotCliConfig(target, env, _evalFilePath) {
|
|
13243
13258
|
const executableSource = target.executable ?? target.command ?? target.binary;
|
|
13244
13259
|
const modelSource = target.model;
|
|
13245
13260
|
const argsSource = target.args ?? target.arguments;
|
|
13246
13261
|
const cwdSource = target.cwd;
|
|
13247
|
-
const workspaceTemplateSource = target.workspace_template;
|
|
13248
13262
|
const timeoutSource = target.timeout_seconds;
|
|
13249
13263
|
const logDirSource = target.log_dir ?? target.log_directory;
|
|
13250
13264
|
const logFormatSource = target.log_format;
|
|
@@ -13267,23 +13281,6 @@ function resolveCopilotCliConfig(target, env, evalFilePath) {
|
|
|
13267
13281
|
allowLiteral: true,
|
|
13268
13282
|
optionalEnv: true
|
|
13269
13283
|
});
|
|
13270
|
-
let workspaceTemplate = resolveOptionalString(
|
|
13271
|
-
workspaceTemplateSource,
|
|
13272
|
-
env,
|
|
13273
|
-
`${target.name} copilot-cli workspace template`,
|
|
13274
|
-
{
|
|
13275
|
-
allowLiteral: true,
|
|
13276
|
-
optionalEnv: true
|
|
13277
|
-
}
|
|
13278
|
-
);
|
|
13279
|
-
if (workspaceTemplate && evalFilePath && !import_node_path26.default.isAbsolute(workspaceTemplate)) {
|
|
13280
|
-
workspaceTemplate = import_node_path26.default.resolve(import_node_path26.default.dirname(import_node_path26.default.resolve(evalFilePath)), workspaceTemplate);
|
|
13281
|
-
}
|
|
13282
|
-
if (cwd && workspaceTemplate) {
|
|
13283
|
-
throw new Error(
|
|
13284
|
-
`${target.name}: 'cwd' and 'workspace_template' are mutually exclusive. Use 'cwd' to run in an existing directory, or 'workspace_template' to copy a template to a temp location.`
|
|
13285
|
-
);
|
|
13286
|
-
}
|
|
13287
13284
|
const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} copilot-cli timeout`);
|
|
13288
13285
|
const logDir = resolveOptionalString(
|
|
13289
13286
|
logDirSource,
|
|
@@ -13301,7 +13298,6 @@ function resolveCopilotCliConfig(target, env, evalFilePath) {
|
|
|
13301
13298
|
model,
|
|
13302
13299
|
args,
|
|
13303
13300
|
cwd,
|
|
13304
|
-
workspaceTemplate,
|
|
13305
13301
|
timeoutMs,
|
|
13306
13302
|
logDir,
|
|
13307
13303
|
logFormat,
|
|
@@ -13316,14 +13312,13 @@ function normalizeCopilotLogFormat(value) {
|
|
|
13316
13312
|
if (normalized === "json" || normalized === "summary") return normalized;
|
|
13317
13313
|
throw new Error("copilot log format must be 'summary' or 'json'");
|
|
13318
13314
|
}
|
|
13319
|
-
function resolvePiCodingAgentConfig(target, env,
|
|
13315
|
+
function resolvePiCodingAgentConfig(target, env, _evalFilePath) {
|
|
13320
13316
|
const subproviderSource = target.subprovider;
|
|
13321
13317
|
const modelSource = target.model ?? target.pi_model;
|
|
13322
13318
|
const apiKeySource = target.api_key;
|
|
13323
13319
|
const toolsSource = target.tools ?? target.pi_tools;
|
|
13324
13320
|
const thinkingSource = target.thinking ?? target.pi_thinking;
|
|
13325
13321
|
const cwdSource = target.cwd;
|
|
13326
|
-
const workspaceTemplateSource = target.workspace_template;
|
|
13327
13322
|
const timeoutSource = target.timeout_seconds;
|
|
13328
13323
|
const logDirSource = target.log_dir ?? target.log_directory;
|
|
13329
13324
|
const logFormatSource = target.log_format;
|
|
@@ -13367,23 +13362,6 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
|
|
|
13367
13362
|
allowLiteral: true,
|
|
13368
13363
|
optionalEnv: true
|
|
13369
13364
|
});
|
|
13370
|
-
let workspaceTemplate = resolveOptionalString(
|
|
13371
|
-
workspaceTemplateSource,
|
|
13372
|
-
env,
|
|
13373
|
-
`${target.name} pi workspace template`,
|
|
13374
|
-
{
|
|
13375
|
-
allowLiteral: true,
|
|
13376
|
-
optionalEnv: true
|
|
13377
|
-
}
|
|
13378
|
-
);
|
|
13379
|
-
if (workspaceTemplate && evalFilePath && !import_node_path26.default.isAbsolute(workspaceTemplate)) {
|
|
13380
|
-
workspaceTemplate = import_node_path26.default.resolve(import_node_path26.default.dirname(import_node_path26.default.resolve(evalFilePath)), workspaceTemplate);
|
|
13381
|
-
}
|
|
13382
|
-
if (cwd && workspaceTemplate) {
|
|
13383
|
-
throw new Error(
|
|
13384
|
-
`${target.name}: 'cwd' and 'workspace_template' are mutually exclusive. Use 'cwd' to run in an existing directory, or 'workspace_template' to copy a template to a temp location.`
|
|
13385
|
-
);
|
|
13386
|
-
}
|
|
13387
13365
|
const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} pi timeout`);
|
|
13388
13366
|
const logDir = resolveOptionalString(logDirSource, env, `${target.name} pi log directory`, {
|
|
13389
13367
|
allowLiteral: true,
|
|
@@ -13399,7 +13377,6 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
|
|
|
13399
13377
|
tools,
|
|
13400
13378
|
thinking,
|
|
13401
13379
|
cwd,
|
|
13402
|
-
workspaceTemplate,
|
|
13403
13380
|
timeoutMs,
|
|
13404
13381
|
logDir,
|
|
13405
13382
|
logFormat,
|
|
@@ -13407,7 +13384,7 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
|
|
|
13407
13384
|
systemPrompt
|
|
13408
13385
|
};
|
|
13409
13386
|
}
|
|
13410
|
-
function resolvePiCliConfig(target, env,
|
|
13387
|
+
function resolvePiCliConfig(target, env, _evalFilePath) {
|
|
13411
13388
|
const executableSource = target.executable ?? target.command ?? target.binary;
|
|
13412
13389
|
const subproviderSource = target.subprovider;
|
|
13413
13390
|
const modelSource = target.model ?? target.pi_model;
|
|
@@ -13415,7 +13392,6 @@ function resolvePiCliConfig(target, env, evalFilePath) {
|
|
|
13415
13392
|
const toolsSource = target.tools ?? target.pi_tools;
|
|
13416
13393
|
const thinkingSource = target.thinking ?? target.pi_thinking;
|
|
13417
13394
|
const cwdSource = target.cwd;
|
|
13418
|
-
const workspaceTemplateSource = target.workspace_template;
|
|
13419
13395
|
const timeoutSource = target.timeout_seconds;
|
|
13420
13396
|
const logDirSource = target.log_dir ?? target.log_directory;
|
|
13421
13397
|
const logFormatSource = target.log_format;
|
|
@@ -13462,18 +13438,6 @@ function resolvePiCliConfig(target, env, evalFilePath) {
|
|
|
13462
13438
|
allowLiteral: true,
|
|
13463
13439
|
optionalEnv: true
|
|
13464
13440
|
});
|
|
13465
|
-
let workspaceTemplate = resolveOptionalString(
|
|
13466
|
-
workspaceTemplateSource,
|
|
13467
|
-
env,
|
|
13468
|
-
`${target.name} pi-cli workspace template`,
|
|
13469
|
-
{ allowLiteral: true, optionalEnv: true }
|
|
13470
|
-
);
|
|
13471
|
-
if (workspaceTemplate && evalFilePath && !import_node_path26.default.isAbsolute(workspaceTemplate)) {
|
|
13472
|
-
workspaceTemplate = import_node_path26.default.resolve(import_node_path26.default.dirname(import_node_path26.default.resolve(evalFilePath)), workspaceTemplate);
|
|
13473
|
-
}
|
|
13474
|
-
if (cwd && workspaceTemplate) {
|
|
13475
|
-
throw new Error(`${target.name}: 'cwd' and 'workspace_template' are mutually exclusive.`);
|
|
13476
|
-
}
|
|
13477
13441
|
const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} pi-cli timeout`);
|
|
13478
13442
|
const logDir = resolveOptionalString(logDirSource, env, `${target.name} pi-cli log directory`, {
|
|
13479
13443
|
allowLiteral: true,
|
|
@@ -13491,7 +13455,6 @@ function resolvePiCliConfig(target, env, evalFilePath) {
|
|
|
13491
13455
|
thinking,
|
|
13492
13456
|
args,
|
|
13493
13457
|
cwd,
|
|
13494
|
-
workspaceTemplate,
|
|
13495
13458
|
timeoutMs,
|
|
13496
13459
|
logDir,
|
|
13497
13460
|
logFormat,
|
|
@@ -13499,10 +13462,10 @@ function resolvePiCliConfig(target, env, evalFilePath) {
|
|
|
13499
13462
|
systemPrompt
|
|
13500
13463
|
};
|
|
13501
13464
|
}
|
|
13502
|
-
function resolveClaudeConfig(target, env,
|
|
13465
|
+
function resolveClaudeConfig(target, env, _evalFilePath) {
|
|
13466
|
+
const executableSource = target.executable ?? target.command ?? target.binary;
|
|
13503
13467
|
const modelSource = target.model;
|
|
13504
13468
|
const cwdSource = target.cwd;
|
|
13505
|
-
const workspaceTemplateSource = target.workspace_template;
|
|
13506
13469
|
const timeoutSource = target.timeout_seconds;
|
|
13507
13470
|
const logDirSource = target.log_dir ?? target.log_directory;
|
|
13508
13471
|
const logFormatSource = target.log_format ?? target.log_output_format ?? env.AGENTV_CLAUDE_LOG_FORMAT;
|
|
@@ -13512,6 +13475,10 @@ function resolveClaudeConfig(target, env, evalFilePath) {
|
|
|
13512
13475
|
process.stderr.write(`[agentv] \u26A0 ${streamLogResult.deprecationWarning}
|
|
13513
13476
|
`);
|
|
13514
13477
|
}
|
|
13478
|
+
const executable = resolveOptionalString(executableSource, env, `${target.name} claude-cli executable`, {
|
|
13479
|
+
allowLiteral: true,
|
|
13480
|
+
optionalEnv: true
|
|
13481
|
+
}) ?? "claude";
|
|
13515
13482
|
const model = resolveOptionalString(modelSource, env, `${target.name} claude model`, {
|
|
13516
13483
|
allowLiteral: true,
|
|
13517
13484
|
optionalEnv: true
|
|
@@ -13520,23 +13487,6 @@ function resolveClaudeConfig(target, env, evalFilePath) {
|
|
|
13520
13487
|
allowLiteral: true,
|
|
13521
13488
|
optionalEnv: true
|
|
13522
13489
|
});
|
|
13523
|
-
let workspaceTemplate = resolveOptionalString(
|
|
13524
|
-
workspaceTemplateSource,
|
|
13525
|
-
env,
|
|
13526
|
-
`${target.name} claude workspace template`,
|
|
13527
|
-
{
|
|
13528
|
-
allowLiteral: true,
|
|
13529
|
-
optionalEnv: true
|
|
13530
|
-
}
|
|
13531
|
-
);
|
|
13532
|
-
if (workspaceTemplate && evalFilePath && !import_node_path26.default.isAbsolute(workspaceTemplate)) {
|
|
13533
|
-
workspaceTemplate = import_node_path26.default.resolve(import_node_path26.default.dirname(import_node_path26.default.resolve(evalFilePath)), workspaceTemplate);
|
|
13534
|
-
}
|
|
13535
|
-
if (cwd && workspaceTemplate) {
|
|
13536
|
-
throw new Error(
|
|
13537
|
-
`${target.name}: 'cwd' and 'workspace_template' are mutually exclusive. Use 'cwd' to run in an existing directory, or 'workspace_template' to copy a template to a temp location.`
|
|
13538
|
-
);
|
|
13539
|
-
}
|
|
13540
13490
|
const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} claude timeout`);
|
|
13541
13491
|
const logDir = resolveOptionalString(logDirSource, env, `${target.name} claude log directory`, {
|
|
13542
13492
|
allowLiteral: true,
|
|
@@ -13547,10 +13497,10 @@ function resolveClaudeConfig(target, env, evalFilePath) {
|
|
|
13547
13497
|
const maxTurns = typeof target.max_turns === "number" ? target.max_turns : void 0;
|
|
13548
13498
|
const maxBudgetUsd = typeof target.max_budget_usd === "number" ? target.max_budget_usd : void 0;
|
|
13549
13499
|
return {
|
|
13500
|
+
executable,
|
|
13550
13501
|
model,
|
|
13551
13502
|
systemPrompt,
|
|
13552
13503
|
cwd,
|
|
13553
|
-
workspaceTemplate,
|
|
13554
13504
|
timeoutMs,
|
|
13555
13505
|
maxTurns,
|
|
13556
13506
|
maxBudgetUsd,
|
|
@@ -13559,6 +13509,28 @@ function resolveClaudeConfig(target, env, evalFilePath) {
|
|
|
13559
13509
|
streamLog: streamLogResult.streamLog
|
|
13560
13510
|
};
|
|
13561
13511
|
}
|
|
13512
|
+
function resolveCcMirrorBinaryPath(variant) {
|
|
13513
|
+
const variantJsonPath = import_node_path26.default.join((0, import_node_os8.homedir)(), ".cc-mirror", variant, "variant.json");
|
|
13514
|
+
if (!(0, import_node_fs12.existsSync)(variantJsonPath)) {
|
|
13515
|
+
throw new Error(
|
|
13516
|
+
`cc-mirror variant "${variant}": ${variantJsonPath} not found. Install the variant or set "executable" explicitly.`
|
|
13517
|
+
);
|
|
13518
|
+
}
|
|
13519
|
+
let parsed;
|
|
13520
|
+
try {
|
|
13521
|
+
parsed = JSON.parse((0, import_node_fs12.readFileSync)(variantJsonPath, "utf8"));
|
|
13522
|
+
} catch (e) {
|
|
13523
|
+
throw new Error(
|
|
13524
|
+
`cc-mirror variant "${variant}": failed to parse ${variantJsonPath}: ${e.message}`
|
|
13525
|
+
);
|
|
13526
|
+
}
|
|
13527
|
+
if (typeof parsed.binaryPath !== "string" || parsed.binaryPath.trim().length === 0) {
|
|
13528
|
+
throw new Error(
|
|
13529
|
+
`cc-mirror variant "${variant}": ${variantJsonPath} missing "binaryPath" field`
|
|
13530
|
+
);
|
|
13531
|
+
}
|
|
13532
|
+
return parsed.binaryPath;
|
|
13533
|
+
}
|
|
13562
13534
|
function normalizeClaudeLogFormat(value) {
|
|
13563
13535
|
if (value === void 0 || value === null) {
|
|
13564
13536
|
return void 0;
|
|
@@ -13576,20 +13548,7 @@ function resolveMockConfig(target) {
|
|
|
13576
13548
|
const response = typeof target.response === "string" ? target.response : void 0;
|
|
13577
13549
|
return { response };
|
|
13578
13550
|
}
|
|
13579
|
-
function resolveVSCodeConfig(target, env, insiders,
|
|
13580
|
-
const workspaceTemplateEnvVar = resolveOptionalLiteralString(target.workspace_template);
|
|
13581
|
-
let workspaceTemplate = workspaceTemplateEnvVar ? resolveOptionalString(
|
|
13582
|
-
workspaceTemplateEnvVar,
|
|
13583
|
-
env,
|
|
13584
|
-
`${target.name} workspace template path`,
|
|
13585
|
-
{
|
|
13586
|
-
allowLiteral: true,
|
|
13587
|
-
optionalEnv: true
|
|
13588
|
-
}
|
|
13589
|
-
) : void 0;
|
|
13590
|
-
if (workspaceTemplate && evalFilePath && !import_node_path26.default.isAbsolute(workspaceTemplate)) {
|
|
13591
|
-
workspaceTemplate = import_node_path26.default.resolve(import_node_path26.default.dirname(import_node_path26.default.resolve(evalFilePath)), workspaceTemplate);
|
|
13592
|
-
}
|
|
13551
|
+
function resolveVSCodeConfig(target, env, insiders, _evalFilePath) {
|
|
13593
13552
|
const executableSource = target.executable;
|
|
13594
13553
|
const waitSource = target.wait;
|
|
13595
13554
|
const dryRunSource = target.dry_run;
|
|
@@ -13609,7 +13568,6 @@ function resolveVSCodeConfig(target, env, insiders, evalFilePath) {
|
|
|
13609
13568
|
allowLiteral: true,
|
|
13610
13569
|
optionalEnv: true
|
|
13611
13570
|
}),
|
|
13612
|
-
workspaceTemplate,
|
|
13613
13571
|
timeoutMs
|
|
13614
13572
|
};
|
|
13615
13573
|
}
|
|
@@ -13891,12 +13849,12 @@ var import_node_path35 = __toESM(require("path"), 1);
|
|
|
13891
13849
|
|
|
13892
13850
|
// src/evaluation/providers/vscode/utils/fs.ts
|
|
13893
13851
|
init_cjs_shims();
|
|
13894
|
-
var
|
|
13852
|
+
var import_node_fs13 = require("fs");
|
|
13895
13853
|
var import_promises23 = require("fs/promises");
|
|
13896
13854
|
var import_node_path27 = __toESM(require("path"), 1);
|
|
13897
13855
|
async function pathExists(target) {
|
|
13898
13856
|
try {
|
|
13899
|
-
await (0, import_promises23.access)(target,
|
|
13857
|
+
await (0, import_promises23.access)(target, import_node_fs13.constants.F_OK);
|
|
13900
13858
|
return true;
|
|
13901
13859
|
} catch {
|
|
13902
13860
|
return false;
|
|
@@ -15035,7 +14993,7 @@ var VSCodeProvider = class {
|
|
|
15035
14993
|
await this.ensureEnvironmentReady();
|
|
15036
14994
|
const inputFiles = normalizeAttachments(request.inputFiles);
|
|
15037
14995
|
const promptContent = buildPromptDocument2(request, inputFiles);
|
|
15038
|
-
const workspaceTemplate = request.workspaceFile
|
|
14996
|
+
const workspaceTemplate = request.workspaceFile;
|
|
15039
14997
|
const startTime = Date.now();
|
|
15040
14998
|
const session = await dispatchAgentSession({
|
|
15041
14999
|
userQuery: promptContent,
|
|
@@ -15091,9 +15049,6 @@ var VSCodeProvider = class {
|
|
|
15091
15049
|
const userQueries = normalizedRequests.map(
|
|
15092
15050
|
({ request, inputFiles }) => buildPromptDocument2(request, inputFiles)
|
|
15093
15051
|
);
|
|
15094
|
-
const batchWorkspaceTemplate = await resolveWorkspaceTemplateFile(
|
|
15095
|
-
this.config.workspaceTemplate
|
|
15096
|
-
);
|
|
15097
15052
|
const startTime = Date.now();
|
|
15098
15053
|
const session = await dispatchBatchAgent({
|
|
15099
15054
|
userQueries,
|
|
@@ -15103,7 +15058,7 @@ var VSCodeProvider = class {
|
|
|
15103
15058
|
dryRun: this.config.dryRun,
|
|
15104
15059
|
vscodeCmd: this.config.executable,
|
|
15105
15060
|
subagentRoot: this.config.subagentRoot,
|
|
15106
|
-
workspaceTemplate:
|
|
15061
|
+
workspaceTemplate: void 0,
|
|
15107
15062
|
silent: true,
|
|
15108
15063
|
timeoutMs: this.config.timeoutMs
|
|
15109
15064
|
});
|
|
@@ -15183,17 +15138,6 @@ async function locateVSCodeExecutable(candidate) {
|
|
|
15183
15138
|
`VS Code executable '${candidate}' was not found on PATH. Check the 'executable' setting in your target configuration.`
|
|
15184
15139
|
);
|
|
15185
15140
|
}
|
|
15186
|
-
async function resolveWorkspaceTemplateFile(template) {
|
|
15187
|
-
if (!template) {
|
|
15188
|
-
return void 0;
|
|
15189
|
-
}
|
|
15190
|
-
try {
|
|
15191
|
-
const stats = await (0, import_promises29.stat)(import_node_path37.default.resolve(template));
|
|
15192
|
-
return stats.isFile() ? template : void 0;
|
|
15193
|
-
} catch {
|
|
15194
|
-
return template;
|
|
15195
|
-
}
|
|
15196
|
-
}
|
|
15197
15141
|
function buildPromptDocument2(request, attachments) {
|
|
15198
15142
|
const parts = [];
|
|
15199
15143
|
if (request.systemPrompt && request.systemPrompt.trim().length > 0) {
|
|
@@ -15356,7 +15300,7 @@ function isAgentProvider(provider) {
|
|
|
15356
15300
|
|
|
15357
15301
|
// src/evaluation/providers/targets-file.ts
|
|
15358
15302
|
init_cjs_shims();
|
|
15359
|
-
var
|
|
15303
|
+
var import_node_fs14 = require("fs");
|
|
15360
15304
|
var import_promises30 = require("fs/promises");
|
|
15361
15305
|
var import_node_path38 = __toESM(require("path"), 1);
|
|
15362
15306
|
var import_yaml8 = require("yaml");
|
|
@@ -15391,7 +15335,7 @@ function assertTargetDefinition(value, index, filePath) {
|
|
|
15391
15335
|
}
|
|
15392
15336
|
async function fileExists3(filePath) {
|
|
15393
15337
|
try {
|
|
15394
|
-
await (0, import_promises30.access)(filePath,
|
|
15338
|
+
await (0, import_promises30.access)(filePath, import_node_fs14.constants.F_OK);
|
|
15395
15339
|
return true;
|
|
15396
15340
|
} catch {
|
|
15397
15341
|
return false;
|
|
@@ -15575,7 +15519,7 @@ function negateScore(score) {
|
|
|
15575
15519
|
// src/evaluation/evaluators/code-evaluator.ts
|
|
15576
15520
|
init_cjs_shims();
|
|
15577
15521
|
var import_promises31 = require("fs/promises");
|
|
15578
|
-
var
|
|
15522
|
+
var import_node_os9 = require("os");
|
|
15579
15523
|
var import_node_path40 = require("path");
|
|
15580
15524
|
init_exec();
|
|
15581
15525
|
|
|
@@ -15940,7 +15884,7 @@ var CodeEvaluator = class {
|
|
|
15940
15884
|
let imageTmpDir;
|
|
15941
15885
|
const getImageDir = async () => {
|
|
15942
15886
|
if (!imageTmpDir) {
|
|
15943
|
-
imageTmpDir = await (0, import_promises31.mkdtemp)((0, import_node_path40.join)((0,
|
|
15887
|
+
imageTmpDir = await (0, import_promises31.mkdtemp)((0, import_node_path40.join)((0, import_node_os9.tmpdir)(), "agentv-img-"));
|
|
15944
15888
|
}
|
|
15945
15889
|
return imageTmpDir;
|
|
15946
15890
|
};
|
|
@@ -15953,7 +15897,7 @@ var CodeEvaluator = class {
|
|
|
15953
15897
|
if (outputForPayload) {
|
|
15954
15898
|
const serialized = JSON.stringify(outputForPayload);
|
|
15955
15899
|
if (serialized.length > FILE_BACKED_OUTPUT_THRESHOLD) {
|
|
15956
|
-
const tmpDir = await (0, import_promises31.mkdtemp)((0, import_node_path40.join)((0,
|
|
15900
|
+
const tmpDir = await (0, import_promises31.mkdtemp)((0, import_node_path40.join)((0, import_node_os9.tmpdir)(), "agentv-grader-"));
|
|
15957
15901
|
outputPath = (0, import_node_path40.join)(tmpDir, "output.json");
|
|
15958
15902
|
await (0, import_promises31.writeFile)(outputPath, serialized);
|
|
15959
15903
|
outputForPayload = null;
|
|
@@ -16452,7 +16396,7 @@ ${context2.fileChanges}`;
|
|
|
16452
16396
|
const workspacePath = context2.workspacePath;
|
|
16453
16397
|
if (!workspacePath) {
|
|
16454
16398
|
throw new Error(
|
|
16455
|
-
"llm-grader built-in agent mode requires a
|
|
16399
|
+
"llm-grader built-in agent mode requires a workspace (workspacePath is not set)"
|
|
16456
16400
|
);
|
|
16457
16401
|
}
|
|
16458
16402
|
const systemPrompt = this.buildAgentSystemPrompt(context2);
|
|
@@ -17191,11 +17135,11 @@ function createFilesystemTools(workspacePath) {
|
|
|
17191
17135
|
execute: async (input) => {
|
|
17192
17136
|
try {
|
|
17193
17137
|
const resolved = resolveSandboxed(workspacePath, input.path);
|
|
17194
|
-
const
|
|
17195
|
-
if (
|
|
17138
|
+
const stat12 = await import_promises32.default.stat(resolved);
|
|
17139
|
+
if (stat12.isDirectory()) {
|
|
17196
17140
|
return { error: `'${input.path}' is a directory, not a file` };
|
|
17197
17141
|
}
|
|
17198
|
-
const buffer = Buffer.alloc(Math.min(
|
|
17142
|
+
const buffer = Buffer.alloc(Math.min(stat12.size, MAX_FILE_SIZE));
|
|
17199
17143
|
const fd = await import_promises32.default.open(resolved, "r");
|
|
17200
17144
|
try {
|
|
17201
17145
|
await fd.read(buffer, 0, buffer.length, 0);
|
|
@@ -17203,8 +17147,8 @@ function createFilesystemTools(workspacePath) {
|
|
|
17203
17147
|
await fd.close();
|
|
17204
17148
|
}
|
|
17205
17149
|
const content = buffer.toString("utf-8");
|
|
17206
|
-
const truncated =
|
|
17207
|
-
return { content, truncated, size:
|
|
17150
|
+
const truncated = stat12.size > MAX_FILE_SIZE;
|
|
17151
|
+
return { content, truncated, size: stat12.size };
|
|
17208
17152
|
} catch (error) {
|
|
17209
17153
|
return { error: error instanceof Error ? error.message : String(error) };
|
|
17210
17154
|
}
|
|
@@ -17255,8 +17199,8 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
|
|
|
17255
17199
|
const ext = import_node_path41.default.extname(entry.name).toLowerCase();
|
|
17256
17200
|
if (BINARY_EXTENSIONS.has(ext)) continue;
|
|
17257
17201
|
try {
|
|
17258
|
-
const
|
|
17259
|
-
if (
|
|
17202
|
+
const stat12 = await import_promises32.default.stat(fullPath);
|
|
17203
|
+
if (stat12.size > MAX_FILE_SIZE) continue;
|
|
17260
17204
|
const content = await import_promises32.default.readFile(fullPath, "utf-8");
|
|
17261
17205
|
const lines = content.split("\n");
|
|
17262
17206
|
for (let i = 0; i < lines.length; i++) {
|
|
@@ -19159,7 +19103,7 @@ function runEqualsAssertion(output, value) {
|
|
|
19159
19103
|
init_cjs_shims();
|
|
19160
19104
|
var import_node_child_process11 = require("child_process");
|
|
19161
19105
|
var import_node_crypto11 = require("crypto");
|
|
19162
|
-
var
|
|
19106
|
+
var import_node_fs17 = require("fs");
|
|
19163
19107
|
var import_promises36 = require("fs/promises");
|
|
19164
19108
|
var import_node_path49 = __toESM(require("path"), 1);
|
|
19165
19109
|
var import_node_util7 = require("util");
|
|
@@ -20064,7 +20008,7 @@ async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
|
|
|
20064
20008
|
init_cjs_shims();
|
|
20065
20009
|
var import_node_child_process9 = require("child_process");
|
|
20066
20010
|
var import_node_crypto10 = require("crypto");
|
|
20067
|
-
var
|
|
20011
|
+
var import_node_fs15 = require("fs");
|
|
20068
20012
|
var import_promises34 = require("fs/promises");
|
|
20069
20013
|
var import_node_path46 = __toESM(require("path"), 1);
|
|
20070
20014
|
var import_node_util5 = require("util");
|
|
@@ -20172,7 +20116,7 @@ var WorkspacePoolManager = class {
|
|
|
20172
20116
|
if (!locked) {
|
|
20173
20117
|
continue;
|
|
20174
20118
|
}
|
|
20175
|
-
const slotExists = (0,
|
|
20119
|
+
const slotExists = (0, import_node_fs15.existsSync)(slotPath);
|
|
20176
20120
|
if (slotExists) {
|
|
20177
20121
|
await this.resetSlot(slotPath, templatePath, repos, poolReset);
|
|
20178
20122
|
return {
|
|
@@ -20278,7 +20222,7 @@ var WorkspacePoolManager = class {
|
|
|
20278
20222
|
for (const entry of entries) {
|
|
20279
20223
|
if (entry.startsWith("slot-") && !entry.endsWith(".lock")) {
|
|
20280
20224
|
const lockPath = import_node_path46.default.join(poolDir, `${entry}.lock`);
|
|
20281
|
-
if ((0,
|
|
20225
|
+
if ((0, import_node_fs15.existsSync)(lockPath)) {
|
|
20282
20226
|
try {
|
|
20283
20227
|
const pidStr = await (0, import_promises34.readFile)(lockPath, "utf-8");
|
|
20284
20228
|
const pid = Number.parseInt(pidStr.trim(), 10);
|
|
@@ -20310,7 +20254,7 @@ var WorkspacePoolManager = class {
|
|
|
20310
20254
|
for (const repo of repos) {
|
|
20311
20255
|
if (!repo.path || !repo.source) continue;
|
|
20312
20256
|
const repoDir = import_node_path46.default.join(slotPath, repo.path);
|
|
20313
|
-
if (!(0,
|
|
20257
|
+
if (!(0, import_node_fs15.existsSync)(repoDir)) {
|
|
20314
20258
|
continue;
|
|
20315
20259
|
}
|
|
20316
20260
|
if (poolReset === "none") {
|
|
@@ -20346,7 +20290,7 @@ var WorkspacePoolManager = class {
|
|
|
20346
20290
|
// src/evaluation/workspace/repo-manager.ts
|
|
20347
20291
|
init_cjs_shims();
|
|
20348
20292
|
var import_node_child_process10 = require("child_process");
|
|
20349
|
-
var
|
|
20293
|
+
var import_node_fs16 = require("fs");
|
|
20350
20294
|
var import_node_path47 = __toESM(require("path"), 1);
|
|
20351
20295
|
var import_node_util6 = require("util");
|
|
20352
20296
|
var execFileAsync2 = (0, import_node_util6.promisify)(import_node_child_process10.execFile);
|
|
@@ -20398,7 +20342,7 @@ var RepoManager = class {
|
|
|
20398
20342
|
resolvedSourcePath: sourcePath ?? "",
|
|
20399
20343
|
reason: "empty_path"
|
|
20400
20344
|
});
|
|
20401
|
-
} else if (!(0,
|
|
20345
|
+
} else if (!(0, import_node_fs16.existsSync)(sourcePath)) {
|
|
20402
20346
|
errors.push({
|
|
20403
20347
|
repoPath: repo.path ?? "(none)",
|
|
20404
20348
|
resolvedSourcePath: sourcePath,
|
|
@@ -20692,7 +20636,7 @@ function workspaceGitEnv() {
|
|
|
20692
20636
|
};
|
|
20693
20637
|
}
|
|
20694
20638
|
async function resetWorkspaceRoot(workspacePath, resetMode, baselineRef) {
|
|
20695
|
-
if (!(0,
|
|
20639
|
+
if (!(0, import_node_fs17.existsSync)(import_node_path49.default.join(workspacePath, ".git"))) {
|
|
20696
20640
|
return false;
|
|
20697
20641
|
}
|
|
20698
20642
|
const cleanFlag = resetMode === "strict" ? "-fdx" : "-fd";
|
|
@@ -20706,13 +20650,6 @@ async function resetWorkspaceRoot(workspacePath, resetMode, baselineRef) {
|
|
|
20706
20650
|
await execFileAsync3("git", ["clean", cleanFlag], opts);
|
|
20707
20651
|
return true;
|
|
20708
20652
|
}
|
|
20709
|
-
function getWorkspaceTemplate(target) {
|
|
20710
|
-
const config = target.config;
|
|
20711
|
-
if ("workspaceTemplate" in config && typeof config.workspaceTemplate === "string") {
|
|
20712
|
-
return config.workspaceTemplate;
|
|
20713
|
-
}
|
|
20714
|
-
return void 0;
|
|
20715
|
-
}
|
|
20716
20653
|
function validateDependencyGraph(tests) {
|
|
20717
20654
|
const ids = /* @__PURE__ */ new Set();
|
|
20718
20655
|
for (const test of tests) {
|
|
@@ -20986,7 +20923,7 @@ async function runEvaluation(options) {
|
|
|
20986
20923
|
}
|
|
20987
20924
|
}
|
|
20988
20925
|
const suiteWorkspace = filteredEvalCases[0]?.workspace;
|
|
20989
|
-
const rawTemplate = suiteWorkspace?.template
|
|
20926
|
+
const rawTemplate = suiteWorkspace?.template;
|
|
20990
20927
|
const resolvedTemplate = await resolveWorkspaceTemplate(rawTemplate);
|
|
20991
20928
|
const workspaceTemplate = resolvedTemplate?.dir;
|
|
20992
20929
|
let suiteWorkspaceFile = resolvedTemplate?.workspaceFile;
|
|
@@ -21184,7 +21121,7 @@ async function runEvaluation(options) {
|
|
|
21184
21121
|
for (const repo of suiteWorkspace.repos) {
|
|
21185
21122
|
if (!repo.path || !repo.source) continue;
|
|
21186
21123
|
const targetDir = import_node_path49.default.join(sharedWorkspacePath, repo.path);
|
|
21187
|
-
if ((0,
|
|
21124
|
+
if ((0, import_node_fs17.existsSync)(targetDir)) {
|
|
21188
21125
|
setupLog(`reusing existing repo at: ${targetDir}`);
|
|
21189
21126
|
continue;
|
|
21190
21127
|
}
|
|
@@ -21274,6 +21211,54 @@ async function runEvaluation(options) {
|
|
|
21274
21211
|
}
|
|
21275
21212
|
}
|
|
21276
21213
|
}
|
|
21214
|
+
const targetHooks = options.targetHooks;
|
|
21215
|
+
const targetBeforeAllHook = targetHooks?.before_all;
|
|
21216
|
+
if (sharedWorkspacePath && hasHookCommand(targetBeforeAllHook)) {
|
|
21217
|
+
const beforeAllCommand = (targetBeforeAllHook.command ?? []).join(" ");
|
|
21218
|
+
setupLog(`running target before_all command=${beforeAllCommand}`);
|
|
21219
|
+
const scriptContext = {
|
|
21220
|
+
workspacePath: sharedWorkspacePath,
|
|
21221
|
+
testId: "__target_before_all__",
|
|
21222
|
+
evalRunId,
|
|
21223
|
+
evalDir,
|
|
21224
|
+
workspaceFileDir: suiteWorkspace?.workspaceFileDir
|
|
21225
|
+
};
|
|
21226
|
+
try {
|
|
21227
|
+
await executeWorkspaceScript(
|
|
21228
|
+
toScriptConfig(targetBeforeAllHook, "before_all", "target hooks"),
|
|
21229
|
+
scriptContext
|
|
21230
|
+
);
|
|
21231
|
+
setupLog("target before_all completed");
|
|
21232
|
+
} catch (error) {
|
|
21233
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
21234
|
+
if (sharedWorkspacePath && !useStaticWorkspace) {
|
|
21235
|
+
await cleanupWorkspace(sharedWorkspacePath).catch(() => {
|
|
21236
|
+
});
|
|
21237
|
+
}
|
|
21238
|
+
throw new Error(`target before_all hook failed: ${message}`);
|
|
21239
|
+
}
|
|
21240
|
+
}
|
|
21241
|
+
if (availablePoolSlots.length > 0 && hasHookCommand(targetBeforeAllHook)) {
|
|
21242
|
+
for (const slot of availablePoolSlots) {
|
|
21243
|
+
setupLog(`running target before_all on pool slot ${slot.index}`);
|
|
21244
|
+
const scriptContext = {
|
|
21245
|
+
workspacePath: slot.path,
|
|
21246
|
+
testId: "__target_before_all__",
|
|
21247
|
+
evalRunId,
|
|
21248
|
+
evalDir,
|
|
21249
|
+
workspaceFileDir: suiteWorkspace?.workspaceFileDir
|
|
21250
|
+
};
|
|
21251
|
+
try {
|
|
21252
|
+
await executeWorkspaceScript(
|
|
21253
|
+
toScriptConfig(targetBeforeAllHook, "before_all", "target hooks"),
|
|
21254
|
+
scriptContext
|
|
21255
|
+
);
|
|
21256
|
+
} catch (error) {
|
|
21257
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
21258
|
+
throw new Error(`target before_all hook failed on pool slot ${slot.index}: ${message}`);
|
|
21259
|
+
}
|
|
21260
|
+
}
|
|
21261
|
+
}
|
|
21277
21262
|
if (sharedWorkspacePath) {
|
|
21278
21263
|
try {
|
|
21279
21264
|
sharedBaselineCommit = await initializeBaseline(sharedWorkspacePath);
|
|
@@ -21419,6 +21404,7 @@ async function runEvaluation(options) {
|
|
|
21419
21404
|
evalDir,
|
|
21420
21405
|
verbose,
|
|
21421
21406
|
threshold: scoreThreshold,
|
|
21407
|
+
targetHooks: options.targetHooks,
|
|
21422
21408
|
...depResults && Object.keys(depResults).length > 0 ? { dependencyResults: depResults } : {}
|
|
21423
21409
|
};
|
|
21424
21410
|
let result = trials && trials.count > 1 ? await runEvalCaseWithTrials(runCaseOptions, trials) : await runEvalCase(runCaseOptions);
|
|
@@ -21560,6 +21546,26 @@ async function runEvaluation(options) {
|
|
|
21560
21546
|
}
|
|
21561
21547
|
}
|
|
21562
21548
|
const afterAllWorkspaces = poolSlots.length > 1 ? poolSlots.map((s) => s.path) : sharedWorkspacePath ? [sharedWorkspacePath] : [];
|
|
21549
|
+
const targetAfterAllHook = targetHooks?.after_all;
|
|
21550
|
+
if (afterAllWorkspaces.length > 0 && hasHookCommand(targetAfterAllHook)) {
|
|
21551
|
+
for (const wsPath of afterAllWorkspaces) {
|
|
21552
|
+
const scriptContext = {
|
|
21553
|
+
workspacePath: wsPath,
|
|
21554
|
+
testId: "__target_after_all__",
|
|
21555
|
+
evalRunId,
|
|
21556
|
+
evalDir,
|
|
21557
|
+
workspaceFileDir: suiteWorkspace?.workspaceFileDir
|
|
21558
|
+
};
|
|
21559
|
+
try {
|
|
21560
|
+
await executeWorkspaceScript(
|
|
21561
|
+
toScriptConfig(targetAfterAllHook, "after_all", "target hooks"),
|
|
21562
|
+
scriptContext,
|
|
21563
|
+
"warn"
|
|
21564
|
+
);
|
|
21565
|
+
} catch {
|
|
21566
|
+
}
|
|
21567
|
+
}
|
|
21568
|
+
}
|
|
21563
21569
|
const suiteAfterAllHook = suiteWorkspace?.hooks?.after_all;
|
|
21564
21570
|
if (afterAllWorkspaces.length > 0 && suiteHooksEnabled && hasHookCommand(suiteAfterAllHook)) {
|
|
21565
21571
|
const afterAllHook = suiteAfterAllHook;
|
|
@@ -21822,7 +21828,7 @@ async function runEvalCase(options) {
|
|
|
21822
21828
|
let caseWorkspaceFile;
|
|
21823
21829
|
const caseHooksEnabled = hooksEnabled(evalCase.workspace);
|
|
21824
21830
|
if (!workspacePath) {
|
|
21825
|
-
const rawCaseTemplate = evalCase.workspace?.template
|
|
21831
|
+
const rawCaseTemplate = evalCase.workspace?.template;
|
|
21826
21832
|
const resolvedCaseTemplate = await resolveWorkspaceTemplate(rawCaseTemplate);
|
|
21827
21833
|
const caseWorkspaceTemplate = resolvedCaseTemplate?.dir;
|
|
21828
21834
|
caseWorkspaceFile = resolvedCaseTemplate?.workspaceFile;
|
|
@@ -22040,6 +22046,38 @@ async function runEvalCase(options) {
|
|
|
22040
22046
|
);
|
|
22041
22047
|
}
|
|
22042
22048
|
}
|
|
22049
|
+
const targetBeforeEachHook = options.targetHooks?.before_each;
|
|
22050
|
+
if (workspacePath && hasHookCommand(targetBeforeEachHook)) {
|
|
22051
|
+
const scriptContext = {
|
|
22052
|
+
workspacePath,
|
|
22053
|
+
testId: evalCase.id,
|
|
22054
|
+
evalRunId: evalRunId ?? "",
|
|
22055
|
+
caseInput: evalCase.question,
|
|
22056
|
+
caseMetadata: evalCase.metadata,
|
|
22057
|
+
evalDir,
|
|
22058
|
+
workspaceFileDir: evalCase.workspace?.workspaceFileDir
|
|
22059
|
+
};
|
|
22060
|
+
try {
|
|
22061
|
+
await executeWorkspaceScript(
|
|
22062
|
+
toScriptConfig(targetBeforeEachHook, "before_each", `target hook for '${evalCase.id}'`),
|
|
22063
|
+
scriptContext
|
|
22064
|
+
);
|
|
22065
|
+
beforeEachNeedsFreshBaseline = true;
|
|
22066
|
+
} catch (error) {
|
|
22067
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
22068
|
+
return buildErrorResult(
|
|
22069
|
+
evalCase,
|
|
22070
|
+
target.name,
|
|
22071
|
+
nowFn(),
|
|
22072
|
+
new Error(`target before_each hook failed: ${message}`),
|
|
22073
|
+
promptInputs,
|
|
22074
|
+
provider,
|
|
22075
|
+
"setup",
|
|
22076
|
+
"script_error",
|
|
22077
|
+
verbose
|
|
22078
|
+
);
|
|
22079
|
+
}
|
|
22080
|
+
}
|
|
22043
22081
|
let baselineCommit = beforeEachNeedsFreshBaseline ? void 0 : sharedBaselineCommit;
|
|
22044
22082
|
if (!baselineCommit && workspacePath) {
|
|
22045
22083
|
try {
|
|
@@ -22194,6 +22232,26 @@ async function runEvalCase(options) {
|
|
|
22194
22232
|
${providerFileChanges}` : providerFileChanges;
|
|
22195
22233
|
}
|
|
22196
22234
|
const providerError = extractProviderError(providerResponse);
|
|
22235
|
+
const targetAfterEachHook = options.targetHooks?.after_each;
|
|
22236
|
+
if (workspacePath && hasHookCommand(targetAfterEachHook)) {
|
|
22237
|
+
const scriptContext = {
|
|
22238
|
+
workspacePath,
|
|
22239
|
+
testId: evalCase.id,
|
|
22240
|
+
evalRunId: evalRunId ?? "",
|
|
22241
|
+
caseInput: evalCase.question,
|
|
22242
|
+
caseMetadata: evalCase.metadata,
|
|
22243
|
+
evalDir,
|
|
22244
|
+
workspaceFileDir: evalCase.workspace?.workspaceFileDir
|
|
22245
|
+
};
|
|
22246
|
+
try {
|
|
22247
|
+
await executeWorkspaceScript(
|
|
22248
|
+
toScriptConfig(targetAfterEachHook, "after_each", `target hook for '${evalCase.id}'`),
|
|
22249
|
+
scriptContext,
|
|
22250
|
+
"warn"
|
|
22251
|
+
);
|
|
22252
|
+
} catch {
|
|
22253
|
+
}
|
|
22254
|
+
}
|
|
22197
22255
|
if (caseHooksEnabled && workspacePath && evalCase.workspace?.hooks?.after_each?.reset && evalCase.workspace.hooks.after_each.reset !== "none") {
|
|
22198
22256
|
try {
|
|
22199
22257
|
if (repoManager && evalCase.workspace.repos?.length) {
|
|
@@ -23324,7 +23382,7 @@ function computeWeightedMean(entries) {
|
|
|
23324
23382
|
|
|
23325
23383
|
// src/evaluation/evaluate.ts
|
|
23326
23384
|
init_cjs_shims();
|
|
23327
|
-
var
|
|
23385
|
+
var import_node_fs18 = require("fs");
|
|
23328
23386
|
var import_node_path50 = __toESM(require("path"), 1);
|
|
23329
23387
|
|
|
23330
23388
|
// src/evaluation/providers/function-provider.ts
|
|
@@ -23482,7 +23540,7 @@ async function discoverDefaultTarget(repoRoot) {
|
|
|
23482
23540
|
for (const dir of chain) {
|
|
23483
23541
|
for (const candidate of TARGET_FILE_CANDIDATES) {
|
|
23484
23542
|
const targetsPath = import_node_path50.default.join(dir, candidate);
|
|
23485
|
-
if (!(0,
|
|
23543
|
+
if (!(0, import_node_fs18.existsSync)(targetsPath)) continue;
|
|
23486
23544
|
try {
|
|
23487
23545
|
const definitions = await readTargetDefinitions(targetsPath);
|
|
23488
23546
|
const defaultTarget = definitions.find((d) => d.name === "default");
|
|
@@ -23494,16 +23552,16 @@ async function discoverDefaultTarget(repoRoot) {
|
|
|
23494
23552
|
return null;
|
|
23495
23553
|
}
|
|
23496
23554
|
async function loadEnvHierarchy(repoRoot, startPath) {
|
|
23497
|
-
const { readFileSync:
|
|
23555
|
+
const { readFileSync: readFileSync6 } = await import("fs");
|
|
23498
23556
|
const chain = buildDirectoryChain2(startPath, repoRoot);
|
|
23499
23557
|
const envFiles = [];
|
|
23500
23558
|
for (const dir of chain) {
|
|
23501
23559
|
const envPath = import_node_path50.default.join(dir, ".env");
|
|
23502
|
-
if ((0,
|
|
23560
|
+
if ((0, import_node_fs18.existsSync)(envPath)) envFiles.push(envPath);
|
|
23503
23561
|
}
|
|
23504
23562
|
for (let i = 0; i < envFiles.length; i++) {
|
|
23505
23563
|
try {
|
|
23506
|
-
const content =
|
|
23564
|
+
const content = readFileSync6(envFiles[i], "utf8");
|
|
23507
23565
|
for (const line of content.split("\n")) {
|
|
23508
23566
|
const trimmed = line.trim();
|
|
23509
23567
|
if (!trimmed || trimmed.startsWith("#")) continue;
|
|
@@ -23576,12 +23634,12 @@ var CONFIG_FILE_NAMES = [
|
|
|
23576
23634
|
".agentv/config.js"
|
|
23577
23635
|
];
|
|
23578
23636
|
async function loadTsConfig(projectRoot) {
|
|
23579
|
-
const { existsSync:
|
|
23637
|
+
const { existsSync: existsSync10 } = await import("fs");
|
|
23580
23638
|
const { pathToFileURL: pathToFileURL2 } = await import("url");
|
|
23581
23639
|
const { join: join2 } = await import("path");
|
|
23582
23640
|
for (const fileName of CONFIG_FILE_NAMES) {
|
|
23583
23641
|
const filePath = join2(projectRoot, fileName);
|
|
23584
|
-
if (!
|
|
23642
|
+
if (!existsSync10(filePath)) {
|
|
23585
23643
|
continue;
|
|
23586
23644
|
}
|
|
23587
23645
|
try {
|
|
@@ -23830,9 +23888,9 @@ function shouldSkipCacheForTemperature(targetConfig) {
|
|
|
23830
23888
|
// src/evaluation/results-repo.ts
|
|
23831
23889
|
init_cjs_shims();
|
|
23832
23890
|
var import_node_child_process12 = require("child_process");
|
|
23833
|
-
var
|
|
23891
|
+
var import_node_fs19 = require("fs");
|
|
23834
23892
|
var import_promises39 = require("fs/promises");
|
|
23835
|
-
var
|
|
23893
|
+
var import_node_os10 = __toESM(require("os"), 1);
|
|
23836
23894
|
var import_node_path53 = __toESM(require("path"), 1);
|
|
23837
23895
|
var import_node_util8 = require("util");
|
|
23838
23896
|
var execFileAsync4 = (0, import_node_util8.promisify)(import_node_child_process12.execFile);
|
|
@@ -23870,18 +23928,18 @@ function getResultsRepoCachePaths(repo) {
|
|
|
23870
23928
|
};
|
|
23871
23929
|
}
|
|
23872
23930
|
function readPersistedStatus(statusFile) {
|
|
23873
|
-
if (!(0,
|
|
23931
|
+
if (!(0, import_node_fs19.existsSync)(statusFile)) {
|
|
23874
23932
|
return {};
|
|
23875
23933
|
}
|
|
23876
23934
|
try {
|
|
23877
|
-
return JSON.parse((0,
|
|
23935
|
+
return JSON.parse((0, import_node_fs19.readFileSync)(statusFile, "utf8"));
|
|
23878
23936
|
} catch {
|
|
23879
23937
|
return {};
|
|
23880
23938
|
}
|
|
23881
23939
|
}
|
|
23882
23940
|
function writePersistedStatus(statusFile, status) {
|
|
23883
|
-
(0,
|
|
23884
|
-
(0,
|
|
23941
|
+
(0, import_node_fs19.mkdirSync)(import_node_path53.default.dirname(statusFile), { recursive: true });
|
|
23942
|
+
(0, import_node_fs19.writeFileSync)(statusFile, `${JSON.stringify(status, null, 2)}
|
|
23885
23943
|
`, "utf8");
|
|
23886
23944
|
}
|
|
23887
23945
|
async function runCommand(executable, args, options) {
|
|
@@ -23943,8 +24001,8 @@ function updateStatusFile(config, patch) {
|
|
|
23943
24001
|
async function ensureResultsRepoClone(config) {
|
|
23944
24002
|
const normalized = normalizeResultsExportConfig(config);
|
|
23945
24003
|
const cachePaths = getResultsRepoCachePaths(normalized.repo);
|
|
23946
|
-
(0,
|
|
23947
|
-
if (!(0,
|
|
24004
|
+
(0, import_node_fs19.mkdirSync)(cachePaths.rootDir, { recursive: true });
|
|
24005
|
+
if (!(0, import_node_fs19.existsSync)(cachePaths.repoDir)) {
|
|
23948
24006
|
try {
|
|
23949
24007
|
await runGit([
|
|
23950
24008
|
"clone",
|
|
@@ -23958,7 +24016,7 @@ async function ensureResultsRepoClone(config) {
|
|
|
23958
24016
|
throw withFriendlyGitHubAuthError(error);
|
|
23959
24017
|
}
|
|
23960
24018
|
}
|
|
23961
|
-
if (!(0,
|
|
24019
|
+
if (!(0, import_node_fs19.existsSync)(import_node_path53.default.join(cachePaths.repoDir, ".git"))) {
|
|
23962
24020
|
throw new Error(`Results repo cache is not a git repository: ${cachePaths.repoDir}`);
|
|
23963
24021
|
}
|
|
23964
24022
|
return cachePaths.repoDir;
|
|
@@ -23977,7 +24035,7 @@ function getResultsRepoStatus(config) {
|
|
|
23977
24035
|
const persisted = readPersistedStatus(cachePaths.statusFile);
|
|
23978
24036
|
return {
|
|
23979
24037
|
configured: true,
|
|
23980
|
-
available: (0,
|
|
24038
|
+
available: (0, import_node_fs19.existsSync)(cachePaths.repoDir),
|
|
23981
24039
|
repo: normalized.repo,
|
|
23982
24040
|
path: normalized.path,
|
|
23983
24041
|
auto_push: normalized.auto_push,
|
|
@@ -24023,7 +24081,7 @@ async function prepareResultsRepoBranch(config, branchName) {
|
|
|
24023
24081
|
const cloneDir = await ensureResultsRepoClone(normalized);
|
|
24024
24082
|
const baseBranch = await resolveDefaultBranch(cloneDir);
|
|
24025
24083
|
await updateCacheRepo(cloneDir, baseBranch);
|
|
24026
|
-
const worktreeRoot = await (0, import_promises39.mkdtemp)(import_node_path53.default.join(
|
|
24084
|
+
const worktreeRoot = await (0, import_promises39.mkdtemp)(import_node_path53.default.join(import_node_os10.default.tmpdir(), "agentv-results-repo-"));
|
|
24027
24085
|
const worktreeDir = import_node_path53.default.join(worktreeRoot, "repo");
|
|
24028
24086
|
await runGit(["worktree", "add", "-B", branchName, worktreeDir, `origin/${baseBranch}`], {
|
|
24029
24087
|
cwd: cloneDir
|
|
@@ -24042,8 +24100,8 @@ async function prepareResultsRepoBranch(config, branchName) {
|
|
|
24042
24100
|
};
|
|
24043
24101
|
}
|
|
24044
24102
|
async function stageResultsArtifacts(params) {
|
|
24045
|
-
(0,
|
|
24046
|
-
(0,
|
|
24103
|
+
(0, import_node_fs19.rmSync)(params.destinationDir, { recursive: true, force: true });
|
|
24104
|
+
(0, import_node_fs19.mkdirSync)(import_node_path53.default.dirname(params.destinationDir), { recursive: true });
|
|
24047
24105
|
await (0, import_promises39.cp)(params.sourceDir, params.destinationDir, { recursive: true });
|
|
24048
24106
|
}
|
|
24049
24107
|
function resolveResultsRepoRunsDir(config) {
|
|
@@ -24111,19 +24169,31 @@ async function createDraftResultsPr(params) {
|
|
|
24111
24169
|
|
|
24112
24170
|
// src/benchmarks.ts
|
|
24113
24171
|
init_cjs_shims();
|
|
24114
|
-
var
|
|
24172
|
+
var import_node_fs20 = require("fs");
|
|
24115
24173
|
var import_node_path54 = __toESM(require("path"), 1);
|
|
24116
24174
|
var import_yaml10 = require("yaml");
|
|
24117
24175
|
function getBenchmarksRegistryPath() {
|
|
24118
|
-
return import_node_path54.default.join(
|
|
24176
|
+
return import_node_path54.default.join(getAgentvConfigDir(), "projects.yaml");
|
|
24177
|
+
}
|
|
24178
|
+
function migrateProjectsYaml(targetPath) {
|
|
24179
|
+
const dataHome = getAgentvHome();
|
|
24180
|
+
const configDir = getAgentvConfigDir();
|
|
24181
|
+
if (dataHome === configDir) return;
|
|
24182
|
+
const legacyPath = import_node_path54.default.join(dataHome, "projects.yaml");
|
|
24183
|
+
if (!(0, import_node_fs20.existsSync)(legacyPath)) return;
|
|
24184
|
+
(0, import_node_fs20.mkdirSync)(import_node_path54.default.dirname(targetPath), { recursive: true });
|
|
24185
|
+
(0, import_node_fs20.copyFileSync)(legacyPath, targetPath);
|
|
24119
24186
|
}
|
|
24120
24187
|
function loadBenchmarkRegistry() {
|
|
24121
24188
|
const registryPath = getBenchmarksRegistryPath();
|
|
24122
|
-
if (!(0,
|
|
24189
|
+
if (!(0, import_node_fs20.existsSync)(registryPath)) {
|
|
24190
|
+
migrateProjectsYaml(registryPath);
|
|
24191
|
+
}
|
|
24192
|
+
if (!(0, import_node_fs20.existsSync)(registryPath)) {
|
|
24123
24193
|
return { benchmarks: [] };
|
|
24124
24194
|
}
|
|
24125
24195
|
try {
|
|
24126
|
-
const raw = (0,
|
|
24196
|
+
const raw = (0, import_node_fs20.readFileSync)(registryPath, "utf-8");
|
|
24127
24197
|
const parsed = (0, import_yaml10.parse)(raw);
|
|
24128
24198
|
if (!parsed || !Array.isArray(parsed.benchmarks)) {
|
|
24129
24199
|
return { benchmarks: [] };
|
|
@@ -24136,10 +24206,10 @@ function loadBenchmarkRegistry() {
|
|
|
24136
24206
|
function saveBenchmarkRegistry(registry) {
|
|
24137
24207
|
const registryPath = getBenchmarksRegistryPath();
|
|
24138
24208
|
const dir = import_node_path54.default.dirname(registryPath);
|
|
24139
|
-
if (!(0,
|
|
24140
|
-
(0,
|
|
24209
|
+
if (!(0, import_node_fs20.existsSync)(dir)) {
|
|
24210
|
+
(0, import_node_fs20.mkdirSync)(dir, { recursive: true });
|
|
24141
24211
|
}
|
|
24142
|
-
(0,
|
|
24212
|
+
(0, import_node_fs20.writeFileSync)(registryPath, (0, import_yaml10.stringify)({ benchmarks: registry.benchmarks }), "utf-8");
|
|
24143
24213
|
}
|
|
24144
24214
|
function deriveBenchmarkId(dirPath, existingIds) {
|
|
24145
24215
|
const base = import_node_path54.default.basename(dirPath).toLowerCase().replace(/[^a-z0-9-]/g, "-").replace(/-+/g, "-").replace(/^-|-$/g, "");
|
|
@@ -24153,10 +24223,10 @@ function deriveBenchmarkId(dirPath, existingIds) {
|
|
|
24153
24223
|
}
|
|
24154
24224
|
function addBenchmark(benchmarkPath) {
|
|
24155
24225
|
const absPath = import_node_path54.default.resolve(benchmarkPath);
|
|
24156
|
-
if (!(0,
|
|
24226
|
+
if (!(0, import_node_fs20.existsSync)(absPath)) {
|
|
24157
24227
|
throw new Error(`Directory not found: ${absPath}`);
|
|
24158
24228
|
}
|
|
24159
|
-
if (!(0,
|
|
24229
|
+
if (!(0, import_node_fs20.existsSync)(import_node_path54.default.join(absPath, ".agentv"))) {
|
|
24160
24230
|
throw new Error(`No .agentv/ directory found in ${absPath}. Run an evaluation first.`);
|
|
24161
24231
|
}
|
|
24162
24232
|
const registry = loadBenchmarkRegistry();
|
|
@@ -24200,19 +24270,19 @@ function touchBenchmark(benchmarkId) {
|
|
|
24200
24270
|
}
|
|
24201
24271
|
function discoverBenchmarks(rootDir, maxDepth = 2) {
|
|
24202
24272
|
const absRoot = import_node_path54.default.resolve(rootDir);
|
|
24203
|
-
if (!(0,
|
|
24273
|
+
if (!(0, import_node_fs20.existsSync)(absRoot) || !(0, import_node_fs20.statSync)(absRoot).isDirectory()) {
|
|
24204
24274
|
return [];
|
|
24205
24275
|
}
|
|
24206
24276
|
const results = [];
|
|
24207
24277
|
function scan(dir, depth) {
|
|
24208
24278
|
if (depth > maxDepth) return;
|
|
24209
|
-
if ((0,
|
|
24279
|
+
if ((0, import_node_fs20.existsSync)(import_node_path54.default.join(dir, ".agentv"))) {
|
|
24210
24280
|
results.push(dir);
|
|
24211
24281
|
return;
|
|
24212
24282
|
}
|
|
24213
24283
|
if (depth === maxDepth) return;
|
|
24214
24284
|
try {
|
|
24215
|
-
const entries = (0,
|
|
24285
|
+
const entries = (0, import_node_fs20.readdirSync)(dir, { withFileTypes: true });
|
|
24216
24286
|
for (const entry of entries) {
|
|
24217
24287
|
if (!entry.isDirectory()) continue;
|
|
24218
24288
|
if (entry.name.startsWith(".") || entry.name === "node_modules") continue;
|
|
@@ -25153,9 +25223,9 @@ function extractResponseItemContent(content) {
|
|
|
25153
25223
|
// src/import/codex-session-discovery.ts
|
|
25154
25224
|
init_cjs_shims();
|
|
25155
25225
|
var import_promises41 = require("fs/promises");
|
|
25156
|
-
var
|
|
25226
|
+
var import_node_os11 = require("os");
|
|
25157
25227
|
var import_node_path56 = __toESM(require("path"), 1);
|
|
25158
|
-
var DEFAULT_SESSIONS_DIR = () => import_node_path56.default.join((0,
|
|
25228
|
+
var DEFAULT_SESSIONS_DIR = () => import_node_path56.default.join((0, import_node_os11.homedir)(), ".codex", "sessions");
|
|
25159
25229
|
async function discoverCodexSessions(opts) {
|
|
25160
25230
|
const sessionsDir = opts?.sessionsDir ?? DEFAULT_SESSIONS_DIR();
|
|
25161
25231
|
const limit = opts?.latest ? 1 : opts?.limit ?? 10;
|
|
@@ -25219,9 +25289,9 @@ async function discoverCodexSessions(opts) {
|
|
|
25219
25289
|
// src/import/session-discovery.ts
|
|
25220
25290
|
init_cjs_shims();
|
|
25221
25291
|
var import_promises42 = require("fs/promises");
|
|
25222
|
-
var
|
|
25292
|
+
var import_node_os12 = require("os");
|
|
25223
25293
|
var import_node_path57 = __toESM(require("path"), 1);
|
|
25224
|
-
var DEFAULT_PROJECTS_DIR = () => import_node_path57.default.join((0,
|
|
25294
|
+
var DEFAULT_PROJECTS_DIR = () => import_node_path57.default.join((0, import_node_os12.homedir)(), ".claude", "projects");
|
|
25225
25295
|
function encodeProjectPath(projectPath) {
|
|
25226
25296
|
return projectPath.replace(/\//g, "-");
|
|
25227
25297
|
}
|
|
@@ -25515,6 +25585,7 @@ function createAgentKernel() {
|
|
|
25515
25585
|
extractJsonBlob,
|
|
25516
25586
|
extractLastAssistantContent,
|
|
25517
25587
|
extractTargetFromSuite,
|
|
25588
|
+
extractTargetRefsFromSuite,
|
|
25518
25589
|
extractTargetsFromSuite,
|
|
25519
25590
|
extractTargetsFromTestCase,
|
|
25520
25591
|
extractThreshold,
|
|
@@ -25524,6 +25595,7 @@ function createAgentKernel() {
|
|
|
25524
25595
|
findGitRoot,
|
|
25525
25596
|
freeformEvaluationSchema,
|
|
25526
25597
|
generateRubrics,
|
|
25598
|
+
getAgentvConfigDir,
|
|
25527
25599
|
getAgentvHome,
|
|
25528
25600
|
getBenchmark,
|
|
25529
25601
|
getBenchmarksRegistryPath,
|