@agentv/core 4.13.0 → 4.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-SWLNU3I6.js → chunk-A3HYVKTI.js} +1 -1
- package/dist/chunk-A3HYVKTI.js.map +1 -0
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +1 -1
- package/dist/index.cjs +997 -660
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +38 -1
- package/dist/index.d.ts +38 -1
- package/dist/index.js +853 -516
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-SWLNU3I6.js.map +0 -1
package/dist/index.js
CHANGED
|
@@ -25,7 +25,7 @@ import {
|
|
|
25
25
|
resolveDelegatedTargetDefinition,
|
|
26
26
|
resolveFileReference,
|
|
27
27
|
resolveTargetDefinition
|
|
28
|
-
} from "./chunk-
|
|
28
|
+
} from "./chunk-A3HYVKTI.js";
|
|
29
29
|
import {
|
|
30
30
|
execFileWithStdin,
|
|
31
31
|
execShellWithStdin
|
|
@@ -3568,7 +3568,11 @@ async function loadTestSuite(evalFilePath, repoRoot, options) {
|
|
|
3568
3568
|
if (format === "agent-skills-json") {
|
|
3569
3569
|
return { tests: await loadTestsFromAgentSkills(evalFilePath) };
|
|
3570
3570
|
}
|
|
3571
|
-
const { tests, parsed } = await loadTestsFromYaml(
|
|
3571
|
+
const { tests, parsed, suiteWorkspacePath } = await loadTestsFromYaml(
|
|
3572
|
+
evalFilePath,
|
|
3573
|
+
repoRoot,
|
|
3574
|
+
options
|
|
3575
|
+
);
|
|
3572
3576
|
const metadata = parseMetadata(parsed);
|
|
3573
3577
|
const failOnError = extractFailOnError(parsed);
|
|
3574
3578
|
const threshold = extractThreshold(parsed);
|
|
@@ -3581,7 +3585,8 @@ async function loadTestSuite(evalFilePath, repoRoot, options) {
|
|
|
3581
3585
|
totalBudgetUsd: extractTotalBudgetUsd(parsed),
|
|
3582
3586
|
...metadata !== void 0 && { metadata },
|
|
3583
3587
|
...failOnError !== void 0 && { failOnError },
|
|
3584
|
-
...threshold !== void 0 && { threshold }
|
|
3588
|
+
...threshold !== void 0 && { threshold },
|
|
3589
|
+
...suiteWorkspacePath !== void 0 && { workspacePath: suiteWorkspacePath }
|
|
3585
3590
|
};
|
|
3586
3591
|
}
|
|
3587
3592
|
var loadEvalSuite = loadTestSuite;
|
|
@@ -3743,6 +3748,11 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
3743
3748
|
const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
|
|
3744
3749
|
const metadata = isJsonObject(testCaseConfig.metadata) ? testCaseConfig.metadata : void 0;
|
|
3745
3750
|
const caseTargets = extractTargetsFromTestCase(testCaseConfig);
|
|
3751
|
+
const dependsOn = Array.isArray(testCaseConfig.depends_on) ? testCaseConfig.depends_on.filter(
|
|
3752
|
+
(v) => typeof v === "string"
|
|
3753
|
+
) : void 0;
|
|
3754
|
+
const onDependencyFailureRaw = asString5(testCaseConfig.on_dependency_failure);
|
|
3755
|
+
const onDependencyFailure = onDependencyFailureRaw === "skip" || onDependencyFailureRaw === "fail" || onDependencyFailureRaw === "run" ? onDependencyFailureRaw : void 0;
|
|
3746
3756
|
const testCase = {
|
|
3747
3757
|
id,
|
|
3748
3758
|
suite: suiteName,
|
|
@@ -3760,11 +3770,13 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
3760
3770
|
workspace: mergedWorkspace,
|
|
3761
3771
|
metadata,
|
|
3762
3772
|
targets: caseTargets,
|
|
3763
|
-
...caseThreshold !== void 0 ? { threshold: caseThreshold } : {}
|
|
3773
|
+
...caseThreshold !== void 0 ? { threshold: caseThreshold } : {},
|
|
3774
|
+
...dependsOn && dependsOn.length > 0 ? { depends_on: dependsOn } : {},
|
|
3775
|
+
...onDependencyFailure ? { on_dependency_failure: onDependencyFailure } : {}
|
|
3764
3776
|
};
|
|
3765
3777
|
results.push(testCase);
|
|
3766
3778
|
}
|
|
3767
|
-
return { tests: results, parsed: suite };
|
|
3779
|
+
return { tests: results, parsed: suite, suiteWorkspacePath: suiteWorkspace?.path };
|
|
3768
3780
|
}
|
|
3769
3781
|
async function loadTestById(evalFilePath, repoRoot, evalId) {
|
|
3770
3782
|
const tests = await loadTests(evalFilePath, repoRoot);
|
|
@@ -6612,11 +6624,123 @@ function formatElapsed3(startedAt) {
|
|
|
6612
6624
|
// src/evaluation/providers/copilot-cli.ts
|
|
6613
6625
|
import { randomUUID as randomUUID5 } from "node:crypto";
|
|
6614
6626
|
import { mkdir as mkdir4 } from "node:fs/promises";
|
|
6615
|
-
import
|
|
6627
|
+
import { homedir as homedir2 } from "node:os";
|
|
6628
|
+
import path17 from "node:path";
|
|
6616
6629
|
import { Readable, Writable } from "node:stream";
|
|
6617
6630
|
import { spawn as spawn2 } from "node:child_process";
|
|
6618
6631
|
import * as acp from "@agentclientprotocol/sdk";
|
|
6619
6632
|
|
|
6633
|
+
// src/evaluation/workspace/file-changes.ts
|
|
6634
|
+
import { exec as execCallback } from "node:child_process";
|
|
6635
|
+
import { readdirSync, statSync } from "node:fs";
|
|
6636
|
+
import { readFile as readFile9, readdir, stat } from "node:fs/promises";
|
|
6637
|
+
import path15 from "node:path";
|
|
6638
|
+
import { promisify as promisify2 } from "node:util";
|
|
6639
|
+
var execAsync2 = promisify2(execCallback);
|
|
6640
|
+
var SNAPSHOT_MAX_FILE_BYTES = 512 * 1024;
|
|
6641
|
+
var SNAPSHOT_EXCLUDE_DIRS = /* @__PURE__ */ new Set([".git", "node_modules", ".agentv", "__pycache__"]);
|
|
6642
|
+
function gitExecOpts(workspacePath) {
|
|
6643
|
+
const { GIT_DIR: _, GIT_WORK_TREE: __, ...env } = process.env;
|
|
6644
|
+
return { cwd: workspacePath, env };
|
|
6645
|
+
}
|
|
6646
|
+
async function initializeBaseline(workspacePath) {
|
|
6647
|
+
const opts = gitExecOpts(workspacePath);
|
|
6648
|
+
await execAsync2("git init", opts);
|
|
6649
|
+
await execAsync2("git add -A", opts);
|
|
6650
|
+
await execAsync2(
|
|
6651
|
+
'git -c user.email=agentv@localhost -c user.name=agentv commit --allow-empty -m "agentv-baseline"',
|
|
6652
|
+
opts
|
|
6653
|
+
);
|
|
6654
|
+
const { stdout } = await execAsync2("git rev-parse HEAD", opts);
|
|
6655
|
+
return stdout.trim();
|
|
6656
|
+
}
|
|
6657
|
+
async function captureFileChanges(workspacePath, baselineCommit) {
|
|
6658
|
+
const opts = gitExecOpts(workspacePath);
|
|
6659
|
+
await stageNestedRepoChanges(workspacePath);
|
|
6660
|
+
await execAsync2("git add -A", opts);
|
|
6661
|
+
const { stdout } = await execAsync2(`git diff ${baselineCommit} --submodule=diff`, opts);
|
|
6662
|
+
return stdout.trim();
|
|
6663
|
+
}
|
|
6664
|
+
async function stageNestedRepoChanges(workspacePath) {
|
|
6665
|
+
let entries;
|
|
6666
|
+
try {
|
|
6667
|
+
entries = readdirSync(workspacePath);
|
|
6668
|
+
} catch {
|
|
6669
|
+
return;
|
|
6670
|
+
}
|
|
6671
|
+
for (const entry of entries) {
|
|
6672
|
+
if (entry === ".git" || entry === "node_modules") continue;
|
|
6673
|
+
const childPath = path15.join(workspacePath, entry);
|
|
6674
|
+
try {
|
|
6675
|
+
if (!statSync(childPath).isDirectory()) continue;
|
|
6676
|
+
if (!statSync(path15.join(childPath, ".git")).isDirectory()) continue;
|
|
6677
|
+
} catch {
|
|
6678
|
+
continue;
|
|
6679
|
+
}
|
|
6680
|
+
const childOpts = gitExecOpts(childPath);
|
|
6681
|
+
await execAsync2("git add -A", childOpts);
|
|
6682
|
+
}
|
|
6683
|
+
}
|
|
6684
|
+
async function captureSnapshot(dir) {
|
|
6685
|
+
const snapshot = /* @__PURE__ */ new Map();
|
|
6686
|
+
await walkDir(dir, dir, snapshot);
|
|
6687
|
+
return snapshot;
|
|
6688
|
+
}
|
|
6689
|
+
async function walkDir(rootDir, currentDir, snapshot) {
|
|
6690
|
+
let entries;
|
|
6691
|
+
try {
|
|
6692
|
+
entries = await readdir(currentDir);
|
|
6693
|
+
} catch {
|
|
6694
|
+
return;
|
|
6695
|
+
}
|
|
6696
|
+
for (const entry of entries) {
|
|
6697
|
+
if (SNAPSHOT_EXCLUDE_DIRS.has(entry)) continue;
|
|
6698
|
+
const fullPath = path15.join(currentDir, entry);
|
|
6699
|
+
let fileStat;
|
|
6700
|
+
try {
|
|
6701
|
+
fileStat = await stat(fullPath);
|
|
6702
|
+
} catch {
|
|
6703
|
+
continue;
|
|
6704
|
+
}
|
|
6705
|
+
if (fileStat.isDirectory()) {
|
|
6706
|
+
await walkDir(rootDir, fullPath, snapshot);
|
|
6707
|
+
} else if (fileStat.isFile()) {
|
|
6708
|
+
if (fileStat.size > SNAPSHOT_MAX_FILE_BYTES) continue;
|
|
6709
|
+
let content;
|
|
6710
|
+
try {
|
|
6711
|
+
content = await readFile9(fullPath, "utf8");
|
|
6712
|
+
if (content.includes("\0")) continue;
|
|
6713
|
+
} catch {
|
|
6714
|
+
continue;
|
|
6715
|
+
}
|
|
6716
|
+
const relativePath = path15.relative(rootDir, fullPath).replace(/\\/g, "/");
|
|
6717
|
+
snapshot.set(relativePath, content);
|
|
6718
|
+
}
|
|
6719
|
+
}
|
|
6720
|
+
}
|
|
6721
|
+
function generateNewFileDiff(relativePath, content) {
|
|
6722
|
+
const lines = content.endsWith("\n") ? content.slice(0, -1).split("\n") : content.split("\n");
|
|
6723
|
+
const addedLines = lines.map((l) => `+${l}`).join("\n");
|
|
6724
|
+
return [
|
|
6725
|
+
`diff --git a/${relativePath} b/${relativePath}`,
|
|
6726
|
+
"new file mode 100644",
|
|
6727
|
+
"--- /dev/null",
|
|
6728
|
+
`+++ b/${relativePath}`,
|
|
6729
|
+
`@@ -0,0 +1,${lines.length} @@`,
|
|
6730
|
+
addedLines
|
|
6731
|
+
].join("\n");
|
|
6732
|
+
}
|
|
6733
|
+
async function captureSessionArtifacts(filesDir, pathPrefix = "") {
|
|
6734
|
+
const snapshot = await captureSnapshot(filesDir).catch(() => void 0);
|
|
6735
|
+
if (!snapshot || snapshot.size === 0) return void 0;
|
|
6736
|
+
const parts = [];
|
|
6737
|
+
for (const [relPath, content] of snapshot) {
|
|
6738
|
+
const displayPath = pathPrefix ? `${pathPrefix}/${relPath}` : relPath;
|
|
6739
|
+
parts.push(generateNewFileDiff(displayPath, content));
|
|
6740
|
+
}
|
|
6741
|
+
return parts.join("\n");
|
|
6742
|
+
}
|
|
6743
|
+
|
|
6620
6744
|
// src/evaluation/providers/copilot-cli-log-tracker.ts
|
|
6621
6745
|
var GLOBAL_LOGS_KEY3 = Symbol.for("agentv.copilotCliLogs");
|
|
6622
6746
|
var GLOBAL_SUBSCRIBERS_KEY3 = Symbol.for("agentv.copilotCliLogSubscribers");
|
|
@@ -6672,9 +6796,9 @@ function subscribeToCopilotCliLogEntries(listener) {
|
|
|
6672
6796
|
|
|
6673
6797
|
// src/evaluation/providers/copilot-utils.ts
|
|
6674
6798
|
import { randomUUID as randomUUID4 } from "node:crypto";
|
|
6675
|
-
import { createWriteStream as createWriteStream4, existsSync, readdirSync } from "node:fs";
|
|
6799
|
+
import { createWriteStream as createWriteStream4, existsSync, readdirSync as readdirSync2 } from "node:fs";
|
|
6676
6800
|
import { arch, homedir, platform } from "node:os";
|
|
6677
|
-
import
|
|
6801
|
+
import path16 from "node:path";
|
|
6678
6802
|
import { fileURLToPath as fileURLToPath3 } from "node:url";
|
|
6679
6803
|
function resolvePlatformCliPath() {
|
|
6680
6804
|
const os4 = platform();
|
|
@@ -6698,7 +6822,7 @@ function resolvePlatformCliPath() {
|
|
|
6698
6822
|
try {
|
|
6699
6823
|
const resolved = import.meta.resolve(`${packageName}/package.json`);
|
|
6700
6824
|
const packageJsonPath = resolved.startsWith("file:") ? fileURLToPath3(resolved) : resolved;
|
|
6701
|
-
const binaryPath =
|
|
6825
|
+
const binaryPath = path16.join(path16.dirname(packageJsonPath), binaryName);
|
|
6702
6826
|
if (existsSync(binaryPath)) {
|
|
6703
6827
|
return binaryPath;
|
|
6704
6828
|
}
|
|
@@ -6706,7 +6830,7 @@ function resolvePlatformCliPath() {
|
|
|
6706
6830
|
}
|
|
6707
6831
|
let searchDir = process.cwd();
|
|
6708
6832
|
for (let i = 0; i < 10; i++) {
|
|
6709
|
-
const standardPath =
|
|
6833
|
+
const standardPath = path16.join(
|
|
6710
6834
|
searchDir,
|
|
6711
6835
|
"node_modules",
|
|
6712
6836
|
...packageName.split("/"),
|
|
@@ -6715,13 +6839,13 @@ function resolvePlatformCliPath() {
|
|
|
6715
6839
|
if (existsSync(standardPath)) {
|
|
6716
6840
|
return standardPath;
|
|
6717
6841
|
}
|
|
6718
|
-
const bunDir =
|
|
6842
|
+
const bunDir = path16.join(searchDir, "node_modules", ".bun");
|
|
6719
6843
|
const prefix = `@github+copilot-${osPart}-${archPart}@`;
|
|
6720
6844
|
try {
|
|
6721
|
-
const entries =
|
|
6845
|
+
const entries = readdirSync2(bunDir);
|
|
6722
6846
|
for (const entry of entries) {
|
|
6723
6847
|
if (entry.startsWith(prefix)) {
|
|
6724
|
-
const candidate =
|
|
6848
|
+
const candidate = path16.join(
|
|
6725
6849
|
bunDir,
|
|
6726
6850
|
entry,
|
|
6727
6851
|
"node_modules",
|
|
@@ -6736,16 +6860,16 @@ function resolvePlatformCliPath() {
|
|
|
6736
6860
|
}
|
|
6737
6861
|
} catch {
|
|
6738
6862
|
}
|
|
6739
|
-
const parent =
|
|
6863
|
+
const parent = path16.dirname(searchDir);
|
|
6740
6864
|
if (parent === searchDir) break;
|
|
6741
6865
|
searchDir = parent;
|
|
6742
6866
|
}
|
|
6743
6867
|
for (const root of globalNpmRoots()) {
|
|
6744
|
-
const hoisted =
|
|
6868
|
+
const hoisted = path16.join(root, "@github", `copilot-${osPart}-${archPart}`, binaryName);
|
|
6745
6869
|
if (existsSync(hoisted)) {
|
|
6746
6870
|
return hoisted;
|
|
6747
6871
|
}
|
|
6748
|
-
const nested =
|
|
6872
|
+
const nested = path16.join(
|
|
6749
6873
|
root,
|
|
6750
6874
|
"@github",
|
|
6751
6875
|
"copilot",
|
|
@@ -6766,20 +6890,20 @@ function globalNpmRoots() {
|
|
|
6766
6890
|
const home = homedir();
|
|
6767
6891
|
if (os4 === "win32") {
|
|
6768
6892
|
if (process.env.APPDATA) {
|
|
6769
|
-
roots.push(
|
|
6893
|
+
roots.push(path16.join(process.env.APPDATA, "npm", "node_modules"));
|
|
6770
6894
|
}
|
|
6771
|
-
roots.push(
|
|
6895
|
+
roots.push(path16.join(home, "AppData", "Roaming", "npm", "node_modules"));
|
|
6772
6896
|
} else {
|
|
6773
6897
|
roots.push("/opt/homebrew/lib/node_modules");
|
|
6774
6898
|
roots.push("/usr/local/lib/node_modules");
|
|
6775
6899
|
roots.push("/usr/lib/node_modules");
|
|
6776
|
-
roots.push(
|
|
6777
|
-
roots.push(
|
|
6900
|
+
roots.push(path16.join(home, ".npm-global", "lib", "node_modules"));
|
|
6901
|
+
roots.push(path16.join(home, ".local", "lib", "node_modules"));
|
|
6778
6902
|
}
|
|
6779
6903
|
if (process.env.npm_config_prefix) {
|
|
6780
6904
|
const prefix = process.env.npm_config_prefix;
|
|
6781
6905
|
roots.push(
|
|
6782
|
-
os4 === "win32" ?
|
|
6906
|
+
os4 === "win32" ? path16.join(prefix, "node_modules") : path16.join(prefix, "lib", "node_modules")
|
|
6783
6907
|
);
|
|
6784
6908
|
}
|
|
6785
6909
|
return Array.from(new Set(roots));
|
|
@@ -6826,14 +6950,22 @@ var CopilotStreamLogger = class _CopilotStreamLogger {
|
|
|
6826
6950
|
startedAt = Date.now();
|
|
6827
6951
|
format;
|
|
6828
6952
|
summarize;
|
|
6829
|
-
|
|
6953
|
+
chunkExtractor;
|
|
6954
|
+
pendingText = "";
|
|
6955
|
+
constructor(filePath, format, summarize, chunkExtractor) {
|
|
6830
6956
|
this.filePath = filePath;
|
|
6831
6957
|
this.format = format;
|
|
6832
6958
|
this.summarize = summarize;
|
|
6959
|
+
this.chunkExtractor = chunkExtractor;
|
|
6833
6960
|
this.stream = createWriteStream4(filePath, { flags: "a" });
|
|
6834
6961
|
}
|
|
6835
6962
|
static async create(options, summarize) {
|
|
6836
|
-
const logger = new _CopilotStreamLogger(
|
|
6963
|
+
const logger = new _CopilotStreamLogger(
|
|
6964
|
+
options.filePath,
|
|
6965
|
+
options.format,
|
|
6966
|
+
summarize,
|
|
6967
|
+
options.chunkExtractor
|
|
6968
|
+
);
|
|
6837
6969
|
const header = [
|
|
6838
6970
|
`# ${options.headerLabel} stream log`,
|
|
6839
6971
|
`# target: ${options.targetName}`,
|
|
@@ -6849,19 +6981,42 @@ var CopilotStreamLogger = class _CopilotStreamLogger {
|
|
|
6849
6981
|
return logger;
|
|
6850
6982
|
}
|
|
6851
6983
|
handleEvent(eventType, data) {
|
|
6852
|
-
const elapsed = formatElapsed4(this.startedAt);
|
|
6853
6984
|
if (this.format === "json") {
|
|
6854
|
-
|
|
6855
|
-
|
|
6856
|
-
} else {
|
|
6857
|
-
const summary = this.summarize(eventType, data);
|
|
6858
|
-
if (summary) {
|
|
6859
|
-
this.stream.write(`[+${elapsed}] [${eventType}] ${summary}
|
|
6985
|
+
const elapsed2 = formatElapsed4(this.startedAt);
|
|
6986
|
+
this.stream.write(`${JSON.stringify({ time: elapsed2, event: eventType, data })}
|
|
6860
6987
|
`);
|
|
6988
|
+
return;
|
|
6989
|
+
}
|
|
6990
|
+
if (this.chunkExtractor) {
|
|
6991
|
+
const chunkText = this.chunkExtractor(eventType, data);
|
|
6992
|
+
if (chunkText === null) {
|
|
6993
|
+
this.pendingText = "";
|
|
6994
|
+
return;
|
|
6995
|
+
}
|
|
6996
|
+
if (chunkText !== void 0) {
|
|
6997
|
+
this.pendingText += chunkText;
|
|
6998
|
+
return;
|
|
6861
6999
|
}
|
|
7000
|
+
this.flushPendingText();
|
|
7001
|
+
}
|
|
7002
|
+
const elapsed = formatElapsed4(this.startedAt);
|
|
7003
|
+
const summary = this.summarize(eventType, data);
|
|
7004
|
+
if (summary) {
|
|
7005
|
+
this.stream.write(`[+${elapsed}] [${eventType}] ${summary}
|
|
7006
|
+
`);
|
|
6862
7007
|
}
|
|
6863
7008
|
}
|
|
7009
|
+
flushPendingText() {
|
|
7010
|
+
if (!this.pendingText) return;
|
|
7011
|
+
const elapsed = formatElapsed4(this.startedAt);
|
|
7012
|
+
this.stream.write(`[+${elapsed}] [assistant_message] ${this.pendingText}
|
|
7013
|
+
`);
|
|
7014
|
+
this.pendingText = "";
|
|
7015
|
+
}
|
|
6864
7016
|
async close() {
|
|
7017
|
+
if (this.format !== "json") {
|
|
7018
|
+
this.flushPendingText();
|
|
7019
|
+
}
|
|
6865
7020
|
await new Promise((resolve, reject) => {
|
|
6866
7021
|
this.stream.once("error", reject);
|
|
6867
7022
|
this.stream.end(() => resolve());
|
|
@@ -7070,6 +7225,10 @@ var CopilotCliProvider = class {
|
|
|
7070
7225
|
content: finalContent
|
|
7071
7226
|
});
|
|
7072
7227
|
}
|
|
7228
|
+
const sessionId = session.sessionId;
|
|
7229
|
+
const fileChanges = sessionId ? await captureSessionArtifacts(
|
|
7230
|
+
path17.join(homedir2(), ".copilot", "session-state", sessionId, "files")
|
|
7231
|
+
).catch(() => void 0) : void 0;
|
|
7073
7232
|
return {
|
|
7074
7233
|
raw: {
|
|
7075
7234
|
model: this.config.model,
|
|
@@ -7081,7 +7240,8 @@ var CopilotCliProvider = class {
|
|
|
7081
7240
|
costUsd,
|
|
7082
7241
|
durationMs,
|
|
7083
7242
|
startTime,
|
|
7084
|
-
endTime
|
|
7243
|
+
endTime,
|
|
7244
|
+
...fileChanges ? { fileChanges } : {}
|
|
7085
7245
|
};
|
|
7086
7246
|
} finally {
|
|
7087
7247
|
await logger?.close();
|
|
@@ -7122,10 +7282,10 @@ var CopilotCliProvider = class {
|
|
|
7122
7282
|
}
|
|
7123
7283
|
resolveCwd(cwdOverride) {
|
|
7124
7284
|
if (cwdOverride) {
|
|
7125
|
-
return
|
|
7285
|
+
return path17.resolve(cwdOverride);
|
|
7126
7286
|
}
|
|
7127
7287
|
if (this.config.cwd) {
|
|
7128
|
-
return
|
|
7288
|
+
return path17.resolve(this.config.cwd);
|
|
7129
7289
|
}
|
|
7130
7290
|
return void 0;
|
|
7131
7291
|
}
|
|
@@ -7144,9 +7304,9 @@ var CopilotCliProvider = class {
|
|
|
7144
7304
|
return void 0;
|
|
7145
7305
|
}
|
|
7146
7306
|
if (this.config.logDir) {
|
|
7147
|
-
return
|
|
7307
|
+
return path17.resolve(this.config.logDir);
|
|
7148
7308
|
}
|
|
7149
|
-
return
|
|
7309
|
+
return path17.join(process.cwd(), ".agentv", "logs", "copilot-cli");
|
|
7150
7310
|
}
|
|
7151
7311
|
async createStreamLogger(request) {
|
|
7152
7312
|
const logDir = this.resolveLogDirectory();
|
|
@@ -7160,7 +7320,7 @@ var CopilotCliProvider = class {
|
|
|
7160
7320
|
console.warn(`Skipping Copilot CLI stream logging (could not create ${logDir}): ${message}`);
|
|
7161
7321
|
return void 0;
|
|
7162
7322
|
}
|
|
7163
|
-
const filePath =
|
|
7323
|
+
const filePath = path17.join(logDir, buildLogFilename4(request, this.targetName, "copilot-cli"));
|
|
7164
7324
|
try {
|
|
7165
7325
|
const logger = await CopilotStreamLogger.create(
|
|
7166
7326
|
{
|
|
@@ -7169,7 +7329,8 @@ var CopilotCliProvider = class {
|
|
|
7169
7329
|
evalCaseId: request.evalCaseId,
|
|
7170
7330
|
attempt: request.attempt,
|
|
7171
7331
|
format: this.config.logFormat ?? "summary",
|
|
7172
|
-
headerLabel: "Copilot CLI (ACP)"
|
|
7332
|
+
headerLabel: "Copilot CLI (ACP)",
|
|
7333
|
+
chunkExtractor: extractAcpChunk
|
|
7173
7334
|
},
|
|
7174
7335
|
summarizeAcpEvent
|
|
7175
7336
|
);
|
|
@@ -7228,6 +7389,14 @@ Fix options:
|
|
|
7228
7389
|
- In .env: COPILOT_EXE=C:\\Users\\<you>\\AppData\\Roaming\\npm\\node_modules\\@github\\copilot-win32-x64\\copilot.exe
|
|
7229
7390
|
- In .agentv/targets.yaml: executable: \${{ COPILOT_EXE }}`;
|
|
7230
7391
|
}
|
|
7392
|
+
function extractAcpChunk(eventType, data) {
|
|
7393
|
+
if (eventType === "agent_thought_chunk") return null;
|
|
7394
|
+
if (eventType !== "agent_message_chunk") return void 0;
|
|
7395
|
+
if (!data || typeof data !== "object") return void 0;
|
|
7396
|
+
const d = data;
|
|
7397
|
+
const content = d.content;
|
|
7398
|
+
return content?.type === "text" && typeof content.text === "string" ? content.text : void 0;
|
|
7399
|
+
}
|
|
7231
7400
|
function summarizeAcpEvent(eventType, data) {
|
|
7232
7401
|
if (!data || typeof data !== "object") {
|
|
7233
7402
|
return eventType;
|
|
@@ -7253,9 +7422,9 @@ function summarizeAcpEvent(eventType, data) {
|
|
|
7253
7422
|
}
|
|
7254
7423
|
|
|
7255
7424
|
// src/evaluation/providers/copilot-log.ts
|
|
7256
|
-
import { readFile as
|
|
7257
|
-
import { homedir as
|
|
7258
|
-
import
|
|
7425
|
+
import { readFile as readFile11 } from "node:fs/promises";
|
|
7426
|
+
import { homedir as homedir4 } from "node:os";
|
|
7427
|
+
import path19 from "node:path";
|
|
7259
7428
|
|
|
7260
7429
|
// src/evaluation/providers/copilot-log-parser.ts
|
|
7261
7430
|
function parseCopilotEvents(eventsJsonl) {
|
|
@@ -7387,32 +7556,32 @@ function parseCopilotEvents(eventsJsonl) {
|
|
|
7387
7556
|
}
|
|
7388
7557
|
|
|
7389
7558
|
// src/evaluation/providers/copilot-session-discovery.ts
|
|
7390
|
-
import { readFile as
|
|
7391
|
-
import { homedir as
|
|
7392
|
-
import
|
|
7559
|
+
import { readFile as readFile10, readdir as readdir2, stat as stat2 } from "node:fs/promises";
|
|
7560
|
+
import { homedir as homedir3 } from "node:os";
|
|
7561
|
+
import path18 from "node:path";
|
|
7393
7562
|
import { parse as parseYaml2 } from "yaml";
|
|
7394
|
-
var DEFAULT_SESSION_STATE_DIR = () =>
|
|
7563
|
+
var DEFAULT_SESSION_STATE_DIR = () => path18.join(homedir3(), ".copilot", "session-state");
|
|
7395
7564
|
async function discoverCopilotSessions(opts) {
|
|
7396
7565
|
const sessionStateDir = opts?.sessionStateDir ?? DEFAULT_SESSION_STATE_DIR();
|
|
7397
7566
|
const limit = opts?.limit ?? 10;
|
|
7398
7567
|
let entries;
|
|
7399
7568
|
try {
|
|
7400
|
-
entries = await
|
|
7569
|
+
entries = await readdir2(sessionStateDir);
|
|
7401
7570
|
} catch {
|
|
7402
7571
|
return [];
|
|
7403
7572
|
}
|
|
7404
7573
|
const sessions = [];
|
|
7405
7574
|
for (const entry of entries) {
|
|
7406
|
-
const sessionDir =
|
|
7407
|
-
const workspacePath =
|
|
7408
|
-
const eventsPath =
|
|
7575
|
+
const sessionDir = path18.join(sessionStateDir, entry);
|
|
7576
|
+
const workspacePath = path18.join(sessionDir, "workspace.yaml");
|
|
7577
|
+
const eventsPath = path18.join(sessionDir, "events.jsonl");
|
|
7409
7578
|
try {
|
|
7410
|
-
const workspaceContent = await
|
|
7579
|
+
const workspaceContent = await readFile10(workspacePath, "utf8");
|
|
7411
7580
|
const workspace = parseYaml2(workspaceContent) ?? {};
|
|
7412
7581
|
const cwd = String(workspace.cwd ?? "");
|
|
7413
7582
|
let updatedAt;
|
|
7414
7583
|
try {
|
|
7415
|
-
const eventsStat = await
|
|
7584
|
+
const eventsStat = await stat2(eventsPath);
|
|
7416
7585
|
updatedAt = eventsStat.mtime;
|
|
7417
7586
|
} catch {
|
|
7418
7587
|
updatedAt = /* @__PURE__ */ new Date(0);
|
|
@@ -7466,21 +7635,24 @@ var CopilotLogProvider = class {
|
|
|
7466
7635
|
}
|
|
7467
7636
|
async invoke(_request) {
|
|
7468
7637
|
const sessionDir = await this.resolveSessionDir();
|
|
7469
|
-
const eventsPath =
|
|
7638
|
+
const eventsPath = path19.join(sessionDir, "events.jsonl");
|
|
7470
7639
|
let eventsContent;
|
|
7471
7640
|
try {
|
|
7472
|
-
eventsContent = await
|
|
7641
|
+
eventsContent = await readFile11(eventsPath, "utf8");
|
|
7473
7642
|
} catch (err) {
|
|
7474
7643
|
throw new Error(
|
|
7475
7644
|
`Failed to read Copilot session transcript at ${eventsPath}: ${err instanceof Error ? err.message : String(err)}`
|
|
7476
7645
|
);
|
|
7477
7646
|
}
|
|
7478
7647
|
const parsed = parseCopilotEvents(eventsContent);
|
|
7648
|
+
const filesDir = path19.join(sessionDir, "files");
|
|
7649
|
+
const fileChanges = await captureSessionArtifacts(filesDir).catch(() => void 0);
|
|
7479
7650
|
return {
|
|
7480
7651
|
output: parsed.messages,
|
|
7481
7652
|
tokenUsage: parsed.tokenUsage,
|
|
7482
7653
|
durationMs: parsed.durationMs,
|
|
7483
|
-
startTime: parsed.meta.startedAt
|
|
7654
|
+
startTime: parsed.meta.startedAt,
|
|
7655
|
+
...fileChanges ? { fileChanges } : {}
|
|
7484
7656
|
};
|
|
7485
7657
|
}
|
|
7486
7658
|
async resolveSessionDir() {
|
|
@@ -7488,8 +7660,8 @@ var CopilotLogProvider = class {
|
|
|
7488
7660
|
return this.config.sessionDir;
|
|
7489
7661
|
}
|
|
7490
7662
|
if (this.config.sessionId) {
|
|
7491
|
-
const stateDir = this.config.sessionStateDir ??
|
|
7492
|
-
return
|
|
7663
|
+
const stateDir = this.config.sessionStateDir ?? path19.join(homedir4(), ".copilot", "session-state");
|
|
7664
|
+
return path19.join(stateDir, this.config.sessionId);
|
|
7493
7665
|
}
|
|
7494
7666
|
if (this.config.discover === "latest") {
|
|
7495
7667
|
const sessions = await discoverCopilotSessions({
|
|
@@ -7514,7 +7686,7 @@ var CopilotLogProvider = class {
|
|
|
7514
7686
|
import { randomUUID as randomUUID6 } from "node:crypto";
|
|
7515
7687
|
import { existsSync as existsSync2 } from "node:fs";
|
|
7516
7688
|
import { mkdir as mkdir5 } from "node:fs/promises";
|
|
7517
|
-
import
|
|
7689
|
+
import path20 from "node:path";
|
|
7518
7690
|
|
|
7519
7691
|
// src/evaluation/providers/copilot-sdk-log-tracker.ts
|
|
7520
7692
|
var GLOBAL_LOGS_KEY4 = Symbol.for("agentv.copilotSdkLogs");
|
|
@@ -7760,6 +7932,10 @@ var CopilotSdkProvider = class {
|
|
|
7760
7932
|
content: finalContent
|
|
7761
7933
|
});
|
|
7762
7934
|
}
|
|
7935
|
+
const sessionWorkspacePath = session.workspacePath;
|
|
7936
|
+
const fileChanges = sessionWorkspacePath ? await captureSessionArtifacts(path20.join(sessionWorkspacePath, "files")).catch(
|
|
7937
|
+
() => void 0
|
|
7938
|
+
) : void 0;
|
|
7763
7939
|
return {
|
|
7764
7940
|
raw: {
|
|
7765
7941
|
model: this.config.model,
|
|
@@ -7771,7 +7947,8 @@ var CopilotSdkProvider = class {
|
|
|
7771
7947
|
costUsd,
|
|
7772
7948
|
durationMs,
|
|
7773
7949
|
startTime,
|
|
7774
|
-
endTime
|
|
7950
|
+
endTime,
|
|
7951
|
+
...fileChanges ? { fileChanges } : {}
|
|
7775
7952
|
};
|
|
7776
7953
|
} finally {
|
|
7777
7954
|
unsubscribe();
|
|
@@ -7824,10 +8001,10 @@ var CopilotSdkProvider = class {
|
|
|
7824
8001
|
}
|
|
7825
8002
|
resolveCwd(cwdOverride) {
|
|
7826
8003
|
if (cwdOverride) {
|
|
7827
|
-
return
|
|
8004
|
+
return path20.resolve(cwdOverride);
|
|
7828
8005
|
}
|
|
7829
8006
|
if (this.config.cwd) {
|
|
7830
|
-
return
|
|
8007
|
+
return path20.resolve(this.config.cwd);
|
|
7831
8008
|
}
|
|
7832
8009
|
return void 0;
|
|
7833
8010
|
}
|
|
@@ -7836,9 +8013,9 @@ var CopilotSdkProvider = class {
|
|
|
7836
8013
|
return void 0;
|
|
7837
8014
|
}
|
|
7838
8015
|
if (this.config.logDir) {
|
|
7839
|
-
return
|
|
8016
|
+
return path20.resolve(this.config.logDir);
|
|
7840
8017
|
}
|
|
7841
|
-
return
|
|
8018
|
+
return path20.join(process.cwd(), ".agentv", "logs", "copilot-sdk");
|
|
7842
8019
|
}
|
|
7843
8020
|
async createStreamLogger(request) {
|
|
7844
8021
|
const logDir = this.resolveLogDirectory();
|
|
@@ -7852,7 +8029,7 @@ var CopilotSdkProvider = class {
|
|
|
7852
8029
|
console.warn(`Skipping Copilot SDK stream logging (could not create ${logDir}): ${message}`);
|
|
7853
8030
|
return void 0;
|
|
7854
8031
|
}
|
|
7855
|
-
const filePath =
|
|
8032
|
+
const filePath = path20.join(logDir, buildLogFilename4(request, this.targetName, "copilot-sdk"));
|
|
7856
8033
|
try {
|
|
7857
8034
|
const logger = await CopilotStreamLogger.create(
|
|
7858
8035
|
{
|
|
@@ -7861,7 +8038,8 @@ var CopilotSdkProvider = class {
|
|
|
7861
8038
|
evalCaseId: request.evalCaseId,
|
|
7862
8039
|
attempt: request.attempt,
|
|
7863
8040
|
format: this.config.logFormat ?? "summary",
|
|
7864
|
-
headerLabel: "Copilot SDK"
|
|
8041
|
+
headerLabel: "Copilot SDK",
|
|
8042
|
+
chunkExtractor: extractSdkChunk
|
|
7865
8043
|
},
|
|
7866
8044
|
summarizeSdkEvent
|
|
7867
8045
|
);
|
|
@@ -7881,9 +8059,9 @@ var CopilotSdkProvider = class {
|
|
|
7881
8059
|
};
|
|
7882
8060
|
function resolveSkillDirectories(cwd) {
|
|
7883
8061
|
const candidates = [
|
|
7884
|
-
|
|
7885
|
-
|
|
7886
|
-
|
|
8062
|
+
path20.join(cwd, ".claude", "skills"),
|
|
8063
|
+
path20.join(cwd, ".agents", "skills"),
|
|
8064
|
+
path20.join(cwd, ".codex", "skills")
|
|
7887
8065
|
];
|
|
7888
8066
|
return candidates.filter((dir) => existsSync2(dir));
|
|
7889
8067
|
}
|
|
@@ -7897,6 +8075,12 @@ function normalizeByokBaseUrl(baseUrl, type) {
|
|
|
7897
8075
|
}
|
|
7898
8076
|
return trimmed;
|
|
7899
8077
|
}
|
|
8078
|
+
function extractSdkChunk(eventType, data) {
|
|
8079
|
+
if (eventType !== "assistant.message_delta") return void 0;
|
|
8080
|
+
if (!data || typeof data !== "object") return void 0;
|
|
8081
|
+
const d = data;
|
|
8082
|
+
return typeof d.deltaContent === "string" ? d.deltaContent : void 0;
|
|
8083
|
+
}
|
|
7900
8084
|
function summarizeSdkEvent(eventType, data) {
|
|
7901
8085
|
if (!data || typeof data !== "object") {
|
|
7902
8086
|
return eventType;
|
|
@@ -7967,7 +8151,7 @@ import { randomUUID as randomUUID7 } from "node:crypto";
|
|
|
7967
8151
|
import { accessSync, createWriteStream as createWriteStream5, readFileSync as readFileSync2 } from "node:fs";
|
|
7968
8152
|
import { mkdir as mkdir6, mkdtemp, rm, writeFile } from "node:fs/promises";
|
|
7969
8153
|
import { tmpdir } from "node:os";
|
|
7970
|
-
import
|
|
8154
|
+
import path21 from "node:path";
|
|
7971
8155
|
|
|
7972
8156
|
// src/evaluation/providers/pi-log-tracker.ts
|
|
7973
8157
|
var GLOBAL_LOGS_KEY5 = Symbol.for("agentv.piLogs");
|
|
@@ -8173,7 +8357,7 @@ var PiCliProvider = class {
|
|
|
8173
8357
|
const cwd = this.resolveCwd(workspaceRoot, request.cwd);
|
|
8174
8358
|
const logger = await this.createStreamLogger(request).catch(() => void 0);
|
|
8175
8359
|
try {
|
|
8176
|
-
const promptFile =
|
|
8360
|
+
const promptFile = path21.join(cwd, PROMPT_FILENAME);
|
|
8177
8361
|
await writeFile(promptFile, request.question, "utf8");
|
|
8178
8362
|
const args = this.buildPiArgs(request.question, inputFiles);
|
|
8179
8363
|
const result = await this.executePi(args, cwd, request.signal, logger);
|
|
@@ -8236,10 +8420,10 @@ var PiCliProvider = class {
|
|
|
8236
8420
|
}
|
|
8237
8421
|
resolveCwd(workspaceRoot, cwdOverride) {
|
|
8238
8422
|
if (cwdOverride) {
|
|
8239
|
-
return
|
|
8423
|
+
return path21.resolve(cwdOverride);
|
|
8240
8424
|
}
|
|
8241
8425
|
if (this.config.cwd) {
|
|
8242
|
-
return
|
|
8426
|
+
return path21.resolve(this.config.cwd);
|
|
8243
8427
|
}
|
|
8244
8428
|
if (workspaceRoot) {
|
|
8245
8429
|
return workspaceRoot;
|
|
@@ -8345,7 +8529,7 @@ ${prompt}` : prompt;
|
|
|
8345
8529
|
return env;
|
|
8346
8530
|
}
|
|
8347
8531
|
async createWorkspace() {
|
|
8348
|
-
return await mkdtemp(
|
|
8532
|
+
return await mkdtemp(path21.join(tmpdir(), WORKSPACE_PREFIX));
|
|
8349
8533
|
}
|
|
8350
8534
|
async cleanupWorkspace(workspaceRoot) {
|
|
8351
8535
|
try {
|
|
@@ -8355,9 +8539,9 @@ ${prompt}` : prompt;
|
|
|
8355
8539
|
}
|
|
8356
8540
|
resolveLogDirectory() {
|
|
8357
8541
|
if (this.config.logDir) {
|
|
8358
|
-
return
|
|
8542
|
+
return path21.resolve(this.config.logDir);
|
|
8359
8543
|
}
|
|
8360
|
-
return
|
|
8544
|
+
return path21.join(process.cwd(), ".agentv", "logs", "pi-cli");
|
|
8361
8545
|
}
|
|
8362
8546
|
async createStreamLogger(request) {
|
|
8363
8547
|
const logDir = this.resolveLogDirectory();
|
|
@@ -8371,7 +8555,7 @@ ${prompt}` : prompt;
|
|
|
8371
8555
|
console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
|
|
8372
8556
|
return void 0;
|
|
8373
8557
|
}
|
|
8374
|
-
const filePath =
|
|
8558
|
+
const filePath = path21.join(logDir, buildLogFilename5(request, this.targetName));
|
|
8375
8559
|
try {
|
|
8376
8560
|
const logger = await PiStreamLogger.create({
|
|
8377
8561
|
filePath,
|
|
@@ -8842,8 +9026,8 @@ function resolveWindowsCmd(executable) {
|
|
|
8842
9026
|
const content = readFileSync2(cmdPath, "utf-8");
|
|
8843
9027
|
const match = content.match(/"?%_prog%"?\s+"([^"]+\.js)"/);
|
|
8844
9028
|
if (match) {
|
|
8845
|
-
const dp0 =
|
|
8846
|
-
const scriptPath = match[1].replace(/%dp0%[/\\]?/gi, `${dp0}${
|
|
9029
|
+
const dp0 = path21.dirname(path21.resolve(cmdPath));
|
|
9030
|
+
const scriptPath = match[1].replace(/%dp0%[/\\]?/gi, `${dp0}${path21.sep}`);
|
|
8847
9031
|
try {
|
|
8848
9032
|
accessSync(scriptPath);
|
|
8849
9033
|
return ["node", [scriptPath]];
|
|
@@ -8922,13 +9106,13 @@ import { execSync as execSync2 } from "node:child_process";
|
|
|
8922
9106
|
import { randomUUID as randomUUID8 } from "node:crypto";
|
|
8923
9107
|
import { accessSync as accessSync2, createWriteStream as createWriteStream6, mkdirSync } from "node:fs";
|
|
8924
9108
|
import { mkdir as mkdir7 } from "node:fs/promises";
|
|
8925
|
-
import
|
|
9109
|
+
import path23 from "node:path";
|
|
8926
9110
|
import { createInterface } from "node:readline";
|
|
8927
9111
|
import { fileURLToPath as fileURLToPath4, pathToFileURL } from "node:url";
|
|
8928
9112
|
|
|
8929
9113
|
// src/paths.ts
|
|
8930
9114
|
import os2 from "node:os";
|
|
8931
|
-
import
|
|
9115
|
+
import path22 from "node:path";
|
|
8932
9116
|
var logged = false;
|
|
8933
9117
|
function getAgentvHome() {
|
|
8934
9118
|
const envHome = process.env.AGENTV_HOME;
|
|
@@ -8939,19 +9123,19 @@ function getAgentvHome() {
|
|
|
8939
9123
|
}
|
|
8940
9124
|
return envHome;
|
|
8941
9125
|
}
|
|
8942
|
-
return
|
|
9126
|
+
return path22.join(os2.homedir(), ".agentv");
|
|
8943
9127
|
}
|
|
8944
9128
|
function getWorkspacesRoot() {
|
|
8945
|
-
return
|
|
9129
|
+
return path22.join(getAgentvHome(), "workspaces");
|
|
8946
9130
|
}
|
|
8947
9131
|
function getSubagentsRoot() {
|
|
8948
|
-
return
|
|
9132
|
+
return path22.join(getAgentvHome(), "subagents");
|
|
8949
9133
|
}
|
|
8950
9134
|
function getTraceStateRoot() {
|
|
8951
|
-
return
|
|
9135
|
+
return path22.join(getAgentvHome(), "trace-state");
|
|
8952
9136
|
}
|
|
8953
9137
|
function getWorkspacePoolRoot() {
|
|
8954
|
-
return
|
|
9138
|
+
return path22.join(getAgentvHome(), "workspace-pool");
|
|
8955
9139
|
}
|
|
8956
9140
|
|
|
8957
9141
|
// src/evaluation/providers/pi-coding-agent.ts
|
|
@@ -8973,7 +9157,7 @@ async function promptInstall() {
|
|
|
8973
9157
|
}
|
|
8974
9158
|
}
|
|
8975
9159
|
function findManagedSdkInstallRoot() {
|
|
8976
|
-
return
|
|
9160
|
+
return path23.join(getAgentvHome(), "deps", "pi-sdk");
|
|
8977
9161
|
}
|
|
8978
9162
|
function resolveGlobalNpmRoot() {
|
|
8979
9163
|
try {
|
|
@@ -8987,7 +9171,7 @@ function resolveGlobalNpmRoot() {
|
|
|
8987
9171
|
}
|
|
8988
9172
|
}
|
|
8989
9173
|
function buildGlobalModuleEntry(moduleName, globalNpmRoot) {
|
|
8990
|
-
return
|
|
9174
|
+
return path23.join(globalNpmRoot, ...moduleName.split("/"), "dist", "index.js");
|
|
8991
9175
|
}
|
|
8992
9176
|
function findAccessiblePath(paths) {
|
|
8993
9177
|
for (const candidate of paths) {
|
|
@@ -9013,11 +9197,11 @@ async function tryImportLocalSdkModules() {
|
|
|
9013
9197
|
async function tryImportManagedSdkModules() {
|
|
9014
9198
|
const managedRoot = findManagedSdkInstallRoot();
|
|
9015
9199
|
const piCodingAgentEntry = findAccessiblePath([
|
|
9016
|
-
|
|
9200
|
+
path23.join(managedRoot, "node_modules", "@mariozechner", "pi-coding-agent", "dist", "index.js")
|
|
9017
9201
|
]);
|
|
9018
9202
|
const piAiEntry = findAccessiblePath([
|
|
9019
|
-
|
|
9020
|
-
|
|
9203
|
+
path23.join(managedRoot, "node_modules", "@mariozechner", "pi-ai", "dist", "index.js"),
|
|
9204
|
+
path23.join(
|
|
9021
9205
|
managedRoot,
|
|
9022
9206
|
"node_modules",
|
|
9023
9207
|
"@mariozechner",
|
|
@@ -9048,7 +9232,7 @@ async function tryImportGlobalSdkModules() {
|
|
|
9048
9232
|
]);
|
|
9049
9233
|
const piAiEntry = findAccessiblePath([
|
|
9050
9234
|
buildGlobalModuleEntry("@mariozechner/pi-ai", globalNpmRoot),
|
|
9051
|
-
|
|
9235
|
+
path23.join(
|
|
9052
9236
|
globalNpmRoot,
|
|
9053
9237
|
"@mariozechner",
|
|
9054
9238
|
"pi-coding-agent",
|
|
@@ -9349,10 +9533,10 @@ ${fileList}`;
|
|
|
9349
9533
|
}
|
|
9350
9534
|
resolveCwd(cwdOverride) {
|
|
9351
9535
|
if (cwdOverride) {
|
|
9352
|
-
return
|
|
9536
|
+
return path23.resolve(cwdOverride);
|
|
9353
9537
|
}
|
|
9354
9538
|
if (this.config.cwd) {
|
|
9355
|
-
return
|
|
9539
|
+
return path23.resolve(this.config.cwd);
|
|
9356
9540
|
}
|
|
9357
9541
|
return process.cwd();
|
|
9358
9542
|
}
|
|
@@ -9371,9 +9555,9 @@ ${fileList}`;
|
|
|
9371
9555
|
}
|
|
9372
9556
|
resolveLogDirectory() {
|
|
9373
9557
|
if (this.config.logDir) {
|
|
9374
|
-
return
|
|
9558
|
+
return path23.resolve(this.config.logDir);
|
|
9375
9559
|
}
|
|
9376
|
-
return
|
|
9560
|
+
return path23.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
|
|
9377
9561
|
}
|
|
9378
9562
|
async createStreamLogger(request) {
|
|
9379
9563
|
const logDir = this.resolveLogDirectory();
|
|
@@ -9387,7 +9571,7 @@ ${fileList}`;
|
|
|
9387
9571
|
console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
|
|
9388
9572
|
return void 0;
|
|
9389
9573
|
}
|
|
9390
|
-
const filePath =
|
|
9574
|
+
const filePath = path23.join(logDir, buildLogFilename6(request, this.targetName));
|
|
9391
9575
|
try {
|
|
9392
9576
|
const logger = await PiStreamLogger2.create({
|
|
9393
9577
|
filePath,
|
|
@@ -9601,18 +9785,18 @@ var ProviderRegistry = class {
|
|
|
9601
9785
|
|
|
9602
9786
|
// src/evaluation/providers/vscode-provider.ts
|
|
9603
9787
|
import { exec as exec2 } from "node:child_process";
|
|
9604
|
-
import { constants as constants3, access as access3, stat as
|
|
9605
|
-
import
|
|
9606
|
-
import { promisify as
|
|
9788
|
+
import { constants as constants3, access as access3, stat as stat6 } from "node:fs/promises";
|
|
9789
|
+
import path34 from "node:path";
|
|
9790
|
+
import { promisify as promisify4 } from "node:util";
|
|
9607
9791
|
|
|
9608
9792
|
// src/evaluation/providers/vscode/dispatch/agentDispatch.ts
|
|
9609
|
-
import { stat as
|
|
9610
|
-
import
|
|
9793
|
+
import { stat as stat5, writeFile as writeFile4 } from "node:fs/promises";
|
|
9794
|
+
import path32 from "node:path";
|
|
9611
9795
|
|
|
9612
9796
|
// src/evaluation/providers/vscode/utils/fs.ts
|
|
9613
9797
|
import { constants as constants2 } from "node:fs";
|
|
9614
|
-
import { access as access2, mkdir as mkdir8, readdir as
|
|
9615
|
-
import
|
|
9798
|
+
import { access as access2, mkdir as mkdir8, readdir as readdir3, rm as rm2, stat as stat3 } from "node:fs/promises";
|
|
9799
|
+
import path24 from "node:path";
|
|
9616
9800
|
async function pathExists(target) {
|
|
9617
9801
|
try {
|
|
9618
9802
|
await access2(target, constants2.F_OK);
|
|
@@ -9625,10 +9809,10 @@ async function ensureDir(target) {
|
|
|
9625
9809
|
await mkdir8(target, { recursive: true });
|
|
9626
9810
|
}
|
|
9627
9811
|
async function readDirEntries(target) {
|
|
9628
|
-
const entries = await
|
|
9812
|
+
const entries = await readdir3(target, { withFileTypes: true });
|
|
9629
9813
|
return entries.map((entry) => ({
|
|
9630
9814
|
name: entry.name,
|
|
9631
|
-
absolutePath:
|
|
9815
|
+
absolutePath: path24.join(target, entry.name),
|
|
9632
9816
|
isDirectory: entry.isDirectory()
|
|
9633
9817
|
}));
|
|
9634
9818
|
}
|
|
@@ -9643,9 +9827,9 @@ async function removeIfExists(target) {
|
|
|
9643
9827
|
}
|
|
9644
9828
|
|
|
9645
9829
|
// src/evaluation/providers/vscode/utils/path.ts
|
|
9646
|
-
import
|
|
9830
|
+
import path25 from "node:path";
|
|
9647
9831
|
function pathToFileUri2(filePath) {
|
|
9648
|
-
const absolutePath =
|
|
9832
|
+
const absolutePath = path25.isAbsolute(filePath) ? filePath : path25.resolve(filePath);
|
|
9649
9833
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
9650
9834
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
9651
9835
|
return `file:///${normalizedPath}`;
|
|
@@ -9654,7 +9838,7 @@ function pathToFileUri2(filePath) {
|
|
|
9654
9838
|
}
|
|
9655
9839
|
|
|
9656
9840
|
// src/evaluation/providers/vscode/dispatch/promptBuilder.ts
|
|
9657
|
-
import
|
|
9841
|
+
import path26 from "node:path";
|
|
9658
9842
|
|
|
9659
9843
|
// src/evaluation/providers/vscode/utils/template.ts
|
|
9660
9844
|
function renderTemplate2(content, variables) {
|
|
@@ -9746,8 +9930,8 @@ function createBatchRequestPrompt(userQuery, responseFileTmp, responseFileFinal,
|
|
|
9746
9930
|
});
|
|
9747
9931
|
}
|
|
9748
9932
|
function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateContent) {
|
|
9749
|
-
const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${
|
|
9750
|
-
const responseList = responseFiles.map((file) => `"${
|
|
9933
|
+
const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${path26.basename(file)}`).join("\n");
|
|
9934
|
+
const responseList = responseFiles.map((file) => `"${path26.basename(file)}"`).join(", ");
|
|
9751
9935
|
return renderTemplate2(templateContent, {
|
|
9752
9936
|
requestFiles: requestLines,
|
|
9753
9937
|
responseList
|
|
@@ -9755,8 +9939,8 @@ function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateCont
|
|
|
9755
9939
|
}
|
|
9756
9940
|
|
|
9757
9941
|
// src/evaluation/providers/vscode/dispatch/responseWaiter.ts
|
|
9758
|
-
import { readFile as
|
|
9759
|
-
import
|
|
9942
|
+
import { readFile as readFile12 } from "node:fs/promises";
|
|
9943
|
+
import path27 from "node:path";
|
|
9760
9944
|
|
|
9761
9945
|
// src/evaluation/providers/vscode/utils/time.ts
|
|
9762
9946
|
function sleep2(ms) {
|
|
@@ -9794,7 +9978,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
|
|
|
9794
9978
|
const maxAttempts = 10;
|
|
9795
9979
|
while (attempts < maxAttempts) {
|
|
9796
9980
|
try {
|
|
9797
|
-
const content = await
|
|
9981
|
+
const content = await readFile12(responseFileFinal, { encoding: "utf8" });
|
|
9798
9982
|
if (!silent) {
|
|
9799
9983
|
process.stdout.write(`${content}
|
|
9800
9984
|
`);
|
|
@@ -9815,7 +9999,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
|
|
|
9815
9999
|
}
|
|
9816
10000
|
async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, silent = false, timeoutMs = DEFAULT_TIMEOUT_MS) {
|
|
9817
10001
|
if (!silent) {
|
|
9818
|
-
const fileList = responseFilesFinal.map((file) =>
|
|
10002
|
+
const fileList = responseFilesFinal.map((file) => path27.basename(file)).join(", ");
|
|
9819
10003
|
console.error(`waiting for ${responseFilesFinal.length} batch response(s): ${fileList}`);
|
|
9820
10004
|
}
|
|
9821
10005
|
const deadline = Date.now() + timeoutMs;
|
|
@@ -9824,7 +10008,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
9824
10008
|
while (pending.size > 0) {
|
|
9825
10009
|
if (Date.now() >= deadline) {
|
|
9826
10010
|
if (!silent) {
|
|
9827
|
-
const remaining = [...pending].map((f) =>
|
|
10011
|
+
const remaining = [...pending].map((f) => path27.basename(f)).join(", ");
|
|
9828
10012
|
console.error(
|
|
9829
10013
|
`error: timed out after ${Math.round(timeoutMs / 1e3)}s waiting for batch responses. Still pending: ${remaining}`
|
|
9830
10014
|
);
|
|
@@ -9851,7 +10035,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
9851
10035
|
const maxAttempts = 10;
|
|
9852
10036
|
while (attempts < maxAttempts) {
|
|
9853
10037
|
try {
|
|
9854
|
-
const content = await
|
|
10038
|
+
const content = await readFile12(file, { encoding: "utf8" });
|
|
9855
10039
|
if (!silent) {
|
|
9856
10040
|
process.stdout.write(`${content}
|
|
9857
10041
|
`);
|
|
@@ -9875,21 +10059,21 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
9875
10059
|
// src/evaluation/providers/vscode/dispatch/vscodeProcess.ts
|
|
9876
10060
|
import { exec, spawn as spawn4 } from "node:child_process";
|
|
9877
10061
|
import { mkdir as mkdir9, writeFile as writeFile2 } from "node:fs/promises";
|
|
9878
|
-
import
|
|
9879
|
-
import { promisify as
|
|
10062
|
+
import path29 from "node:path";
|
|
10063
|
+
import { promisify as promisify3 } from "node:util";
|
|
9880
10064
|
|
|
9881
10065
|
// src/evaluation/providers/vscode/dispatch/constants.ts
|
|
9882
|
-
import
|
|
10066
|
+
import path28 from "node:path";
|
|
9883
10067
|
var DEFAULT_LOCK_NAME = "subagent.lock";
|
|
9884
10068
|
var DEFAULT_ALIVE_FILENAME = ".alive";
|
|
9885
10069
|
function getDefaultSubagentRoot(vscodeCmd = "code") {
|
|
9886
10070
|
const folder = vscodeCmd === "code-insiders" ? "vscode-insiders-agents" : "vscode-agents";
|
|
9887
|
-
return
|
|
10071
|
+
return path28.join(getSubagentsRoot(), folder);
|
|
9888
10072
|
}
|
|
9889
10073
|
var DEFAULT_SUBAGENT_ROOT = getDefaultSubagentRoot();
|
|
9890
10074
|
|
|
9891
10075
|
// src/evaluation/providers/vscode/dispatch/vscodeProcess.ts
|
|
9892
|
-
var
|
|
10076
|
+
var execAsync3 = promisify3(exec);
|
|
9893
10077
|
function shellQuote(cmd) {
|
|
9894
10078
|
return cmd.includes(" ") ? `"${cmd}"` : cmd;
|
|
9895
10079
|
}
|
|
@@ -9935,7 +10119,7 @@ async function raceSpawnError(child, graceMs = 200) {
|
|
|
9935
10119
|
}
|
|
9936
10120
|
async function checkWorkspaceOpened(workspaceName, vscodeCmd) {
|
|
9937
10121
|
try {
|
|
9938
|
-
const { stdout } = await
|
|
10122
|
+
const { stdout } = await execAsync3(`${shellQuote(vscodeCmd)} --status`, {
|
|
9939
10123
|
timeout: 1e4,
|
|
9940
10124
|
windowsHide: true
|
|
9941
10125
|
});
|
|
@@ -9951,11 +10135,11 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
9951
10135
|
await raceSpawnError(child);
|
|
9952
10136
|
return true;
|
|
9953
10137
|
}
|
|
9954
|
-
const aliveFile =
|
|
10138
|
+
const aliveFile = path29.join(subagentDir, DEFAULT_ALIVE_FILENAME);
|
|
9955
10139
|
await removeIfExists(aliveFile);
|
|
9956
|
-
const githubAgentsDir =
|
|
10140
|
+
const githubAgentsDir = path29.join(subagentDir, ".github", "agents");
|
|
9957
10141
|
await mkdir9(githubAgentsDir, { recursive: true });
|
|
9958
|
-
const wakeupDst =
|
|
10142
|
+
const wakeupDst = path29.join(githubAgentsDir, "wakeup.md");
|
|
9959
10143
|
await writeFile2(wakeupDst, DEFAULT_WAKEUP_CONTENT, "utf8");
|
|
9960
10144
|
const workspaceChild = spawnVsCode(vscodeCmd, [workspacePath], {
|
|
9961
10145
|
label: "open-workspace"
|
|
@@ -9968,7 +10152,7 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
9968
10152
|
"chat",
|
|
9969
10153
|
"-m",
|
|
9970
10154
|
wakeupChatId,
|
|
9971
|
-
`create a file named .alive in the ${
|
|
10155
|
+
`create a file named .alive in the ${path29.basename(subagentDir)} folder`
|
|
9972
10156
|
];
|
|
9973
10157
|
const wakeupChild = spawnVsCode(vscodeCmd, chatArgs, { label: "send-wakeup-chat" });
|
|
9974
10158
|
await raceSpawnError(wakeupChild);
|
|
@@ -9983,10 +10167,10 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
9983
10167
|
return true;
|
|
9984
10168
|
}
|
|
9985
10169
|
async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, requestInstructions, timestamp, vscodeCmd) {
|
|
9986
|
-
const workspacePath =
|
|
9987
|
-
const messagesDir =
|
|
10170
|
+
const workspacePath = path29.join(subagentDir, `${path29.basename(subagentDir)}.code-workspace`);
|
|
10171
|
+
const messagesDir = path29.join(subagentDir, "messages");
|
|
9988
10172
|
await mkdir9(messagesDir, { recursive: true });
|
|
9989
|
-
const reqFile =
|
|
10173
|
+
const reqFile = path29.join(messagesDir, `${timestamp}_req.md`);
|
|
9990
10174
|
await writeFile2(reqFile, requestInstructions, { encoding: "utf8" });
|
|
9991
10175
|
const reqUri = pathToFileUri2(reqFile);
|
|
9992
10176
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
@@ -9994,16 +10178,16 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
|
|
|
9994
10178
|
chatArgs.push("-a", attachment);
|
|
9995
10179
|
}
|
|
9996
10180
|
chatArgs.push("-a", reqFile);
|
|
9997
|
-
chatArgs.push(`Follow instructions in [${
|
|
10181
|
+
chatArgs.push(`Follow instructions in [${path29.basename(reqFile)}](${reqUri})`);
|
|
9998
10182
|
const workspaceReady = await ensureWorkspaceFocused(
|
|
9999
10183
|
workspacePath,
|
|
10000
|
-
|
|
10184
|
+
path29.basename(subagentDir),
|
|
10001
10185
|
subagentDir,
|
|
10002
10186
|
vscodeCmd
|
|
10003
10187
|
);
|
|
10004
10188
|
if (!workspaceReady) {
|
|
10005
10189
|
throw new Error(
|
|
10006
|
-
`VS Code workspace '${
|
|
10190
|
+
`VS Code workspace '${path29.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
|
|
10007
10191
|
);
|
|
10008
10192
|
}
|
|
10009
10193
|
await sleep2(500);
|
|
@@ -10011,8 +10195,8 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
|
|
|
10011
10195
|
await raceSpawnError(child);
|
|
10012
10196
|
}
|
|
10013
10197
|
async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, chatInstruction, vscodeCmd) {
|
|
10014
|
-
const workspacePath =
|
|
10015
|
-
const messagesDir =
|
|
10198
|
+
const workspacePath = path29.join(subagentDir, `${path29.basename(subagentDir)}.code-workspace`);
|
|
10199
|
+
const messagesDir = path29.join(subagentDir, "messages");
|
|
10016
10200
|
await mkdir9(messagesDir, { recursive: true });
|
|
10017
10201
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
10018
10202
|
for (const attachment of attachmentPaths) {
|
|
@@ -10021,13 +10205,13 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
|
|
|
10021
10205
|
chatArgs.push(chatInstruction);
|
|
10022
10206
|
const workspaceReady = await ensureWorkspaceFocused(
|
|
10023
10207
|
workspacePath,
|
|
10024
|
-
|
|
10208
|
+
path29.basename(subagentDir),
|
|
10025
10209
|
subagentDir,
|
|
10026
10210
|
vscodeCmd
|
|
10027
10211
|
);
|
|
10028
10212
|
if (!workspaceReady) {
|
|
10029
10213
|
throw new Error(
|
|
10030
|
-
`VS Code workspace '${
|
|
10214
|
+
`VS Code workspace '${path29.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
|
|
10031
10215
|
);
|
|
10032
10216
|
}
|
|
10033
10217
|
await sleep2(500);
|
|
@@ -10036,11 +10220,11 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
|
|
|
10036
10220
|
}
|
|
10037
10221
|
|
|
10038
10222
|
// src/evaluation/providers/vscode/dispatch/workspaceManager.ts
|
|
10039
|
-
import { copyFile, mkdir as mkdir10, readFile as
|
|
10040
|
-
import
|
|
10223
|
+
import { copyFile, mkdir as mkdir10, readFile as readFile13, readdir as readdir4, stat as stat4, writeFile as writeFile3 } from "node:fs/promises";
|
|
10224
|
+
import path31 from "node:path";
|
|
10041
10225
|
|
|
10042
10226
|
// src/evaluation/providers/vscode/utils/workspace.ts
|
|
10043
|
-
import
|
|
10227
|
+
import path30 from "node:path";
|
|
10044
10228
|
import JSON5 from "json5";
|
|
10045
10229
|
function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
10046
10230
|
let workspace;
|
|
@@ -10057,10 +10241,10 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
10057
10241
|
}
|
|
10058
10242
|
const transformedFolders = workspace.folders.map((folder) => {
|
|
10059
10243
|
const folderPath = folder.path;
|
|
10060
|
-
if (
|
|
10244
|
+
if (path30.isAbsolute(folderPath)) {
|
|
10061
10245
|
return folder;
|
|
10062
10246
|
}
|
|
10063
|
-
const absolutePath =
|
|
10247
|
+
const absolutePath = path30.resolve(templateDir, folderPath);
|
|
10064
10248
|
return {
|
|
10065
10249
|
...folder,
|
|
10066
10250
|
path: absolutePath
|
|
@@ -10082,19 +10266,19 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
10082
10266
|
if (locationMap && typeof locationMap === "object") {
|
|
10083
10267
|
const transformedMap = {};
|
|
10084
10268
|
for (const [locationPath, value] of Object.entries(locationMap)) {
|
|
10085
|
-
const isAbsolute =
|
|
10269
|
+
const isAbsolute = path30.isAbsolute(locationPath);
|
|
10086
10270
|
if (isAbsolute) {
|
|
10087
10271
|
transformedMap[locationPath] = value;
|
|
10088
10272
|
} else {
|
|
10089
10273
|
const firstGlobIndex = locationPath.search(/[*]/);
|
|
10090
10274
|
if (firstGlobIndex === -1) {
|
|
10091
|
-
const resolvedPath =
|
|
10275
|
+
const resolvedPath = path30.resolve(templateDir, locationPath).replace(/\\/g, "/");
|
|
10092
10276
|
transformedMap[resolvedPath] = value;
|
|
10093
10277
|
} else {
|
|
10094
10278
|
const basePathEnd = locationPath.lastIndexOf("/", firstGlobIndex);
|
|
10095
10279
|
const basePath = basePathEnd !== -1 ? locationPath.substring(0, basePathEnd) : ".";
|
|
10096
10280
|
const patternPath = locationPath.substring(basePathEnd !== -1 ? basePathEnd : 0);
|
|
10097
|
-
const resolvedPath = (
|
|
10281
|
+
const resolvedPath = (path30.resolve(templateDir, basePath) + patternPath).replace(
|
|
10098
10282
|
/\\/g,
|
|
10099
10283
|
"/"
|
|
10100
10284
|
);
|
|
@@ -10135,7 +10319,7 @@ async function findUnlockedSubagent(subagentRoot) {
|
|
|
10135
10319
|
number: Number.parseInt(entry.name.split("-")[1] ?? "", 10)
|
|
10136
10320
|
})).filter((entry) => Number.isInteger(entry.number)).sort((a, b) => a.number - b.number);
|
|
10137
10321
|
for (const subagent of subagents) {
|
|
10138
|
-
const lockFile =
|
|
10322
|
+
const lockFile = path31.join(subagent.absolutePath, DEFAULT_LOCK_NAME);
|
|
10139
10323
|
if (!await pathExists(lockFile)) {
|
|
10140
10324
|
return subagent.absolutePath;
|
|
10141
10325
|
}
|
|
@@ -10145,26 +10329,26 @@ async function findUnlockedSubagent(subagentRoot) {
|
|
|
10145
10329
|
async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
10146
10330
|
let workspaceContent;
|
|
10147
10331
|
if (workspaceTemplate) {
|
|
10148
|
-
const workspaceSrc =
|
|
10332
|
+
const workspaceSrc = path31.resolve(workspaceTemplate);
|
|
10149
10333
|
if (!await pathExists(workspaceSrc)) {
|
|
10150
10334
|
throw new Error(`workspace template not found: ${workspaceSrc}`);
|
|
10151
10335
|
}
|
|
10152
|
-
const stats = await
|
|
10336
|
+
const stats = await stat4(workspaceSrc);
|
|
10153
10337
|
if (!stats.isFile()) {
|
|
10154
10338
|
throw new Error(`workspace template must be a file, not a directory: ${workspaceSrc}`);
|
|
10155
10339
|
}
|
|
10156
|
-
const templateText = await
|
|
10340
|
+
const templateText = await readFile13(workspaceSrc, "utf8");
|
|
10157
10341
|
workspaceContent = JSON.parse(templateText);
|
|
10158
10342
|
} else {
|
|
10159
10343
|
workspaceContent = DEFAULT_WORKSPACE_TEMPLATE;
|
|
10160
10344
|
}
|
|
10161
|
-
const workspaceName = `${
|
|
10162
|
-
const workspaceDst =
|
|
10163
|
-
const templateDir = workspaceTemplate ?
|
|
10345
|
+
const workspaceName = `${path31.basename(subagentDir)}.code-workspace`;
|
|
10346
|
+
const workspaceDst = path31.join(subagentDir, workspaceName);
|
|
10347
|
+
const templateDir = workspaceTemplate ? path31.dirname(path31.resolve(workspaceTemplate)) : subagentDir;
|
|
10164
10348
|
const workspaceJson = JSON.stringify(workspaceContent, null, 2);
|
|
10165
10349
|
let transformedContent = transformWorkspacePaths(workspaceJson, templateDir);
|
|
10166
10350
|
if (cwd) {
|
|
10167
|
-
const absCwd =
|
|
10351
|
+
const absCwd = path31.resolve(cwd);
|
|
10168
10352
|
const parsed = JSON.parse(transformedContent);
|
|
10169
10353
|
const alreadyPresent = parsed.folders.some((f) => f.path === absCwd);
|
|
10170
10354
|
if (!alreadyPresent) {
|
|
@@ -10173,35 +10357,35 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
|
10173
10357
|
}
|
|
10174
10358
|
}
|
|
10175
10359
|
await writeFile3(workspaceDst, transformedContent, "utf8");
|
|
10176
|
-
const messagesDir =
|
|
10360
|
+
const messagesDir = path31.join(subagentDir, "messages");
|
|
10177
10361
|
await mkdir10(messagesDir, { recursive: true });
|
|
10178
10362
|
return { workspace: workspaceDst, messagesDir };
|
|
10179
10363
|
}
|
|
10180
10364
|
async function createSubagentLock(subagentDir) {
|
|
10181
|
-
const messagesDir =
|
|
10365
|
+
const messagesDir = path31.join(subagentDir, "messages");
|
|
10182
10366
|
if (await pathExists(messagesDir)) {
|
|
10183
|
-
const files = await
|
|
10367
|
+
const files = await readdir4(messagesDir);
|
|
10184
10368
|
await Promise.all(
|
|
10185
10369
|
files.map(async (file) => {
|
|
10186
|
-
const target =
|
|
10370
|
+
const target = path31.join(messagesDir, file);
|
|
10187
10371
|
await removeIfExists(target);
|
|
10188
10372
|
})
|
|
10189
10373
|
);
|
|
10190
10374
|
}
|
|
10191
|
-
const githubAgentsDir =
|
|
10375
|
+
const githubAgentsDir = path31.join(subagentDir, ".github", "agents");
|
|
10192
10376
|
if (await pathExists(githubAgentsDir)) {
|
|
10193
|
-
const agentFiles = await
|
|
10377
|
+
const agentFiles = await readdir4(githubAgentsDir);
|
|
10194
10378
|
const preservedFiles = /* @__PURE__ */ new Set(["wakeup.md", "subagent.md"]);
|
|
10195
10379
|
await Promise.all(
|
|
10196
|
-
agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(
|
|
10380
|
+
agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(path31.join(githubAgentsDir, file)))
|
|
10197
10381
|
);
|
|
10198
10382
|
}
|
|
10199
|
-
const lockFile =
|
|
10383
|
+
const lockFile = path31.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
10200
10384
|
await writeFile3(lockFile, "", { encoding: "utf8" });
|
|
10201
10385
|
return lockFile;
|
|
10202
10386
|
}
|
|
10203
10387
|
async function removeSubagentLock(subagentDir) {
|
|
10204
|
-
const lockFile =
|
|
10388
|
+
const lockFile = path31.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
10205
10389
|
await removeIfExists(lockFile);
|
|
10206
10390
|
}
|
|
10207
10391
|
async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspaceTemplate, dryRun, cwd) {
|
|
@@ -10221,9 +10405,9 @@ async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspa
|
|
|
10221
10405
|
return 1;
|
|
10222
10406
|
}
|
|
10223
10407
|
if (promptFile) {
|
|
10224
|
-
const githubAgentsDir =
|
|
10408
|
+
const githubAgentsDir = path31.join(subagentDir, ".github", "agents");
|
|
10225
10409
|
await mkdir10(githubAgentsDir, { recursive: true });
|
|
10226
|
-
const agentFile =
|
|
10410
|
+
const agentFile = path31.join(githubAgentsDir, `${chatId}.md`);
|
|
10227
10411
|
try {
|
|
10228
10412
|
await copyFile(promptFile, agentFile);
|
|
10229
10413
|
} catch (error) {
|
|
@@ -10242,11 +10426,11 @@ async function resolvePromptFile(promptFile) {
|
|
|
10242
10426
|
if (!promptFile) {
|
|
10243
10427
|
return void 0;
|
|
10244
10428
|
}
|
|
10245
|
-
const resolvedPrompt =
|
|
10429
|
+
const resolvedPrompt = path32.resolve(promptFile);
|
|
10246
10430
|
if (!await pathExists(resolvedPrompt)) {
|
|
10247
10431
|
throw new Error(`Prompt file not found: ${resolvedPrompt}`);
|
|
10248
10432
|
}
|
|
10249
|
-
const promptStats = await
|
|
10433
|
+
const promptStats = await stat5(resolvedPrompt);
|
|
10250
10434
|
if (!promptStats.isFile()) {
|
|
10251
10435
|
throw new Error(`Prompt file must be a file, not a directory: ${resolvedPrompt}`);
|
|
10252
10436
|
}
|
|
@@ -10258,7 +10442,7 @@ async function resolveAttachments(extraAttachments) {
|
|
|
10258
10442
|
}
|
|
10259
10443
|
const resolved = [];
|
|
10260
10444
|
for (const attachment of extraAttachments) {
|
|
10261
|
-
const resolvedPath =
|
|
10445
|
+
const resolvedPath = path32.resolve(attachment);
|
|
10262
10446
|
if (!await pathExists(resolvedPath)) {
|
|
10263
10447
|
throw new Error(`Attachment not found: ${resolvedPath}`);
|
|
10264
10448
|
}
|
|
@@ -10300,7 +10484,7 @@ async function dispatchAgentSession(options) {
|
|
|
10300
10484
|
error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
|
|
10301
10485
|
};
|
|
10302
10486
|
}
|
|
10303
|
-
const subagentName =
|
|
10487
|
+
const subagentName = path32.basename(subagentDir);
|
|
10304
10488
|
const chatId = Math.random().toString(16).slice(2, 10);
|
|
10305
10489
|
const preparationResult = await prepareSubagentDirectory(
|
|
10306
10490
|
subagentDir,
|
|
@@ -10328,9 +10512,9 @@ async function dispatchAgentSession(options) {
|
|
|
10328
10512
|
};
|
|
10329
10513
|
}
|
|
10330
10514
|
const timestamp = generateTimestamp();
|
|
10331
|
-
const messagesDir =
|
|
10332
|
-
const responseFileTmp =
|
|
10333
|
-
const responseFileFinal =
|
|
10515
|
+
const messagesDir = path32.join(subagentDir, "messages");
|
|
10516
|
+
const responseFileTmp = path32.join(messagesDir, `${timestamp}_res.tmp.md`);
|
|
10517
|
+
const responseFileFinal = path32.join(messagesDir, `${timestamp}_res.md`);
|
|
10334
10518
|
const requestInstructions = createRequestPrompt(
|
|
10335
10519
|
userQuery,
|
|
10336
10520
|
responseFileTmp,
|
|
@@ -10435,7 +10619,7 @@ async function dispatchBatchAgent(options) {
|
|
|
10435
10619
|
error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
|
|
10436
10620
|
};
|
|
10437
10621
|
}
|
|
10438
|
-
subagentName =
|
|
10622
|
+
subagentName = path32.basename(subagentDir);
|
|
10439
10623
|
const chatId = Math.random().toString(16).slice(2, 10);
|
|
10440
10624
|
const preparationResult = await prepareSubagentDirectory(
|
|
10441
10625
|
subagentDir,
|
|
@@ -10466,17 +10650,17 @@ async function dispatchBatchAgent(options) {
|
|
|
10466
10650
|
};
|
|
10467
10651
|
}
|
|
10468
10652
|
const timestamp = generateTimestamp();
|
|
10469
|
-
const messagesDir =
|
|
10653
|
+
const messagesDir = path32.join(subagentDir, "messages");
|
|
10470
10654
|
requestFiles = userQueries.map(
|
|
10471
|
-
(_, index) =>
|
|
10655
|
+
(_, index) => path32.join(messagesDir, `${timestamp}_${index}_req.md`)
|
|
10472
10656
|
);
|
|
10473
10657
|
const responseTmpFiles = userQueries.map(
|
|
10474
|
-
(_, index) =>
|
|
10658
|
+
(_, index) => path32.join(messagesDir, `${timestamp}_${index}_res.tmp.md`)
|
|
10475
10659
|
);
|
|
10476
10660
|
responseFilesFinal = userQueries.map(
|
|
10477
|
-
(_, index) =>
|
|
10661
|
+
(_, index) => path32.join(messagesDir, `${timestamp}_${index}_res.md`)
|
|
10478
10662
|
);
|
|
10479
|
-
const orchestratorFile =
|
|
10663
|
+
const orchestratorFile = path32.join(messagesDir, `${timestamp}_orchestrator.md`);
|
|
10480
10664
|
if (!dryRun) {
|
|
10481
10665
|
await Promise.all(
|
|
10482
10666
|
userQueries.map((query, index) => {
|
|
@@ -10562,7 +10746,7 @@ async function dispatchBatchAgent(options) {
|
|
|
10562
10746
|
|
|
10563
10747
|
// src/evaluation/providers/vscode/dispatch/provision.ts
|
|
10564
10748
|
import { writeFile as writeFile5 } from "node:fs/promises";
|
|
10565
|
-
import
|
|
10749
|
+
import path33 from "node:path";
|
|
10566
10750
|
var DEFAULT_WORKSPACE_TEMPLATE2 = {
|
|
10567
10751
|
folders: [
|
|
10568
10752
|
{
|
|
@@ -10593,7 +10777,7 @@ async function provisionSubagents(options) {
|
|
|
10593
10777
|
if (!Number.isInteger(subagents) || subagents < 1) {
|
|
10594
10778
|
throw new Error("subagents must be a positive integer");
|
|
10595
10779
|
}
|
|
10596
|
-
const targetPath =
|
|
10780
|
+
const targetPath = path33.resolve(targetRoot);
|
|
10597
10781
|
if (!dryRun) {
|
|
10598
10782
|
await ensureDir(targetPath);
|
|
10599
10783
|
}
|
|
@@ -10613,7 +10797,7 @@ async function provisionSubagents(options) {
|
|
|
10613
10797
|
continue;
|
|
10614
10798
|
}
|
|
10615
10799
|
highestNumber = Math.max(highestNumber, parsed);
|
|
10616
|
-
const lockFile =
|
|
10800
|
+
const lockFile = path33.join(entry.absolutePath, lockName);
|
|
10617
10801
|
const locked = await pathExists(lockFile);
|
|
10618
10802
|
if (locked) {
|
|
10619
10803
|
lockedSubagents.add(entry.absolutePath);
|
|
@@ -10630,10 +10814,10 @@ async function provisionSubagents(options) {
|
|
|
10630
10814
|
break;
|
|
10631
10815
|
}
|
|
10632
10816
|
const subagentDir = subagent.absolutePath;
|
|
10633
|
-
const githubAgentsDir =
|
|
10634
|
-
const lockFile =
|
|
10635
|
-
const workspaceDst =
|
|
10636
|
-
const wakeupDst =
|
|
10817
|
+
const githubAgentsDir = path33.join(subagentDir, ".github", "agents");
|
|
10818
|
+
const lockFile = path33.join(subagentDir, lockName);
|
|
10819
|
+
const workspaceDst = path33.join(subagentDir, `${path33.basename(subagentDir)}.code-workspace`);
|
|
10820
|
+
const wakeupDst = path33.join(githubAgentsDir, "wakeup.md");
|
|
10637
10821
|
const isLocked = await pathExists(lockFile);
|
|
10638
10822
|
if (isLocked && !force) {
|
|
10639
10823
|
continue;
|
|
@@ -10671,10 +10855,10 @@ async function provisionSubagents(options) {
|
|
|
10671
10855
|
let nextIndex = highestNumber;
|
|
10672
10856
|
while (subagentsProvisioned < subagents) {
|
|
10673
10857
|
nextIndex += 1;
|
|
10674
|
-
const subagentDir =
|
|
10675
|
-
const githubAgentsDir =
|
|
10676
|
-
const workspaceDst =
|
|
10677
|
-
const wakeupDst =
|
|
10858
|
+
const subagentDir = path33.join(targetPath, `subagent-${nextIndex}`);
|
|
10859
|
+
const githubAgentsDir = path33.join(subagentDir, ".github", "agents");
|
|
10860
|
+
const workspaceDst = path33.join(subagentDir, `${path33.basename(subagentDir)}.code-workspace`);
|
|
10861
|
+
const wakeupDst = path33.join(githubAgentsDir, "wakeup.md");
|
|
10678
10862
|
if (!dryRun) {
|
|
10679
10863
|
await ensureDir(subagentDir);
|
|
10680
10864
|
await ensureDir(githubAgentsDir);
|
|
@@ -10721,7 +10905,7 @@ var AGENTV_BATCH_REQUEST_TEMPLATE = `[[ ## task ## ]]
|
|
|
10721
10905
|
`;
|
|
10722
10906
|
|
|
10723
10907
|
// src/evaluation/providers/vscode-provider.ts
|
|
10724
|
-
var
|
|
10908
|
+
var execAsync4 = promisify4(exec2);
|
|
10725
10909
|
var VSCodeProvider = class {
|
|
10726
10910
|
id;
|
|
10727
10911
|
kind;
|
|
@@ -10864,7 +11048,7 @@ var VSCodeProvider = class {
|
|
|
10864
11048
|
async function locateVSCodeExecutable(candidate) {
|
|
10865
11049
|
const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
|
|
10866
11050
|
if (includesPathSeparator) {
|
|
10867
|
-
const resolved =
|
|
11051
|
+
const resolved = path34.isAbsolute(candidate) ? candidate : path34.resolve(candidate);
|
|
10868
11052
|
try {
|
|
10869
11053
|
await access3(resolved, constants3.F_OK);
|
|
10870
11054
|
return resolved;
|
|
@@ -10876,7 +11060,7 @@ async function locateVSCodeExecutable(candidate) {
|
|
|
10876
11060
|
}
|
|
10877
11061
|
const locator = process.platform === "win32" ? "where" : "which";
|
|
10878
11062
|
try {
|
|
10879
|
-
const { stdout } = await
|
|
11063
|
+
const { stdout } = await execAsync4(`${locator} ${candidate}`);
|
|
10880
11064
|
const lines = stdout.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.length > 0);
|
|
10881
11065
|
if (lines.length > 0 && lines[0]) {
|
|
10882
11066
|
await access3(lines[0], constants3.F_OK);
|
|
@@ -10893,7 +11077,7 @@ async function resolveWorkspaceTemplateFile(template) {
|
|
|
10893
11077
|
return void 0;
|
|
10894
11078
|
}
|
|
10895
11079
|
try {
|
|
10896
|
-
const stats = await
|
|
11080
|
+
const stats = await stat6(path34.resolve(template));
|
|
10897
11081
|
return stats.isFile() ? template : void 0;
|
|
10898
11082
|
} catch {
|
|
10899
11083
|
return template;
|
|
@@ -10917,7 +11101,7 @@ function buildMandatoryPrereadBlock2(attachmentFiles) {
|
|
|
10917
11101
|
return "";
|
|
10918
11102
|
}
|
|
10919
11103
|
const buildList = (files) => files.map((absolutePath) => {
|
|
10920
|
-
const fileName =
|
|
11104
|
+
const fileName = path34.basename(absolutePath);
|
|
10921
11105
|
const fileUri = pathToFileUri3(absolutePath);
|
|
10922
11106
|
return `* [${fileName}](${fileUri})`;
|
|
10923
11107
|
});
|
|
@@ -10938,7 +11122,7 @@ function collectAttachmentFiles(attachments) {
|
|
|
10938
11122
|
}
|
|
10939
11123
|
const unique = /* @__PURE__ */ new Map();
|
|
10940
11124
|
for (const attachment of attachments) {
|
|
10941
|
-
const absolutePath =
|
|
11125
|
+
const absolutePath = path34.resolve(attachment);
|
|
10942
11126
|
if (!unique.has(absolutePath)) {
|
|
10943
11127
|
unique.set(absolutePath, absolutePath);
|
|
10944
11128
|
}
|
|
@@ -10946,7 +11130,7 @@ function collectAttachmentFiles(attachments) {
|
|
|
10946
11130
|
return Array.from(unique.values());
|
|
10947
11131
|
}
|
|
10948
11132
|
function pathToFileUri3(filePath) {
|
|
10949
|
-
const absolutePath =
|
|
11133
|
+
const absolutePath = path34.isAbsolute(filePath) ? filePath : path34.resolve(filePath);
|
|
10950
11134
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
10951
11135
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
10952
11136
|
return `file:///${normalizedPath}`;
|
|
@@ -10959,7 +11143,7 @@ function normalizeAttachments(attachments) {
|
|
|
10959
11143
|
}
|
|
10960
11144
|
const deduped = /* @__PURE__ */ new Set();
|
|
10961
11145
|
for (const attachment of attachments) {
|
|
10962
|
-
deduped.add(
|
|
11146
|
+
deduped.add(path34.resolve(attachment));
|
|
10963
11147
|
}
|
|
10964
11148
|
return Array.from(deduped);
|
|
10965
11149
|
}
|
|
@@ -10968,7 +11152,7 @@ function mergeAttachments(all) {
|
|
|
10968
11152
|
for (const list of all) {
|
|
10969
11153
|
if (!list) continue;
|
|
10970
11154
|
for (const inputFile of list) {
|
|
10971
|
-
deduped.add(
|
|
11155
|
+
deduped.add(path34.resolve(inputFile));
|
|
10972
11156
|
}
|
|
10973
11157
|
}
|
|
10974
11158
|
return deduped.size > 0 ? Array.from(deduped) : void 0;
|
|
@@ -11016,8 +11200,8 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
|
|
|
11016
11200
|
|
|
11017
11201
|
// src/evaluation/providers/targets-file.ts
|
|
11018
11202
|
import { constants as constants4 } from "node:fs";
|
|
11019
|
-
import { access as access4, readFile as
|
|
11020
|
-
import
|
|
11203
|
+
import { access as access4, readFile as readFile14 } from "node:fs/promises";
|
|
11204
|
+
import path35 from "node:path";
|
|
11021
11205
|
import { parse as parse5 } from "yaml";
|
|
11022
11206
|
function isRecord(value) {
|
|
11023
11207
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
@@ -11057,11 +11241,11 @@ async function fileExists3(filePath) {
|
|
|
11057
11241
|
}
|
|
11058
11242
|
}
|
|
11059
11243
|
async function readTargetDefinitions(filePath) {
|
|
11060
|
-
const absolutePath =
|
|
11244
|
+
const absolutePath = path35.resolve(filePath);
|
|
11061
11245
|
if (!await fileExists3(absolutePath)) {
|
|
11062
11246
|
throw new Error(`targets.yaml not found at ${absolutePath}`);
|
|
11063
11247
|
}
|
|
11064
|
-
const raw = await
|
|
11248
|
+
const raw = await readFile14(absolutePath, "utf8");
|
|
11065
11249
|
const parsed = parse5(raw);
|
|
11066
11250
|
if (!isRecord(parsed)) {
|
|
11067
11251
|
throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
|
|
@@ -11077,16 +11261,16 @@ function listTargetNames(definitions) {
|
|
|
11077
11261
|
}
|
|
11078
11262
|
|
|
11079
11263
|
// src/evaluation/providers/provider-discovery.ts
|
|
11080
|
-
import
|
|
11264
|
+
import path36 from "node:path";
|
|
11081
11265
|
import fg from "fast-glob";
|
|
11082
11266
|
async function discoverProviders(registry, baseDir) {
|
|
11083
11267
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
11084
11268
|
const candidateDirs = [];
|
|
11085
|
-
let dir =
|
|
11086
|
-
const root =
|
|
11269
|
+
let dir = path36.resolve(baseDir);
|
|
11270
|
+
const root = path36.parse(dir).root;
|
|
11087
11271
|
while (dir !== root) {
|
|
11088
|
-
candidateDirs.push(
|
|
11089
|
-
dir =
|
|
11272
|
+
candidateDirs.push(path36.join(dir, ".agentv", "providers"));
|
|
11273
|
+
dir = path36.dirname(dir);
|
|
11090
11274
|
}
|
|
11091
11275
|
let files = [];
|
|
11092
11276
|
for (const providersDir of candidateDirs) {
|
|
@@ -11102,7 +11286,7 @@ async function discoverProviders(registry, baseDir) {
|
|
|
11102
11286
|
}
|
|
11103
11287
|
const discoveredKinds = [];
|
|
11104
11288
|
for (const filePath of files) {
|
|
11105
|
-
const basename =
|
|
11289
|
+
const basename = path36.basename(filePath);
|
|
11106
11290
|
const kindName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
11107
11291
|
if (registry.has(kindName)) {
|
|
11108
11292
|
continue;
|
|
@@ -11767,7 +11951,7 @@ import { generateText as generateText3 } from "ai";
|
|
|
11767
11951
|
|
|
11768
11952
|
// src/evaluation/evaluators/llm-grader.ts
|
|
11769
11953
|
import fs2 from "node:fs/promises";
|
|
11770
|
-
import
|
|
11954
|
+
import path37 from "node:path";
|
|
11771
11955
|
import { generateText as generateText2, stepCountIs, tool } from "ai";
|
|
11772
11956
|
import { z as z3 } from "zod";
|
|
11773
11957
|
var DEFAULT_MAX_STEPS = 10;
|
|
@@ -12801,8 +12985,8 @@ function toAiSdkImageParts(images) {
|
|
|
12801
12985
|
}));
|
|
12802
12986
|
}
|
|
12803
12987
|
function resolveSandboxed(basePath, relativePath) {
|
|
12804
|
-
const resolved =
|
|
12805
|
-
if (!resolved.startsWith(basePath +
|
|
12988
|
+
const resolved = path37.resolve(basePath, relativePath);
|
|
12989
|
+
if (!resolved.startsWith(basePath + path37.sep) && resolved !== basePath) {
|
|
12806
12990
|
throw new Error(`Path '${relativePath}' is outside the workspace`);
|
|
12807
12991
|
}
|
|
12808
12992
|
return resolved;
|
|
@@ -12835,11 +13019,11 @@ function createFilesystemTools(workspacePath) {
|
|
|
12835
13019
|
execute: async (input) => {
|
|
12836
13020
|
try {
|
|
12837
13021
|
const resolved = resolveSandboxed(workspacePath, input.path);
|
|
12838
|
-
const
|
|
12839
|
-
if (
|
|
13022
|
+
const stat13 = await fs2.stat(resolved);
|
|
13023
|
+
if (stat13.isDirectory()) {
|
|
12840
13024
|
return { error: `'${input.path}' is a directory, not a file` };
|
|
12841
13025
|
}
|
|
12842
|
-
const buffer = Buffer.alloc(Math.min(
|
|
13026
|
+
const buffer = Buffer.alloc(Math.min(stat13.size, MAX_FILE_SIZE));
|
|
12843
13027
|
const fd = await fs2.open(resolved, "r");
|
|
12844
13028
|
try {
|
|
12845
13029
|
await fd.read(buffer, 0, buffer.length, 0);
|
|
@@ -12847,8 +13031,8 @@ function createFilesystemTools(workspacePath) {
|
|
|
12847
13031
|
await fd.close();
|
|
12848
13032
|
}
|
|
12849
13033
|
const content = buffer.toString("utf-8");
|
|
12850
|
-
const truncated =
|
|
12851
|
-
return { content, truncated, size:
|
|
13034
|
+
const truncated = stat13.size > MAX_FILE_SIZE;
|
|
13035
|
+
return { content, truncated, size: stat13.size };
|
|
12852
13036
|
} catch (error) {
|
|
12853
13037
|
return { error: error instanceof Error ? error.message : String(error) };
|
|
12854
13038
|
}
|
|
@@ -12892,15 +13076,15 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
|
|
|
12892
13076
|
for (const entry of entries) {
|
|
12893
13077
|
if (matches.length >= MAX_SEARCH_MATCHES) return;
|
|
12894
13078
|
if (SEARCH_SKIP_DIRS.has(entry.name)) continue;
|
|
12895
|
-
const fullPath =
|
|
13079
|
+
const fullPath = path37.join(dirPath, entry.name);
|
|
12896
13080
|
if (entry.isDirectory()) {
|
|
12897
13081
|
await searchDirectory(fullPath, workspacePath, regex, matches);
|
|
12898
13082
|
} else if (entry.isFile()) {
|
|
12899
|
-
const ext =
|
|
13083
|
+
const ext = path37.extname(entry.name).toLowerCase();
|
|
12900
13084
|
if (BINARY_EXTENSIONS.has(ext)) continue;
|
|
12901
13085
|
try {
|
|
12902
|
-
const
|
|
12903
|
-
if (
|
|
13086
|
+
const stat13 = await fs2.stat(fullPath);
|
|
13087
|
+
if (stat13.size > MAX_FILE_SIZE) continue;
|
|
12904
13088
|
const content = await fs2.readFile(fullPath, "utf-8");
|
|
12905
13089
|
const lines = content.split("\n");
|
|
12906
13090
|
for (let i = 0; i < lines.length; i++) {
|
|
@@ -12908,7 +13092,7 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
|
|
|
12908
13092
|
regex.lastIndex = 0;
|
|
12909
13093
|
if (regex.test(lines[i])) {
|
|
12910
13094
|
matches.push({
|
|
12911
|
-
file:
|
|
13095
|
+
file: path37.relative(workspacePath, fullPath),
|
|
12912
13096
|
line: i + 1,
|
|
12913
13097
|
text: lines[i].substring(0, 200)
|
|
12914
13098
|
});
|
|
@@ -14868,7 +15052,7 @@ function runEqualsAssertion(output, value) {
|
|
|
14868
15052
|
// src/evaluation/orchestrator.ts
|
|
14869
15053
|
import { createHash as createHash2, randomUUID as randomUUID9 } from "node:crypto";
|
|
14870
15054
|
import { existsSync as existsSync5 } from "node:fs";
|
|
14871
|
-
import { copyFile as copyFile2, mkdir as mkdir14, readdir as
|
|
15055
|
+
import { copyFile as copyFile2, mkdir as mkdir14, readdir as readdir8, stat as stat9 } from "node:fs/promises";
|
|
14872
15056
|
import path45 from "node:path";
|
|
14873
15057
|
import micromatch3 from "micromatch";
|
|
14874
15058
|
|
|
@@ -15083,7 +15267,7 @@ var InlineAssertEvaluator = class {
|
|
|
15083
15267
|
};
|
|
15084
15268
|
|
|
15085
15269
|
// src/evaluation/evaluators/prompt-resolution.ts
|
|
15086
|
-
import
|
|
15270
|
+
import path38 from "node:path";
|
|
15087
15271
|
async function resolveCustomPrompt(promptConfig, context, timeoutMs) {
|
|
15088
15272
|
if (promptConfig.resolvedPromptScript && promptConfig.resolvedPromptScript.length > 0) {
|
|
15089
15273
|
if (!context) {
|
|
@@ -15135,7 +15319,7 @@ async function executePromptTemplate(script, context, config, timeoutMs) {
|
|
|
15135
15319
|
};
|
|
15136
15320
|
const inputJson = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
|
|
15137
15321
|
const scriptPath = script[script.length - 1];
|
|
15138
|
-
const cwd =
|
|
15322
|
+
const cwd = path38.dirname(scriptPath);
|
|
15139
15323
|
try {
|
|
15140
15324
|
const stdout = await executeScript(script, inputJson, timeoutMs, cwd);
|
|
15141
15325
|
const prompt = stdout.trim();
|
|
@@ -15418,16 +15602,16 @@ function createBuiltinRegistry() {
|
|
|
15418
15602
|
}
|
|
15419
15603
|
|
|
15420
15604
|
// src/evaluation/registry/assertion-discovery.ts
|
|
15421
|
-
import
|
|
15605
|
+
import path39 from "node:path";
|
|
15422
15606
|
import fg2 from "fast-glob";
|
|
15423
15607
|
async function discoverAssertions(registry, baseDir) {
|
|
15424
15608
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
15425
15609
|
const candidateDirs = [];
|
|
15426
|
-
let dir =
|
|
15427
|
-
const root =
|
|
15610
|
+
let dir = path39.resolve(baseDir);
|
|
15611
|
+
const root = path39.parse(dir).root;
|
|
15428
15612
|
while (dir !== root) {
|
|
15429
|
-
candidateDirs.push(
|
|
15430
|
-
dir =
|
|
15613
|
+
candidateDirs.push(path39.join(dir, ".agentv", "assertions"));
|
|
15614
|
+
dir = path39.dirname(dir);
|
|
15431
15615
|
}
|
|
15432
15616
|
let files = [];
|
|
15433
15617
|
for (const assertionsDir of candidateDirs) {
|
|
@@ -15443,7 +15627,7 @@ async function discoverAssertions(registry, baseDir) {
|
|
|
15443
15627
|
}
|
|
15444
15628
|
const discoveredTypes = [];
|
|
15445
15629
|
for (const filePath of files) {
|
|
15446
|
-
const basename =
|
|
15630
|
+
const basename = path39.basename(filePath);
|
|
15447
15631
|
const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
15448
15632
|
if (registry.has(typeName)) {
|
|
15449
15633
|
continue;
|
|
@@ -15461,17 +15645,17 @@ async function discoverAssertions(registry, baseDir) {
|
|
|
15461
15645
|
}
|
|
15462
15646
|
|
|
15463
15647
|
// src/evaluation/registry/grader-discovery.ts
|
|
15464
|
-
import
|
|
15648
|
+
import path40 from "node:path";
|
|
15465
15649
|
import fg3 from "fast-glob";
|
|
15466
15650
|
async function discoverGraders(registry, baseDir) {
|
|
15467
15651
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
15468
15652
|
const candidateDirs = [];
|
|
15469
|
-
let dir =
|
|
15470
|
-
const root =
|
|
15653
|
+
let dir = path40.resolve(baseDir);
|
|
15654
|
+
const root = path40.parse(dir).root;
|
|
15471
15655
|
while (dir !== root) {
|
|
15472
|
-
candidateDirs.push(
|
|
15473
|
-
candidateDirs.push(
|
|
15474
|
-
dir =
|
|
15656
|
+
candidateDirs.push(path40.join(dir, ".agentv", "graders"));
|
|
15657
|
+
candidateDirs.push(path40.join(dir, ".agentv", "judges"));
|
|
15658
|
+
dir = path40.dirname(dir);
|
|
15475
15659
|
}
|
|
15476
15660
|
let files = [];
|
|
15477
15661
|
for (const gradersDir of candidateDirs) {
|
|
@@ -15487,7 +15671,7 @@ async function discoverGraders(registry, baseDir) {
|
|
|
15487
15671
|
}
|
|
15488
15672
|
const discoveredTypes = [];
|
|
15489
15673
|
for (const filePath of files) {
|
|
15490
|
-
const basename =
|
|
15674
|
+
const basename = path40.basename(filePath);
|
|
15491
15675
|
const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
15492
15676
|
if (registry.has(typeName)) {
|
|
15493
15677
|
continue;
|
|
@@ -15644,57 +15828,8 @@ function getTCritical(df) {
|
|
|
15644
15828
|
return T_TABLE_95[df - 1];
|
|
15645
15829
|
}
|
|
15646
15830
|
|
|
15647
|
-
// src/evaluation/workspace/file-changes.ts
|
|
15648
|
-
import { exec as execCallback } from "node:child_process";
|
|
15649
|
-
import { readdirSync as readdirSync2, statSync } from "node:fs";
|
|
15650
|
-
import path40 from "node:path";
|
|
15651
|
-
import { promisify as promisify4 } from "node:util";
|
|
15652
|
-
var execAsync4 = promisify4(execCallback);
|
|
15653
|
-
/**
 * Builds the options object used for git child processes run inside a workspace.
 *
 * The returned `env` is a copy of `process.env` without `GIT_DIR` and
 * `GIT_WORK_TREE`; `process.env` itself is left untouched.
 *
 * @param {string} workspacePath - Directory used as the child process `cwd`.
 * @returns {{ cwd: string, env: object }} Options for the exec call.
 */
function gitExecOpts(workspacePath) {
  const env = { ...process.env };
  // Drop the two git location overrides from the copied environment.
  delete env.GIT_DIR;
  delete env.GIT_WORK_TREE;
  return { cwd: workspacePath, env };
}
|
|
15657
|
-
/**
 * Creates a baseline commit in `workspacePath` and returns its hash.
 *
 * Runs, in order: `git init`, `git add -A`, a commit (empty allowed) with the
 * message "agentv-baseline" under an inline agentv identity, and finally
 * `git rev-parse HEAD`.
 *
 * @param {string} workspacePath - Directory in which the git commands run.
 * @returns {Promise<string>} Trimmed stdout of `git rev-parse HEAD`.
 */
async function initializeBaseline(workspacePath) {
  const execOptions = gitExecOpts(workspacePath);
  const setupCommands = [
    "git init",
    "git add -A",
    'git -c user.email=agentv@localhost -c user.name=agentv commit --allow-empty -m "agentv-baseline"'
  ];
  // Each command depends on the previous one, so they run strictly in sequence.
  for (const command of setupCommands) {
    await execAsync4(command, execOptions);
  }
  const head = await execAsync4("git rev-parse HEAD", execOptions);
  return head.stdout.trim();
}
|
|
15668
|
-
/**
 * Returns the diff of the workspace against a baseline commit.
 *
 * Stages changes in nested repositories first, then stages everything in the
 * workspace itself, and finally runs `git diff <baselineCommit> --submodule=diff`.
 *
 * @param {string} workspacePath - Directory in which the git commands run.
 * @param {string} baselineCommit - Commit reference to diff against.
 * @returns {Promise<string>} Trimmed stdout of the diff command.
 */
async function captureFileChanges(workspacePath, baselineCommit) {
  const execOptions = gitExecOpts(workspacePath);
  await stageNestedRepoChanges(workspacePath);
  await execAsync4("git add -A", execOptions);
  const diffCommand = `git diff ${baselineCommit} --submodule=diff`;
  const diffResult = await execAsync4(diffCommand, execOptions);
  return diffResult.stdout.trim();
}
|
|
15675
|
-
/**
 * Runs `git add -A` inside every direct child directory of `workspacePath`
 * that itself contains a `.git` directory.
 *
 * Children named `.git` or `node_modules` are ignored. If the workspace cannot
 * be listed the function returns without doing anything; children that cannot
 * be stat'ed are skipped.
 *
 * @param {string} workspacePath - Directory whose direct children are inspected.
 * @returns {Promise<void>}
 */
async function stageNestedRepoChanges(workspacePath) {
  let childNames;
  try {
    childNames = readdirSync2(workspacePath);
  } catch {
    return;
  }
  // A path that cannot be stat'ed is treated the same as "not a directory".
  const isExistingDirectory = (candidate) => {
    try {
      return statSync(candidate).isDirectory();
    } catch {
      return false;
    }
  };
  for (const childName of childNames) {
    if (childName === ".git" || childName === "node_modules") continue;
    const childPath = path40.join(workspacePath, childName);
    if (!isExistingDirectory(childPath)) continue;
    if (!isExistingDirectory(path40.join(childPath, ".git"))) continue;
    await execAsync4("git add -A", gitExecOpts(childPath));
  }
}
|
|
15695
|
-
|
|
15696
15831
|
// src/evaluation/workspace/manager.ts
|
|
15697
|
-
import { cp, mkdir as mkdir12, readdir as
|
|
15832
|
+
import { cp, mkdir as mkdir12, readdir as readdir5, rm as rm4, stat as stat7 } from "node:fs/promises";
|
|
15698
15833
|
import path41 from "node:path";
|
|
15699
15834
|
var TemplateNotFoundError = class extends Error {
|
|
15700
15835
|
constructor(templatePath) {
|
|
@@ -15717,7 +15852,7 @@ var WorkspaceCreationError = class extends Error {
|
|
|
15717
15852
|
};
|
|
15718
15853
|
async function isDirectory(filePath) {
|
|
15719
15854
|
try {
|
|
15720
|
-
const stats = await
|
|
15855
|
+
const stats = await stat7(filePath);
|
|
15721
15856
|
return stats.isDirectory();
|
|
15722
15857
|
} catch {
|
|
15723
15858
|
return false;
|
|
@@ -15729,7 +15864,7 @@ function getWorkspacePath(evalRunId, caseId, workspaceRoot) {
|
|
|
15729
15864
|
}
|
|
15730
15865
|
async function copyDirectoryRecursive(src, dest) {
|
|
15731
15866
|
await mkdir12(dest, { recursive: true });
|
|
15732
|
-
const entries = await
|
|
15867
|
+
const entries = await readdir5(src, { withFileTypes: true });
|
|
15733
15868
|
for (const entry of entries) {
|
|
15734
15869
|
const srcPath = path41.join(src, entry.name);
|
|
15735
15870
|
const destPath = path41.join(dest, entry.name);
|
|
@@ -15803,7 +15938,7 @@ async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
|
|
|
15803
15938
|
import { execFile } from "node:child_process";
|
|
15804
15939
|
import { createHash } from "node:crypto";
|
|
15805
15940
|
import { existsSync as existsSync3 } from "node:fs";
|
|
15806
|
-
import { cp as cp2, mkdir as mkdir13, readFile as
|
|
15941
|
+
import { cp as cp2, mkdir as mkdir13, readFile as readFile15, readdir as readdir6, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
|
|
15807
15942
|
import path42 from "node:path";
|
|
15808
15943
|
import { promisify as promisify5 } from "node:util";
|
|
15809
15944
|
var execFileAsync = promisify5(execFile);
|
|
@@ -15858,7 +15993,7 @@ function computeWorkspaceFingerprint(repos) {
|
|
|
15858
15993
|
}
|
|
15859
15994
|
async function copyDirectoryRecursive2(src, dest, skipDirs) {
|
|
15860
15995
|
await mkdir13(dest, { recursive: true });
|
|
15861
|
-
const entries = await
|
|
15996
|
+
const entries = await readdir6(src, { withFileTypes: true });
|
|
15862
15997
|
for (const entry of entries) {
|
|
15863
15998
|
const srcPath = path42.join(src, entry.name);
|
|
15864
15999
|
const destPath = path42.join(dest, entry.name);
|
|
@@ -15966,7 +16101,7 @@ var WorkspacePoolManager = class {
|
|
|
15966
16101
|
throw err;
|
|
15967
16102
|
}
|
|
15968
16103
|
try {
|
|
15969
|
-
const pidStr = await
|
|
16104
|
+
const pidStr = await readFile15(lockPath, "utf-8");
|
|
15970
16105
|
const pid = Number.parseInt(pidStr.trim(), 10);
|
|
15971
16106
|
if (!Number.isNaN(pid)) {
|
|
15972
16107
|
try {
|
|
@@ -15993,7 +16128,7 @@ var WorkspacePoolManager = class {
|
|
|
15993
16128
|
async checkDrift(poolDir, fingerprint) {
|
|
15994
16129
|
const metadataPath = path42.join(poolDir, "metadata.json");
|
|
15995
16130
|
try {
|
|
15996
|
-
const raw = await
|
|
16131
|
+
const raw = await readFile15(metadataPath, "utf-8");
|
|
15997
16132
|
const metadata = JSON.parse(raw);
|
|
15998
16133
|
return metadata.fingerprint !== fingerprint;
|
|
15999
16134
|
} catch {
|
|
@@ -16012,13 +16147,13 @@ var WorkspacePoolManager = class {
|
|
|
16012
16147
|
}
|
|
16013
16148
|
/** Remove all slot directories and their lock files from a pool directory. */
|
|
16014
16149
|
async removeAllSlots(poolDir) {
|
|
16015
|
-
const entries = await
|
|
16150
|
+
const entries = await readdir6(poolDir);
|
|
16016
16151
|
for (const entry of entries) {
|
|
16017
16152
|
if (entry.startsWith("slot-") && !entry.endsWith(".lock")) {
|
|
16018
16153
|
const lockPath = path42.join(poolDir, `${entry}.lock`);
|
|
16019
16154
|
if (existsSync3(lockPath)) {
|
|
16020
16155
|
try {
|
|
16021
|
-
const pidStr = await
|
|
16156
|
+
const pidStr = await readFile15(lockPath, "utf-8");
|
|
16022
16157
|
const pid = Number.parseInt(pidStr.trim(), 10);
|
|
16023
16158
|
if (!Number.isNaN(pid)) {
|
|
16024
16159
|
try {
|
|
@@ -16293,14 +16428,14 @@ ${lines.join("\n")}`;
|
|
|
16293
16428
|
};
|
|
16294
16429
|
|
|
16295
16430
|
// src/evaluation/workspace/resolve.ts
|
|
16296
|
-
import { readdir as
|
|
16431
|
+
import { readdir as readdir7, stat as stat8 } from "node:fs/promises";
|
|
16297
16432
|
import path44 from "node:path";
|
|
16298
16433
|
async function resolveWorkspaceTemplate(templatePath) {
|
|
16299
16434
|
if (!templatePath) {
|
|
16300
16435
|
return void 0;
|
|
16301
16436
|
}
|
|
16302
16437
|
const resolved = path44.resolve(templatePath);
|
|
16303
|
-
const stats = await
|
|
16438
|
+
const stats = await stat8(resolved);
|
|
16304
16439
|
if (stats.isFile()) {
|
|
16305
16440
|
return {
|
|
16306
16441
|
dir: path44.dirname(resolved),
|
|
@@ -16310,7 +16445,7 @@ async function resolveWorkspaceTemplate(templatePath) {
|
|
|
16310
16445
|
if (!stats.isDirectory()) {
|
|
16311
16446
|
throw new Error(`workspace template is neither a file nor a directory: ${resolved}`);
|
|
16312
16447
|
}
|
|
16313
|
-
const entries = await
|
|
16448
|
+
const entries = await readdir7(resolved);
|
|
16314
16449
|
const workspaceFiles = entries.filter((e) => e.endsWith(".code-workspace"));
|
|
16315
16450
|
if (workspaceFiles.length === 1) {
|
|
16316
16451
|
return {
|
|
@@ -16416,6 +16551,100 @@ function getWorkspaceTemplate(target) {
|
|
|
16416
16551
|
}
|
|
16417
16552
|
return void 0;
|
|
16418
16553
|
}
|
|
16554
|
+
/**
 * Checks that the `depends_on` relationships between tests are well-formed.
 *
 * Checks run in this order, and the first violation throws:
 *   1. every test ID is unique;
 *   2. every dependency names an existing test and is not the test itself;
 *   3. the dependency graph contains no cycle.
 *
 * @param {Iterable<{ id: string, depends_on?: string[] }>} tests - Tests to validate.
 * @returns {void}
 * @throws {Error} On a duplicate ID, unknown dependency, self-dependency or cycle.
 */
function validateDependencyGraph(tests) {
  // 1. Unique IDs.
  const knownIds = /* @__PURE__ */ new Set();
  for (const candidate of tests) {
    const alreadySeen = knownIds.has(candidate.id);
    if (alreadySeen) {
      throw new Error(`Duplicate test ID '${candidate.id}' \u2014 each test must have a unique ID`);
    }
    knownIds.add(candidate.id);
  }

  // 2. Dependencies must reference known tests and must not be reflexive.
  for (const candidate of tests) {
    for (const dependencyId of candidate.depends_on || []) {
      if (!knownIds.has(dependencyId)) {
        throw new Error(
          `Test '${candidate.id}' depends on '${dependencyId}', but no test with that ID exists in this suite`
        );
      }
      if (dependencyId === candidate.id) {
        throw new Error(`Test '${candidate.id}' depends on itself`);
      }
    }
  }

  // 3. Depth-first search for cycles. Only tests with at least one dependency
  //    contribute outgoing edges.
  const edges = /* @__PURE__ */ new Map();
  for (const candidate of tests) {
    if (candidate.depends_on && candidate.depends_on.length > 0) {
      edges.set(candidate.id, candidate.depends_on);
    }
  }
  const IN_PROGRESS = 1;
  const DONE = 2;
  const state = /* @__PURE__ */ new Map();
  // IDs on the current DFS path, used to render the cycle in the error message.
  const trail = [];
  const explore = (id) => {
    const mark = state.get(id);
    if (mark === IN_PROGRESS) {
      const cycle = trail.slice(trail.indexOf(id));
      cycle.push(id);
      throw new Error(`Circular dependency detected: ${cycle.join(" \u2192 ")}`);
    }
    if (mark === DONE) return;
    state.set(id, IN_PROGRESS);
    trail.push(id);
    for (const dependencyId of edges.get(id) ?? []) {
      explore(dependencyId);
    }
    trail.pop();
    state.set(id, DONE);
  };
  for (const candidate of tests) {
    explore(candidate.id);
  }
}
|
|
16602
|
+
/**
 * Groups tests into ordered "waves" such that every test appears in a later
 * wave than all tests listed in its `depends_on`.
 *
 * When no test declares a dependency, a single wave holding a shallow copy of
 * `tests` is returned. Otherwise the first wave holds the tests without
 * pending dependencies (in input order) and each following wave holds the
 * tests unblocked by the previous one (in the order they became unblocked).
 *
 * @param {Array<{ id: string, depends_on?: string[] }>} tests - Tests to schedule.
 * @returns {Array<Array<object>>} Waves of the original test objects.
 * @throws {Error} If some tests could never be scheduled.
 */
function computeWaves(tests) {
  const anyDependencies = tests.some((t) => t.depends_on && t.depends_on.length > 0);
  if (!anyDependencies) {
    return [tests.slice()];
  }

  const testById = /* @__PURE__ */ new Map();
  // Number of not-yet-scheduled dependencies per test ID.
  const pendingCount = /* @__PURE__ */ new Map();
  // Reverse edges: dependency ID -> IDs of the tests waiting on it.
  const dependentsOf = /* @__PURE__ */ new Map();
  for (const test of tests) {
    testById.set(test.id, test);
    pendingCount.set(test.id, 0);
  }
  for (const test of tests) {
    const prerequisites = test.depends_on;
    if (!prerequisites) continue;
    pendingCount.set(test.id, prerequisites.length);
    for (const prerequisiteId of prerequisites) {
      const waiting = dependentsOf.get(prerequisiteId);
      if (waiting === void 0) {
        dependentsOf.set(prerequisiteId, [test.id]);
      } else {
        waiting.push(test.id);
      }
    }
  }

  const waves = [];
  let scheduledCount = 0;
  let currentWave = tests.filter((t) => (pendingCount.get(t.id) ?? 0) === 0);
  while (currentWave.length > 0) {
    waves.push(currentWave);
    scheduledCount += currentWave.length;
    const followingWave = [];
    for (const finished of currentWave) {
      for (const waitingId of dependentsOf.get(finished.id) ?? []) {
        const remaining = (pendingCount.get(waitingId) ?? 1) - 1;
        pendingCount.set(waitingId, remaining);
        if (remaining !== 0) continue;
        const unblocked = testById.get(waitingId);
        if (unblocked) followingWave.push(unblocked);
      }
    }
    currentWave = followingWave;
  }

  // Anything left unscheduled never reached a pending count of zero.
  if (scheduledCount !== tests.length) {
    throw new Error(
      `Internal error: ${tests.length - scheduledCount} tests were not scheduled (possible undetected cycle)`
    );
  }
  return waves;
}
|
|
16419
16648
|
async function runEvaluation(options) {
|
|
16420
16649
|
const {
|
|
16421
16650
|
testFilePath: evalFilePath,
|
|
@@ -16683,11 +16912,11 @@ async function runEvaluation(options) {
|
|
|
16683
16912
|
let staticMaterialised = false;
|
|
16684
16913
|
const isYamlConfiguredPath = !cliWorkspacePath && !!yamlWorkspacePath;
|
|
16685
16914
|
if (useStaticWorkspace && configuredStaticPath) {
|
|
16686
|
-
const dirExists = await
|
|
16915
|
+
const dirExists = await stat9(configuredStaticPath).then(
|
|
16687
16916
|
(s) => s.isDirectory(),
|
|
16688
16917
|
() => false
|
|
16689
16918
|
);
|
|
16690
|
-
const isEmpty = dirExists ? (await
|
|
16919
|
+
const isEmpty = dirExists ? (await readdir8(configuredStaticPath)).length === 0 : false;
|
|
16691
16920
|
if (isYamlConfiguredPath && (!dirExists || isEmpty)) {
|
|
16692
16921
|
if (!dirExists) {
|
|
16693
16922
|
await mkdir14(configuredStaticPath, { recursive: true });
|
|
@@ -16739,10 +16968,41 @@ async function runEvaluation(options) {
|
|
|
16739
16968
|
setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
|
|
16740
16969
|
}
|
|
16741
16970
|
try {
|
|
16971
|
+
// Converts a finished result `r` into the summary recorded for dependent tests:
// score, last assistant output text, workspace path, per-score details (or
// undefined when `r.scores` is absent) and a status derived from
// `r.executionStatus` ("ok" -> "passed", "execution_error" -> "error",
// anything else -> "failed").
let toDependencyResult2 = function(r) {
  const outputText = extractLastAssistantContent(r.output);
  let details = void 0;
  if (r.scores) {
    const entries = r.scores.map((s) => [s.name, { score: s.score, verdict: s.verdict }]);
    details = Object.fromEntries(entries);
  }
  let status = "failed";
  if (r.executionStatus === "ok") {
    status = "passed";
  } else if (r.executionStatus === "execution_error") {
    status = "error";
  }
  return {
    score: r.score,
    output: outputText,
    workspace_path: r.workspacePath,
    details,
    status
  };
};
// Collects the dependency summaries for `evalCase` from `completedResults`.
// `ok` is false when any dependency has no entry in `completedResults` or has
// executionStatus "execution_error"; a case without dependencies is always ok.
let checkDependencies2 = function(evalCase) {
  const depResults = {};
  const dependencyIds = evalCase.depends_on;
  if (!dependencyIds || dependencyIds.length === 0) {
    return { ok: true, depResults };
  }
  let blocked = false;
  for (const depId of dependencyIds) {
    const finished = completedResults.get(depId);
    if (!finished) {
      blocked = true;
      continue;
    }
    depResults[depId] = toDependencyResult2(finished);
    if (finished.executionStatus === "execution_error") {
      blocked = true;
    }
  }
  return { ok: !blocked, depResults };
};
var toDependencyResult = toDependencyResult2, checkDependencies = checkDependencies2;
|
|
16742
17002
|
if (suiteWorkspaceFile && sharedWorkspacePath) {
|
|
16743
17003
|
const copiedWorkspaceFile = path45.join(sharedWorkspacePath, path45.basename(suiteWorkspaceFile));
|
|
16744
17004
|
try {
|
|
16745
|
-
await
|
|
17005
|
+
await stat9(copiedWorkspaceFile);
|
|
16746
17006
|
suiteWorkspaceFile = copiedWorkspaceFile;
|
|
16747
17007
|
} catch {
|
|
16748
17008
|
}
|
|
@@ -16849,8 +17109,9 @@ async function runEvaluation(options) {
|
|
|
16849
17109
|
try {
|
|
16850
17110
|
sharedBaselineCommit = await initializeBaseline(sharedWorkspacePath);
|
|
16851
17111
|
setupLog(`shared baseline initialized: ${sharedBaselineCommit}`);
|
|
16852
|
-
} catch {
|
|
16853
|
-
|
|
17112
|
+
} catch (error) {
|
|
17113
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
17114
|
+
setupLog(`shared baseline initialization failed (file_changes unavailable): ${message}`);
|
|
16854
17115
|
}
|
|
16855
17116
|
}
|
|
16856
17117
|
if (availablePoolSlots.length > 0) {
|
|
@@ -16859,8 +17120,11 @@ async function runEvaluation(options) {
|
|
|
16859
17120
|
const baseline = await initializeBaseline(slot.path);
|
|
16860
17121
|
poolSlotBaselines.set(slot.path, baseline);
|
|
16861
17122
|
setupLog(`pool slot ${slot.index} baseline initialized: ${baseline}`);
|
|
16862
|
-
} catch {
|
|
16863
|
-
|
|
17123
|
+
} catch (error) {
|
|
17124
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
17125
|
+
setupLog(
|
|
17126
|
+
`pool slot ${slot.index} baseline initialization failed (file_changes unavailable): ${message}`
|
|
17127
|
+
);
|
|
16864
17128
|
}
|
|
16865
17129
|
}
|
|
16866
17130
|
}
|
|
@@ -16870,204 +17134,259 @@ async function runEvaluation(options) {
|
|
|
16870
17134
|
let cumulativeBudgetCost = 0;
|
|
16871
17135
|
let budgetExhausted = false;
|
|
16872
17136
|
let failOnErrorTriggered = false;
|
|
16873
|
-
|
|
16874
|
-
|
|
16875
|
-
|
|
16876
|
-
|
|
16877
|
-
|
|
16878
|
-
|
|
16879
|
-
|
|
17137
|
+
validateDependencyGraph(filteredEvalCases);
|
|
17138
|
+
const waves = computeWaves(filteredEvalCases);
|
|
17139
|
+
const completedResults = /* @__PURE__ */ new Map();
|
|
17140
|
+
const results = [];
|
|
17141
|
+
async function dispatchTest(evalCase, depResults) {
|
|
17142
|
+
const workerId = nextWorkerId++;
|
|
17143
|
+
workerIdByEvalId.set(evalCase.id, workerId);
|
|
17144
|
+
if (totalBudgetUsd !== void 0 && budgetExhausted) {
|
|
17145
|
+
const budgetResult = {
|
|
17146
|
+
timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
17147
|
+
testId: evalCase.id,
|
|
17148
|
+
suite: evalCase.suite,
|
|
17149
|
+
category: evalCase.category,
|
|
17150
|
+
score: 0,
|
|
17151
|
+
assertions: [],
|
|
17152
|
+
output: [],
|
|
17153
|
+
target: target.name,
|
|
17154
|
+
error: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
|
|
17155
|
+
budgetExceeded: true,
|
|
17156
|
+
executionStatus: "execution_error",
|
|
17157
|
+
failureStage: "setup",
|
|
17158
|
+
failureReasonCode: "budget_exceeded",
|
|
17159
|
+
executionError: {
|
|
17160
|
+
message: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
|
|
17161
|
+
stage: "setup"
|
|
17162
|
+
}
|
|
17163
|
+
};
|
|
17164
|
+
if (onProgress) {
|
|
17165
|
+
await onProgress({
|
|
17166
|
+
workerId,
|
|
16880
17167
|
testId: evalCase.id,
|
|
16881
|
-
|
|
16882
|
-
|
|
16883
|
-
|
|
16884
|
-
|
|
16885
|
-
|
|
16886
|
-
|
|
16887
|
-
|
|
16888
|
-
|
|
16889
|
-
|
|
16890
|
-
|
|
16891
|
-
|
|
16892
|
-
|
|
16893
|
-
|
|
16894
|
-
|
|
17168
|
+
status: "failed",
|
|
17169
|
+
completedAt: Date.now(),
|
|
17170
|
+
error: budgetResult.error,
|
|
17171
|
+
score: budgetResult.score,
|
|
17172
|
+
executionStatus: budgetResult.executionStatus
|
|
17173
|
+
});
|
|
17174
|
+
}
|
|
17175
|
+
if (onResult) {
|
|
17176
|
+
await onResult(budgetResult);
|
|
17177
|
+
}
|
|
17178
|
+
return budgetResult;
|
|
17179
|
+
}
|
|
17180
|
+
if (failOnError === true && failOnErrorTriggered) {
|
|
17181
|
+
const errorMsg = "Halted: execution error encountered with fail_on_error enabled";
|
|
17182
|
+
const haltResult = {
|
|
17183
|
+
timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
17184
|
+
testId: evalCase.id,
|
|
17185
|
+
suite: evalCase.suite,
|
|
17186
|
+
category: evalCase.category,
|
|
17187
|
+
score: 0,
|
|
17188
|
+
assertions: [],
|
|
17189
|
+
output: [],
|
|
17190
|
+
target: target.name,
|
|
17191
|
+
error: errorMsg,
|
|
17192
|
+
executionStatus: "execution_error",
|
|
17193
|
+
failureStage: "setup",
|
|
17194
|
+
failureReasonCode: "error_threshold_exceeded",
|
|
17195
|
+
executionError: { message: errorMsg, stage: "setup" }
|
|
17196
|
+
};
|
|
17197
|
+
if (onProgress) {
|
|
17198
|
+
await onProgress({
|
|
17199
|
+
workerId,
|
|
17200
|
+
testId: evalCase.id,
|
|
17201
|
+
status: "failed",
|
|
17202
|
+
completedAt: Date.now(),
|
|
17203
|
+
error: haltResult.error,
|
|
17204
|
+
score: haltResult.score,
|
|
17205
|
+
executionStatus: haltResult.executionStatus
|
|
17206
|
+
});
|
|
17207
|
+
}
|
|
17208
|
+
if (onResult) {
|
|
17209
|
+
await onResult(haltResult);
|
|
17210
|
+
}
|
|
17211
|
+
return haltResult;
|
|
17212
|
+
}
|
|
17213
|
+
if (onProgress) {
|
|
17214
|
+
await onProgress({
|
|
17215
|
+
workerId,
|
|
17216
|
+
testId: evalCase.id,
|
|
17217
|
+
status: "running",
|
|
17218
|
+
startedAt: Date.now()
|
|
17219
|
+
});
|
|
17220
|
+
}
|
|
17221
|
+
const testPoolSlot = availablePoolSlots.length > 0 ? availablePoolSlots.pop() : void 0;
|
|
17222
|
+
const testWorkspacePath = testPoolSlot?.path ?? sharedWorkspacePath;
|
|
17223
|
+
const testBaselineCommit = testPoolSlot ? poolSlotBaselines.get(testPoolSlot.path) : sharedBaselineCommit;
|
|
17224
|
+
try {
|
|
17225
|
+
const graderProvider = await resolveGraderProvider(target);
|
|
17226
|
+
const runCaseOptions = {
|
|
17227
|
+
evalCase,
|
|
17228
|
+
provider: primaryProvider,
|
|
17229
|
+
target,
|
|
17230
|
+
evaluators: evaluatorRegistry,
|
|
17231
|
+
maxRetries,
|
|
17232
|
+
agentTimeoutMs,
|
|
17233
|
+
cache,
|
|
17234
|
+
useCache,
|
|
17235
|
+
now,
|
|
17236
|
+
graderProvider,
|
|
17237
|
+
targetResolver,
|
|
17238
|
+
availableTargets,
|
|
17239
|
+
evalRunId,
|
|
17240
|
+
keepWorkspaces,
|
|
17241
|
+
cleanupWorkspaces,
|
|
17242
|
+
retainOnSuccess: resolvedRetainOnSuccess,
|
|
17243
|
+
retainOnFailure: resolvedRetainOnFailure,
|
|
17244
|
+
sharedWorkspacePath: testWorkspacePath,
|
|
17245
|
+
sharedBaselineCommit: testBaselineCommit,
|
|
17246
|
+
suiteWorkspaceFile,
|
|
17247
|
+
streamCallbacks,
|
|
17248
|
+
typeRegistry,
|
|
17249
|
+
repoManager,
|
|
17250
|
+
evalDir,
|
|
17251
|
+
verbose,
|
|
17252
|
+
threshold: scoreThreshold,
|
|
17253
|
+
...depResults && Object.keys(depResults).length > 0 ? { dependencyResults: depResults } : {}
|
|
17254
|
+
};
|
|
17255
|
+
let result = trials && trials.count > 1 ? await runEvalCaseWithTrials(runCaseOptions, trials) : await runEvalCase(runCaseOptions);
|
|
17256
|
+
if (totalBudgetUsd !== void 0) {
|
|
17257
|
+
let caseCost;
|
|
17258
|
+
if (result.trials && result.trials.length > 0) {
|
|
17259
|
+
const trialCostSum = result.trials.reduce((sum, t) => sum + (t.costUsd ?? 0), 0);
|
|
17260
|
+
if (trialCostSum > 0) {
|
|
17261
|
+
caseCost = trialCostSum;
|
|
16895
17262
|
}
|
|
16896
|
-
}
|
|
16897
|
-
|
|
16898
|
-
await onProgress({
|
|
16899
|
-
workerId,
|
|
16900
|
-
testId: evalCase.id,
|
|
16901
|
-
status: "failed",
|
|
16902
|
-
completedAt: Date.now(),
|
|
16903
|
-
error: budgetResult.error,
|
|
16904
|
-
score: budgetResult.score,
|
|
16905
|
-
executionStatus: budgetResult.executionStatus
|
|
16906
|
-
});
|
|
17263
|
+
} else {
|
|
17264
|
+
caseCost = result.costUsd;
|
|
16907
17265
|
}
|
|
16908
|
-
if (
|
|
16909
|
-
|
|
17266
|
+
if (caseCost !== void 0) {
|
|
17267
|
+
cumulativeBudgetCost += caseCost;
|
|
17268
|
+
if (cumulativeBudgetCost >= totalBudgetUsd) {
|
|
17269
|
+
budgetExhausted = true;
|
|
17270
|
+
}
|
|
16910
17271
|
}
|
|
16911
|
-
return budgetResult;
|
|
16912
17272
|
}
|
|
16913
|
-
if (failOnError === true &&
|
|
16914
|
-
|
|
16915
|
-
|
|
16916
|
-
|
|
17273
|
+
if (failOnError === true && result.executionStatus === "execution_error") {
|
|
17274
|
+
failOnErrorTriggered = true;
|
|
17275
|
+
}
|
|
17276
|
+
if (beforeAllOutput && !beforeAllOutputAttached) {
|
|
17277
|
+
result = { ...result, beforeAllOutput };
|
|
17278
|
+
beforeAllOutputAttached = true;
|
|
17279
|
+
}
|
|
17280
|
+
if (onProgress) {
|
|
17281
|
+
await onProgress({
|
|
17282
|
+
workerId,
|
|
16917
17283
|
testId: evalCase.id,
|
|
16918
|
-
|
|
16919
|
-
|
|
16920
|
-
|
|
16921
|
-
|
|
16922
|
-
|
|
16923
|
-
|
|
16924
|
-
|
|
16925
|
-
|
|
16926
|
-
failureStage: "setup",
|
|
16927
|
-
failureReasonCode: "error_threshold_exceeded",
|
|
16928
|
-
executionError: { message: errorMsg, stage: "setup" }
|
|
16929
|
-
};
|
|
16930
|
-
if (onProgress) {
|
|
16931
|
-
await onProgress({
|
|
16932
|
-
workerId,
|
|
16933
|
-
testId: evalCase.id,
|
|
16934
|
-
status: "failed",
|
|
16935
|
-
completedAt: Date.now(),
|
|
16936
|
-
error: haltResult.error,
|
|
16937
|
-
score: haltResult.score,
|
|
16938
|
-
executionStatus: haltResult.executionStatus
|
|
16939
|
-
});
|
|
16940
|
-
}
|
|
16941
|
-
if (onResult) {
|
|
16942
|
-
await onResult(haltResult);
|
|
16943
|
-
}
|
|
16944
|
-
return haltResult;
|
|
17284
|
+
status: result.error ? "failed" : "completed",
|
|
17285
|
+
startedAt: 0,
|
|
17286
|
+
// Not used for completed status
|
|
17287
|
+
completedAt: Date.now(),
|
|
17288
|
+
error: result.error,
|
|
17289
|
+
score: result.score,
|
|
17290
|
+
executionStatus: result.executionStatus
|
|
17291
|
+
});
|
|
16945
17292
|
}
|
|
17293
|
+
if (onResult) {
|
|
17294
|
+
await onResult(result);
|
|
17295
|
+
}
|
|
17296
|
+
return result;
|
|
17297
|
+
} catch (error) {
|
|
16946
17298
|
if (onProgress) {
|
|
16947
17299
|
await onProgress({
|
|
16948
17300
|
workerId,
|
|
16949
17301
|
testId: evalCase.id,
|
|
16950
|
-
status: "
|
|
16951
|
-
|
|
17302
|
+
status: "failed",
|
|
17303
|
+
completedAt: Date.now(),
|
|
17304
|
+
error: error instanceof Error ? error.message : String(error)
|
|
16952
17305
|
});
|
|
16953
17306
|
}
|
|
16954
|
-
|
|
16955
|
-
|
|
16956
|
-
|
|
16957
|
-
|
|
16958
|
-
|
|
16959
|
-
|
|
16960
|
-
|
|
16961
|
-
|
|
16962
|
-
|
|
16963
|
-
|
|
16964
|
-
|
|
16965
|
-
|
|
16966
|
-
|
|
16967
|
-
|
|
16968
|
-
|
|
16969
|
-
|
|
16970
|
-
|
|
16971
|
-
|
|
16972
|
-
|
|
16973
|
-
|
|
16974
|
-
|
|
16975
|
-
|
|
16976
|
-
|
|
16977
|
-
|
|
16978
|
-
|
|
16979
|
-
|
|
16980
|
-
|
|
16981
|
-
|
|
16982
|
-
|
|
16983
|
-
|
|
16984
|
-
|
|
16985
|
-
|
|
16986
|
-
|
|
16987
|
-
|
|
16988
|
-
|
|
16989
|
-
|
|
16990
|
-
|
|
16991
|
-
|
|
16992
|
-
|
|
16993
|
-
|
|
16994
|
-
|
|
16995
|
-
|
|
16996
|
-
|
|
16997
|
-
|
|
16998
|
-
|
|
16999
|
-
|
|
17000
|
-
|
|
17001
|
-
|
|
17307
|
+
throw error;
|
|
17308
|
+
} finally {
|
|
17309
|
+
if (testPoolSlot) {
|
|
17310
|
+
availablePoolSlots.push(testPoolSlot);
|
|
17311
|
+
}
|
|
17312
|
+
}
|
|
17313
|
+
}
|
|
17314
|
+
for (const wave of waves) {
|
|
17315
|
+
const wavePromises = wave.map(
|
|
17316
|
+
(evalCase) => limit(async () => {
|
|
17317
|
+
if (evalCase.depends_on && evalCase.depends_on.length > 0) {
|
|
17318
|
+
const { ok, depResults } = checkDependencies2(evalCase);
|
|
17319
|
+
if (!ok) {
|
|
17320
|
+
const policy = evalCase.on_dependency_failure ?? "skip";
|
|
17321
|
+
if (policy === "skip" || policy === "fail") {
|
|
17322
|
+
const failedDeps = evalCase.depends_on.filter(
|
|
17323
|
+
(d) => completedResults.get(d)?.executionStatus === "execution_error"
|
|
17324
|
+
);
|
|
17325
|
+
const prefix = policy === "skip" ? "Skipped" : "Failed";
|
|
17326
|
+
const errorMsg = `${prefix}: dependency failed (${failedDeps.join(", ")})`;
|
|
17327
|
+
const depFailResult = {
|
|
17328
|
+
timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
17329
|
+
testId: evalCase.id,
|
|
17330
|
+
suite: evalCase.suite,
|
|
17331
|
+
category: evalCase.category,
|
|
17332
|
+
score: 0,
|
|
17333
|
+
assertions: [],
|
|
17334
|
+
output: [],
|
|
17335
|
+
target: target.name,
|
|
17336
|
+
error: errorMsg,
|
|
17337
|
+
executionStatus: "execution_error",
|
|
17338
|
+
failureStage: "setup",
|
|
17339
|
+
failureReasonCode: "dependency_failed",
|
|
17340
|
+
executionError: { message: errorMsg, stage: "setup" }
|
|
17341
|
+
};
|
|
17342
|
+
if (onProgress) {
|
|
17343
|
+
await onProgress({
|
|
17344
|
+
workerId: nextWorkerId++,
|
|
17345
|
+
testId: evalCase.id,
|
|
17346
|
+
status: "failed",
|
|
17347
|
+
completedAt: Date.now(),
|
|
17348
|
+
error: depFailResult.error,
|
|
17349
|
+
score: 0,
|
|
17350
|
+
executionStatus: depFailResult.executionStatus
|
|
17351
|
+
});
|
|
17352
|
+
}
|
|
17353
|
+
if (onResult) {
|
|
17354
|
+
await onResult(depFailResult);
|
|
17355
|
+
}
|
|
17356
|
+
return depFailResult;
|
|
17002
17357
|
}
|
|
17003
17358
|
}
|
|
17359
|
+
return dispatchTest(evalCase, depResults);
|
|
17004
17360
|
}
|
|
17005
|
-
|
|
17006
|
-
|
|
17007
|
-
|
|
17008
|
-
|
|
17009
|
-
|
|
17010
|
-
|
|
17011
|
-
|
|
17012
|
-
|
|
17013
|
-
|
|
17014
|
-
|
|
17015
|
-
|
|
17016
|
-
|
|
17017
|
-
|
|
17018
|
-
|
|
17019
|
-
|
|
17020
|
-
|
|
17021
|
-
|
|
17022
|
-
|
|
17023
|
-
|
|
17024
|
-
|
|
17361
|
+
return dispatchTest(evalCase);
|
|
17362
|
+
})
|
|
17363
|
+
);
|
|
17364
|
+
const settled = await Promise.allSettled(wavePromises);
|
|
17365
|
+
for (let i = 0; i < settled.length; i++) {
|
|
17366
|
+
const outcome = settled[i];
|
|
17367
|
+
const evalCase = wave[i];
|
|
17368
|
+
if (outcome.status === "fulfilled") {
|
|
17369
|
+
completedResults.set(evalCase.id, outcome.value);
|
|
17370
|
+
results.push(outcome.value);
|
|
17371
|
+
} else {
|
|
17372
|
+
const formattingMode = usesFileReferencePrompt(primaryProvider) ? "agent" : "lm";
|
|
17373
|
+
const promptInputs = await buildPromptInputs(evalCase, formattingMode);
|
|
17374
|
+
const errorResult = buildErrorResult(
|
|
17375
|
+
evalCase,
|
|
17376
|
+
target.name,
|
|
17377
|
+
(now ?? (() => /* @__PURE__ */ new Date()))(),
|
|
17378
|
+
outcome.reason,
|
|
17379
|
+
promptInputs,
|
|
17380
|
+
primaryProvider,
|
|
17381
|
+
"agent",
|
|
17382
|
+
"provider_error",
|
|
17383
|
+
verbose
|
|
17384
|
+
);
|
|
17385
|
+
completedResults.set(evalCase.id, errorResult);
|
|
17386
|
+
results.push(errorResult);
|
|
17025
17387
|
if (onResult) {
|
|
17026
|
-
await onResult(
|
|
17388
|
+
await onResult(errorResult);
|
|
17027
17389
|
}
|
|
17028
|
-
return result;
|
|
17029
|
-
} catch (error) {
|
|
17030
|
-
if (onProgress) {
|
|
17031
|
-
await onProgress({
|
|
17032
|
-
workerId,
|
|
17033
|
-
testId: evalCase.id,
|
|
17034
|
-
status: "failed",
|
|
17035
|
-
completedAt: Date.now(),
|
|
17036
|
-
error: error instanceof Error ? error.message : String(error)
|
|
17037
|
-
});
|
|
17038
|
-
}
|
|
17039
|
-
throw error;
|
|
17040
|
-
} finally {
|
|
17041
|
-
if (testPoolSlot) {
|
|
17042
|
-
availablePoolSlots.push(testPoolSlot);
|
|
17043
|
-
}
|
|
17044
|
-
}
|
|
17045
|
-
})
|
|
17046
|
-
);
|
|
17047
|
-
const settled = await Promise.allSettled(promises);
|
|
17048
|
-
const results = [];
|
|
17049
|
-
for (let i = 0; i < settled.length; i++) {
|
|
17050
|
-
const outcome = settled[i];
|
|
17051
|
-
if (outcome.status === "fulfilled") {
|
|
17052
|
-
results.push(outcome.value);
|
|
17053
|
-
} else {
|
|
17054
|
-
const evalCase = filteredEvalCases[i];
|
|
17055
|
-
const formattingMode = usesFileReferencePrompt(primaryProvider) ? "agent" : "lm";
|
|
17056
|
-
const promptInputs = await buildPromptInputs(evalCase, formattingMode);
|
|
17057
|
-
const errorResult = buildErrorResult(
|
|
17058
|
-
evalCase,
|
|
17059
|
-
target.name,
|
|
17060
|
-
(now ?? (() => /* @__PURE__ */ new Date()))(),
|
|
17061
|
-
outcome.reason,
|
|
17062
|
-
promptInputs,
|
|
17063
|
-
primaryProvider,
|
|
17064
|
-
"agent",
|
|
17065
|
-
"provider_error",
|
|
17066
|
-
verbose
|
|
17067
|
-
);
|
|
17068
|
-
results.push(errorResult);
|
|
17069
|
-
if (onResult) {
|
|
17070
|
-
await onResult(errorResult);
|
|
17071
17390
|
}
|
|
17072
17391
|
}
|
|
17073
17392
|
}
|
|
@@ -17312,7 +17631,8 @@ async function runEvalCase(options) {
|
|
|
17312
17631
|
repoManager,
|
|
17313
17632
|
evalDir,
|
|
17314
17633
|
verbose,
|
|
17315
|
-
threshold: caseThreshold
|
|
17634
|
+
threshold: caseThreshold,
|
|
17635
|
+
dependencyResults
|
|
17316
17636
|
} = options;
|
|
17317
17637
|
const setupDebug = process.env.AGENTV_SETUP_DEBUG === "1";
|
|
17318
17638
|
const formattingMode = usesFileReferencePrompt(provider) ? "agent" : "lm";
|
|
@@ -17356,7 +17676,7 @@ async function runEvalCase(options) {
|
|
|
17356
17676
|
if (caseWorkspaceFile && workspacePath) {
|
|
17357
17677
|
const copiedFile = path45.join(workspacePath, path45.basename(caseWorkspaceFile));
|
|
17358
17678
|
try {
|
|
17359
|
-
await
|
|
17679
|
+
await stat9(copiedFile);
|
|
17360
17680
|
caseWorkspaceFile = copiedFile;
|
|
17361
17681
|
} catch {
|
|
17362
17682
|
}
|
|
@@ -17520,7 +17840,11 @@ async function runEvalCase(options) {
|
|
|
17520
17840
|
if (!baselineCommit && workspacePath) {
|
|
17521
17841
|
try {
|
|
17522
17842
|
baselineCommit = await initializeBaseline(workspacePath);
|
|
17523
|
-
} catch {
|
|
17843
|
+
} catch (error) {
|
|
17844
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
17845
|
+
if (verbose) {
|
|
17846
|
+
console.warn(`[setup] test=${evalCase.id} baseline initialization failed: ${message}`);
|
|
17847
|
+
}
|
|
17524
17848
|
}
|
|
17525
17849
|
}
|
|
17526
17850
|
const caseStartMs = Date.now();
|
|
@@ -17631,6 +17955,11 @@ async function runEvalCase(options) {
|
|
|
17631
17955
|
} catch {
|
|
17632
17956
|
}
|
|
17633
17957
|
}
|
|
17958
|
+
const providerFileChanges = providerResponse?.fileChanges;
|
|
17959
|
+
if (providerFileChanges) {
|
|
17960
|
+
fileChanges = fileChanges ? `${fileChanges}
|
|
17961
|
+
${providerFileChanges}` : providerFileChanges;
|
|
17962
|
+
}
|
|
17634
17963
|
const providerError = extractProviderError(providerResponse);
|
|
17635
17964
|
if (caseHooksEnabled && repoManager && workspacePath && evalCase.workspace?.hooks?.after_each?.reset && evalCase.workspace.hooks.after_each.reset !== "none" && evalCase.workspace.repos) {
|
|
17636
17965
|
try {
|
|
@@ -17688,7 +18017,8 @@ async function runEvalCase(options) {
|
|
|
17688
18017
|
workspacePath,
|
|
17689
18018
|
dockerConfig: evalCase.workspace?.docker,
|
|
17690
18019
|
verbose,
|
|
17691
|
-
threshold: evalCase.threshold ?? caseThreshold
|
|
18020
|
+
threshold: evalCase.threshold ?? caseThreshold,
|
|
18021
|
+
dependencyResults
|
|
17692
18022
|
});
|
|
17693
18023
|
const effectiveThreshold = evalCase.threshold ?? caseThreshold;
|
|
17694
18024
|
const totalDurationMs = Date.now() - caseStartMs;
|
|
@@ -17881,7 +18211,8 @@ async function evaluateCandidate(options) {
|
|
|
17881
18211
|
fileChanges,
|
|
17882
18212
|
workspacePath,
|
|
17883
18213
|
dockerConfig,
|
|
17884
|
-
threshold: evalThreshold
|
|
18214
|
+
threshold: evalThreshold,
|
|
18215
|
+
dependencyResults
|
|
17885
18216
|
} = options;
|
|
17886
18217
|
const gradeTimestamp = nowFn();
|
|
17887
18218
|
const { score, scores } = await runEvaluatorsForCase({
|
|
@@ -17908,7 +18239,8 @@ async function evaluateCandidate(options) {
|
|
|
17908
18239
|
fileChanges,
|
|
17909
18240
|
workspacePath,
|
|
17910
18241
|
dockerConfig,
|
|
17911
|
-
threshold: evalThreshold
|
|
18242
|
+
threshold: evalThreshold,
|
|
18243
|
+
dependencyResults
|
|
17912
18244
|
});
|
|
17913
18245
|
const completedAt = nowFn();
|
|
17914
18246
|
let agentRequest;
|
|
@@ -17984,7 +18316,8 @@ async function runEvaluatorsForCase(options) {
|
|
|
17984
18316
|
fileChanges,
|
|
17985
18317
|
workspacePath,
|
|
17986
18318
|
dockerConfig,
|
|
17987
|
-
threshold
|
|
18319
|
+
threshold,
|
|
18320
|
+
dependencyResults
|
|
17988
18321
|
} = options;
|
|
17989
18322
|
if (evalCase.assertions && evalCase.assertions.length > 0) {
|
|
17990
18323
|
return runEvaluatorList({
|
|
@@ -18012,7 +18345,8 @@ async function runEvaluatorsForCase(options) {
|
|
|
18012
18345
|
fileChanges,
|
|
18013
18346
|
workspacePath,
|
|
18014
18347
|
dockerConfig,
|
|
18015
|
-
threshold
|
|
18348
|
+
threshold,
|
|
18349
|
+
dependencyResults
|
|
18016
18350
|
});
|
|
18017
18351
|
}
|
|
18018
18352
|
const evaluatorKind = evalCase.evaluator ?? "llm-grader";
|
|
@@ -18042,6 +18376,7 @@ async function runEvaluatorsForCase(options) {
|
|
|
18042
18376
|
fileChanges,
|
|
18043
18377
|
workspacePath,
|
|
18044
18378
|
dockerConfig,
|
|
18379
|
+
dependencyResults,
|
|
18045
18380
|
...implicitEvaluator ? { evaluator: implicitEvaluator } : {}
|
|
18046
18381
|
});
|
|
18047
18382
|
return { score };
|
|
@@ -18081,7 +18416,8 @@ async function runEvaluatorList(options) {
|
|
|
18081
18416
|
availableTargets,
|
|
18082
18417
|
fileChanges,
|
|
18083
18418
|
workspacePath,
|
|
18084
|
-
dockerConfig
|
|
18419
|
+
dockerConfig,
|
|
18420
|
+
dependencyResults
|
|
18085
18421
|
} = options;
|
|
18086
18422
|
const scored = [];
|
|
18087
18423
|
const scores = [];
|
|
@@ -18105,7 +18441,8 @@ async function runEvaluatorList(options) {
|
|
|
18105
18441
|
availableTargets,
|
|
18106
18442
|
fileChanges,
|
|
18107
18443
|
workspacePath,
|
|
18108
|
-
dockerConfig
|
|
18444
|
+
dockerConfig,
|
|
18445
|
+
dependencyResults
|
|
18109
18446
|
};
|
|
18110
18447
|
const evalFileDir = evalCase.file_paths[0] ? path45.dirname(evalCase.file_paths[0]) : process.cwd();
|
|
18111
18448
|
const dispatchContext = {
|
|
@@ -18824,7 +19161,7 @@ function buildPrompt(criteria, question, referenceAnswer) {
|
|
|
18824
19161
|
}
|
|
18825
19162
|
|
|
18826
19163
|
// src/evaluation/workspace/deps-scanner.ts
|
|
18827
|
-
import { readFile as
|
|
19164
|
+
import { readFile as readFile16 } from "node:fs/promises";
|
|
18828
19165
|
import path47 from "node:path";
|
|
18829
19166
|
import { parse as parse6 } from "yaml";
|
|
18830
19167
|
function normalizeGitUrl(url) {
|
|
@@ -18872,7 +19209,7 @@ async function scanRepoDeps(evalFilePaths) {
|
|
|
18872
19209
|
return { repos: [...seen.values()], errors };
|
|
18873
19210
|
}
|
|
18874
19211
|
async function extractReposFromEvalFile(filePath) {
|
|
18875
|
-
const content = await
|
|
19212
|
+
const content = await readFile16(filePath, "utf8");
|
|
18876
19213
|
const parsed = interpolateEnv(parse6(content), process.env);
|
|
18877
19214
|
if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return [];
|
|
18878
19215
|
const obj = parsed;
|
|
@@ -18893,7 +19230,7 @@ async function extractReposFromEvalFile(filePath) {
|
|
|
18893
19230
|
async function extractReposFromWorkspaceRaw(raw, evalFileDir) {
|
|
18894
19231
|
if (typeof raw === "string") {
|
|
18895
19232
|
const workspaceFilePath = path47.resolve(evalFileDir, raw);
|
|
18896
|
-
const content = await
|
|
19233
|
+
const content = await readFile16(workspaceFilePath, "utf8");
|
|
18897
19234
|
const parsed = interpolateEnv(parse6(content), process.env);
|
|
18898
19235
|
if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return [];
|
|
18899
19236
|
return extractReposFromObject(parsed);
|
|
@@ -18921,7 +19258,7 @@ function extractReposFromObject(obj) {
|
|
|
18921
19258
|
}
|
|
18922
19259
|
|
|
18923
19260
|
// src/evaluation/cache/response-cache.ts
|
|
18924
|
-
import { mkdir as mkdir15, readFile as
|
|
19261
|
+
import { mkdir as mkdir15, readFile as readFile17, writeFile as writeFile8 } from "node:fs/promises";
|
|
18925
19262
|
import path48 from "node:path";
|
|
18926
19263
|
var DEFAULT_CACHE_PATH = ".agentv/cache";
|
|
18927
19264
|
var ResponseCache = class {
|
|
@@ -18932,7 +19269,7 @@ var ResponseCache = class {
|
|
|
18932
19269
|
async get(key) {
|
|
18933
19270
|
const filePath = this.keyToPath(key);
|
|
18934
19271
|
try {
|
|
18935
|
-
const data = await
|
|
19272
|
+
const data = await readFile17(filePath, "utf8");
|
|
18936
19273
|
return JSON.parse(data);
|
|
18937
19274
|
} catch {
|
|
18938
19275
|
return void 0;
|
|
@@ -18964,7 +19301,7 @@ function shouldSkipCacheForTemperature(targetConfig) {
|
|
|
18964
19301
|
// src/evaluation/results-repo.ts
|
|
18965
19302
|
import { execFile as execFile3 } from "node:child_process";
|
|
18966
19303
|
import { existsSync as existsSync7, mkdirSync as mkdirSync2, readFileSync as readFileSync3, rmSync, writeFileSync } from "node:fs";
|
|
18967
|
-
import { cp as cp3, mkdtemp as mkdtemp3, readdir as
|
|
19304
|
+
import { cp as cp3, mkdtemp as mkdtemp3, readdir as readdir9, rm as rm6, stat as stat10 } from "node:fs/promises";
|
|
18968
19305
|
import os3 from "node:os";
|
|
18969
19306
|
import path49 from "node:path";
|
|
18970
19307
|
import { promisify as promisify7 } from "node:util";
|
|
@@ -19187,12 +19524,12 @@ function resolveResultsRepoRunsDir(config) {
|
|
|
19187
19524
|
);
|
|
19188
19525
|
}
|
|
19189
19526
|
async function directorySizeBytes(targetPath) {
|
|
19190
|
-
const entry = await
|
|
19527
|
+
const entry = await stat10(targetPath);
|
|
19191
19528
|
if (entry.isFile()) {
|
|
19192
19529
|
return entry.size;
|
|
19193
19530
|
}
|
|
19194
19531
|
let total = 0;
|
|
19195
|
-
for (const child of await
|
|
19532
|
+
for (const child of await readdir9(targetPath, { withFileTypes: true })) {
|
|
19196
19533
|
total += await directorySizeBytes(path49.join(targetPath, child.name));
|
|
19197
19534
|
}
|
|
19198
19535
|
return total;
|
|
@@ -20259,17 +20596,17 @@ function extractResponseItemContent(content) {
|
|
|
20259
20596
|
}
|
|
20260
20597
|
|
|
20261
20598
|
// src/import/codex-session-discovery.ts
|
|
20262
|
-
import { readdir as
|
|
20263
|
-
import { homedir as
|
|
20599
|
+
import { readdir as readdir10, stat as stat11 } from "node:fs/promises";
|
|
20600
|
+
import { homedir as homedir5 } from "node:os";
|
|
20264
20601
|
import path51 from "node:path";
|
|
20265
|
-
var DEFAULT_SESSIONS_DIR = () => path51.join(
|
|
20602
|
+
var DEFAULT_SESSIONS_DIR = () => path51.join(homedir5(), ".codex", "sessions");
|
|
20266
20603
|
async function discoverCodexSessions(opts) {
|
|
20267
20604
|
const sessionsDir = opts?.sessionsDir ?? DEFAULT_SESSIONS_DIR();
|
|
20268
20605
|
const limit = opts?.latest ? 1 : opts?.limit ?? 10;
|
|
20269
20606
|
const sessions = [];
|
|
20270
20607
|
let yearDirs;
|
|
20271
20608
|
try {
|
|
20272
|
-
yearDirs = await
|
|
20609
|
+
yearDirs = await readdir10(sessionsDir);
|
|
20273
20610
|
} catch {
|
|
20274
20611
|
return [];
|
|
20275
20612
|
}
|
|
@@ -20277,7 +20614,7 @@ async function discoverCodexSessions(opts) {
|
|
|
20277
20614
|
const yearPath = path51.join(sessionsDir, year);
|
|
20278
20615
|
let monthDirs;
|
|
20279
20616
|
try {
|
|
20280
|
-
monthDirs = await
|
|
20617
|
+
monthDirs = await readdir10(yearPath);
|
|
20281
20618
|
} catch {
|
|
20282
20619
|
continue;
|
|
20283
20620
|
}
|
|
@@ -20285,7 +20622,7 @@ async function discoverCodexSessions(opts) {
|
|
|
20285
20622
|
const monthPath = path51.join(yearPath, month);
|
|
20286
20623
|
let dayDirs;
|
|
20287
20624
|
try {
|
|
20288
|
-
dayDirs = await
|
|
20625
|
+
dayDirs = await readdir10(monthPath);
|
|
20289
20626
|
} catch {
|
|
20290
20627
|
continue;
|
|
20291
20628
|
}
|
|
@@ -20297,7 +20634,7 @@ async function discoverCodexSessions(opts) {
|
|
|
20297
20634
|
const dayPath = path51.join(monthPath, day);
|
|
20298
20635
|
let files;
|
|
20299
20636
|
try {
|
|
20300
|
-
files = await
|
|
20637
|
+
files = await readdir10(dayPath);
|
|
20301
20638
|
} catch {
|
|
20302
20639
|
continue;
|
|
20303
20640
|
}
|
|
@@ -20309,7 +20646,7 @@ async function discoverCodexSessions(opts) {
|
|
|
20309
20646
|
const sessionId = parts.length >= 6 ? parts.slice(-5).join("-") : nameWithoutExt;
|
|
20310
20647
|
let updatedAt;
|
|
20311
20648
|
try {
|
|
20312
|
-
const fileStat = await
|
|
20649
|
+
const fileStat = await stat11(filePath);
|
|
20313
20650
|
updatedAt = fileStat.mtime;
|
|
20314
20651
|
} catch {
|
|
20315
20652
|
updatedAt = /* @__PURE__ */ new Date(0);
|
|
@@ -20324,10 +20661,10 @@ async function discoverCodexSessions(opts) {
|
|
|
20324
20661
|
}
|
|
20325
20662
|
|
|
20326
20663
|
// src/import/session-discovery.ts
|
|
20327
|
-
import { readdir as
|
|
20328
|
-
import { homedir as
|
|
20664
|
+
import { readdir as readdir11, stat as stat12 } from "node:fs/promises";
|
|
20665
|
+
import { homedir as homedir6 } from "node:os";
|
|
20329
20666
|
import path52 from "node:path";
|
|
20330
|
-
var DEFAULT_PROJECTS_DIR = () => path52.join(
|
|
20667
|
+
var DEFAULT_PROJECTS_DIR = () => path52.join(homedir6(), ".claude", "projects");
|
|
20331
20668
|
function encodeProjectPath(projectPath) {
|
|
20332
20669
|
return projectPath.replace(/\//g, "-");
|
|
20333
20670
|
}
|
|
@@ -20336,7 +20673,7 @@ async function discoverClaudeSessions(opts) {
|
|
|
20336
20673
|
const limit = opts?.latest ? 1 : opts?.limit ?? 10;
|
|
20337
20674
|
let projectDirs;
|
|
20338
20675
|
try {
|
|
20339
|
-
projectDirs = await
|
|
20676
|
+
projectDirs = await readdir11(projectsDir);
|
|
20340
20677
|
} catch {
|
|
20341
20678
|
return [];
|
|
20342
20679
|
}
|
|
@@ -20349,7 +20686,7 @@ async function discoverClaudeSessions(opts) {
|
|
|
20349
20686
|
const dirPath = path52.join(projectsDir, projectDir);
|
|
20350
20687
|
let entries;
|
|
20351
20688
|
try {
|
|
20352
|
-
entries = await
|
|
20689
|
+
entries = await readdir11(dirPath);
|
|
20353
20690
|
} catch {
|
|
20354
20691
|
continue;
|
|
20355
20692
|
}
|
|
@@ -20360,7 +20697,7 @@ async function discoverClaudeSessions(opts) {
|
|
|
20360
20697
|
const filePath = path52.join(dirPath, entry);
|
|
20361
20698
|
let updatedAt;
|
|
20362
20699
|
try {
|
|
20363
|
-
const fileStat = await
|
|
20700
|
+
const fileStat = await stat12(filePath);
|
|
20364
20701
|
updatedAt = fileStat.mtime;
|
|
20365
20702
|
} catch {
|
|
20366
20703
|
updatedAt = /* @__PURE__ */ new Date(0);
|
|
@@ -20378,7 +20715,7 @@ async function discoverClaudeSessions(opts) {
|
|
|
20378
20715
|
}
|
|
20379
20716
|
|
|
20380
20717
|
// src/import/types.ts
|
|
20381
|
-
import { readFile as
|
|
20718
|
+
import { readFile as readFile18 } from "node:fs/promises";
|
|
20382
20719
|
function toTranscriptJsonLine(entry) {
|
|
20383
20720
|
const firstUserMessage = entry.messages.find((m) => m.role === "user");
|
|
20384
20721
|
const input = typeof firstUserMessage?.content === "string" ? firstUserMessage.content : "";
|
|
@@ -20404,11 +20741,11 @@ function toTranscriptJsonLine(entry) {
|
|
|
20404
20741
|
};
|
|
20405
20742
|
}
|
|
20406
20743
|
async function readTranscriptJsonl(filePath) {
|
|
20407
|
-
const text = await
|
|
20744
|
+
const text = await readFile18(filePath, "utf8");
|
|
20408
20745
|
return text.split("\n").filter((line) => line.trim().length > 0).map((line) => JSON.parse(line));
|
|
20409
20746
|
}
|
|
20410
20747
|
async function readTranscriptFile(filePath) {
|
|
20411
|
-
return
|
|
20748
|
+
return readFile18(filePath, "utf8");
|
|
20412
20749
|
}
|
|
20413
20750
|
|
|
20414
20751
|
// src/import/transcript-provider.ts
|