@agentv/core 4.13.0 → 4.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-SWLNU3I6.js → chunk-A3HYVKTI.js} +1 -1
- package/dist/chunk-A3HYVKTI.js.map +1 -0
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +1 -1
- package/dist/index.cjs +997 -660
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +38 -1
- package/dist/index.d.ts +38 -1
- package/dist/index.js +853 -516
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-SWLNU3I6.js.map +0 -1
package/dist/index.cjs
CHANGED
|
@@ -140,7 +140,7 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
|
|
|
140
140
|
});
|
|
141
141
|
}
|
|
142
142
|
async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
143
|
-
const { mkdir: mkdir17, readFile:
|
|
143
|
+
const { mkdir: mkdir17, readFile: readFile21, rm: rm7, writeFile: writeFile9 } = await import("fs/promises");
|
|
144
144
|
const { tmpdir: tmpdir3 } = await import("os");
|
|
145
145
|
const path56 = await import("path");
|
|
146
146
|
const { randomUUID: randomUUID10 } = await import("crypto");
|
|
@@ -178,8 +178,8 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
|
178
178
|
resolve(code ?? 0);
|
|
179
179
|
});
|
|
180
180
|
});
|
|
181
|
-
const stdout = (await
|
|
182
|
-
const stderr = (await
|
|
181
|
+
const stdout = (await readFile21(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
|
|
182
|
+
const stderr = (await readFile21(stderrPath, "utf8")).replace(/\r\n/g, "\n");
|
|
183
183
|
return { stdout, stderr, exitCode };
|
|
184
184
|
} finally {
|
|
185
185
|
await rm7(dir, { recursive: true, force: true });
|
|
@@ -1698,12 +1698,12 @@ function serializeAttributeValue(value) {
|
|
|
1698
1698
|
if (Array.isArray(value)) return { arrayValue: { values: value.map(serializeAttributeValue) } };
|
|
1699
1699
|
return { stringValue: String(value) };
|
|
1700
1700
|
}
|
|
1701
|
-
var
|
|
1701
|
+
var import_promises40, import_node_path55, OtlpJsonFileExporter;
|
|
1702
1702
|
var init_otlp_json_file_exporter = __esm({
|
|
1703
1703
|
"src/observability/otlp-json-file-exporter.ts"() {
|
|
1704
1704
|
"use strict";
|
|
1705
1705
|
init_cjs_shims();
|
|
1706
|
-
|
|
1706
|
+
import_promises40 = require("fs/promises");
|
|
1707
1707
|
import_node_path55 = require("path");
|
|
1708
1708
|
OtlpJsonFileExporter = class {
|
|
1709
1709
|
// biome-ignore lint/suspicious/noExplicitAny: serialized span data
|
|
@@ -1743,7 +1743,7 @@ var init_otlp_json_file_exporter = __esm({
|
|
|
1743
1743
|
}
|
|
1744
1744
|
async flush() {
|
|
1745
1745
|
if (this.spans.length === 0) return;
|
|
1746
|
-
await (0,
|
|
1746
|
+
await (0, import_promises40.mkdir)((0, import_node_path55.dirname)(this.filePath), { recursive: true });
|
|
1747
1747
|
const otlpJson = {
|
|
1748
1748
|
resourceSpans: [
|
|
1749
1749
|
{
|
|
@@ -5751,7 +5751,11 @@ async function loadTestSuite(evalFilePath, repoRoot, options) {
|
|
|
5751
5751
|
if (format === "agent-skills-json") {
|
|
5752
5752
|
return { tests: await loadTestsFromAgentSkills(evalFilePath) };
|
|
5753
5753
|
}
|
|
5754
|
-
const { tests, parsed } = await loadTestsFromYaml(
|
|
5754
|
+
const { tests, parsed, suiteWorkspacePath } = await loadTestsFromYaml(
|
|
5755
|
+
evalFilePath,
|
|
5756
|
+
repoRoot,
|
|
5757
|
+
options
|
|
5758
|
+
);
|
|
5755
5759
|
const metadata = parseMetadata(parsed);
|
|
5756
5760
|
const failOnError = extractFailOnError(parsed);
|
|
5757
5761
|
const threshold = extractThreshold(parsed);
|
|
@@ -5764,7 +5768,8 @@ async function loadTestSuite(evalFilePath, repoRoot, options) {
|
|
|
5764
5768
|
totalBudgetUsd: extractTotalBudgetUsd(parsed),
|
|
5765
5769
|
...metadata !== void 0 && { metadata },
|
|
5766
5770
|
...failOnError !== void 0 && { failOnError },
|
|
5767
|
-
...threshold !== void 0 && { threshold }
|
|
5771
|
+
...threshold !== void 0 && { threshold },
|
|
5772
|
+
...suiteWorkspacePath !== void 0 && { workspacePath: suiteWorkspacePath }
|
|
5768
5773
|
};
|
|
5769
5774
|
}
|
|
5770
5775
|
var loadEvalSuite = loadTestSuite;
|
|
@@ -5926,6 +5931,11 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
5926
5931
|
const mergedWorkspace = mergeWorkspaceConfigs(suiteWorkspace, caseWorkspace);
|
|
5927
5932
|
const metadata = isJsonObject(testCaseConfig.metadata) ? testCaseConfig.metadata : void 0;
|
|
5928
5933
|
const caseTargets = extractTargetsFromTestCase(testCaseConfig);
|
|
5934
|
+
const dependsOn = Array.isArray(testCaseConfig.depends_on) ? testCaseConfig.depends_on.filter(
|
|
5935
|
+
(v) => typeof v === "string"
|
|
5936
|
+
) : void 0;
|
|
5937
|
+
const onDependencyFailureRaw = asString5(testCaseConfig.on_dependency_failure);
|
|
5938
|
+
const onDependencyFailure = onDependencyFailureRaw === "skip" || onDependencyFailureRaw === "fail" || onDependencyFailureRaw === "run" ? onDependencyFailureRaw : void 0;
|
|
5929
5939
|
const testCase = {
|
|
5930
5940
|
id,
|
|
5931
5941
|
suite: suiteName,
|
|
@@ -5943,11 +5953,13 @@ async function loadTestsFromYaml(evalFilePath, repoRoot, options) {
|
|
|
5943
5953
|
workspace: mergedWorkspace,
|
|
5944
5954
|
metadata,
|
|
5945
5955
|
targets: caseTargets,
|
|
5946
|
-
...caseThreshold !== void 0 ? { threshold: caseThreshold } : {}
|
|
5956
|
+
...caseThreshold !== void 0 ? { threshold: caseThreshold } : {},
|
|
5957
|
+
...dependsOn && dependsOn.length > 0 ? { depends_on: dependsOn } : {},
|
|
5958
|
+
...onDependencyFailure ? { on_dependency_failure: onDependencyFailure } : {}
|
|
5947
5959
|
};
|
|
5948
5960
|
results.push(testCase);
|
|
5949
5961
|
}
|
|
5950
|
-
return { tests: results, parsed: suite };
|
|
5962
|
+
return { tests: results, parsed: suite, suiteWorkspacePath: suiteWorkspace?.path };
|
|
5951
5963
|
}
|
|
5952
5964
|
async function loadTestById(evalFilePath, repoRoot, evalId) {
|
|
5953
5965
|
const tests = await loadTests(evalFilePath, repoRoot);
|
|
@@ -8925,12 +8937,125 @@ function formatElapsed3(startedAt) {
|
|
|
8925
8937
|
// src/evaluation/providers/copilot-cli.ts
|
|
8926
8938
|
init_cjs_shims();
|
|
8927
8939
|
var import_node_crypto5 = require("crypto");
|
|
8928
|
-
var
|
|
8929
|
-
var
|
|
8940
|
+
var import_promises17 = require("fs/promises");
|
|
8941
|
+
var import_node_os3 = require("os");
|
|
8942
|
+
var import_node_path19 = __toESM(require("path"), 1);
|
|
8930
8943
|
var import_node_stream = require("stream");
|
|
8931
|
-
var
|
|
8944
|
+
var import_node_child_process4 = require("child_process");
|
|
8932
8945
|
var acp = __toESM(require("@agentclientprotocol/sdk"), 1);
|
|
8933
8946
|
|
|
8947
|
+
// src/evaluation/workspace/file-changes.ts
|
|
8948
|
+
init_cjs_shims();
|
|
8949
|
+
var import_node_child_process3 = require("child_process");
|
|
8950
|
+
var import_node_fs7 = require("fs");
|
|
8951
|
+
var import_promises16 = require("fs/promises");
|
|
8952
|
+
var import_node_path17 = __toESM(require("path"), 1);
|
|
8953
|
+
var import_node_util2 = require("util");
|
|
8954
|
+
var execAsync2 = (0, import_node_util2.promisify)(import_node_child_process3.exec);
|
|
8955
|
+
var SNAPSHOT_MAX_FILE_BYTES = 512 * 1024;
|
|
8956
|
+
var SNAPSHOT_EXCLUDE_DIRS = /* @__PURE__ */ new Set([".git", "node_modules", ".agentv", "__pycache__"]);
|
|
8957
|
+
/**
 * Build the `child_process` options shared by every git command that is run
 * against a workspace directory.
 *
 * The returned environment is a copy of `process.env` with the variables that
 * tell git *where the repository lives* removed. If any of them leaks in from
 * the parent process (for example when the tool is launched from a git hook),
 * git would operate on the outer repository, its index, or its object store
 * instead of the repository located at `workspacePath`.
 *
 * @param {string} workspacePath - Directory used as `cwd` for the git command.
 * @returns {{ cwd: string, env: Record<string, string | undefined> }} Options
 *   object suitable for `exec`/`execFile`. `process.env` itself is not mutated.
 */
function gitExecOpts(workspacePath) {
  const env = { ...process.env };
  // Repository-location variables; all of them override discovery based on cwd.
  const repoLocationVars = [
    "GIT_DIR",
    "GIT_WORK_TREE",
    "GIT_INDEX_FILE",
    "GIT_OBJECT_DIRECTORY",
    "GIT_ALTERNATE_OBJECT_DIRECTORIES",
    "GIT_COMMON_DIR"
  ];
  for (const name of repoLocationVars) {
    delete env[name];
  }
  return { cwd: workspacePath, env };
}
|
|
8961
|
+
/**
 * Create the baseline commit that later file-change captures are diffed against.
 *
 * Runs, in order and inside `workspacePath`: `git init`, `git add -A`, and a
 * commit (allowed to be empty) with a fixed `agentv` identity and the message
 * `agentv-baseline`. Each command must finish before the next one starts.
 *
 * @param {string} workspacePath - Directory to initialise and snapshot.
 * @returns {Promise<string>} Trimmed output of `git rev-parse HEAD`.
 */
async function initializeBaseline(workspacePath) {
  const execOptions = gitExecOpts(workspacePath);
  const setupCommands = [
    "git init",
    "git add -A",
    'git -c user.email=agentv@localhost -c user.name=agentv commit --allow-empty -m "agentv-baseline"'
  ];
  for (const command of setupCommands) {
    await execAsync2(command, execOptions);
  }
  const head = await execAsync2("git rev-parse HEAD", execOptions);
  return head.stdout.trim();
}
|
|
8972
|
+
/**
 * Produce the diff between `baselineCommit` and the current workspace contents.
 *
 * Steps: stage changes inside nested repositories, stage everything in the
 * workspace itself (`git add -A`), then run
 * `git diff <baselineCommit> --submodule=diff`.
 *
 * @param {string} workspacePath - Workspace directory containing the git repository.
 * @param {string} baselineCommit - Commit to diff against. It is interpolated
 *   into a shell command, so it must be a trusted revision such as the hash
 *   returned by `initializeBaseline`.
 * @returns {Promise<string>} Trimmed diff text (empty string when nothing changed).
 */
async function captureFileChanges(workspacePath, baselineCommit) {
  // Node's `exec` rejects once stdout exceeds `maxBuffer` (1 MiB by default),
  // which a workspace diff can easily surpass. Allow a much larger diff.
  const diffMaxBufferBytes = 64 * 1024 * 1024;
  const opts = gitExecOpts(workspacePath);
  await stageNestedRepoChanges(workspacePath);
  await execAsync2("git add -A", opts);
  const { stdout } = await execAsync2(`git diff ${baselineCommit} --submodule=diff`, {
    ...opts,
    maxBuffer: diffMaxBufferBytes
  });
  return stdout.trim();
}
|
|
8979
|
+
/**
 * Run `git add -A` inside every direct child directory of `workspacePath`
 * that contains its own `.git` directory.
 *
 * Only the first directory level is inspected. Entries named `.git` or
 * `node_modules` are ignored, as are entries that cannot be stat'ed. If the
 * workspace itself cannot be listed the function returns without doing anything.
 *
 * @param {string} workspacePath - Directory whose immediate children are scanned.
 * @returns {Promise<void>}
 */
async function stageNestedRepoChanges(workspacePath) {
  // True only when `target` exists and is a directory; stat failures count as "no".
  const isDirectory = (target) => {
    try {
      return import_node_fs7.statSync(target).isDirectory();
    } catch {
      return false;
    }
  };

  let names;
  try {
    names = import_node_fs7.readdirSync(workspacePath);
  } catch {
    return;
  }

  for (const name of names) {
    if (name === ".git" || name === "node_modules") continue;
    const candidate = import_node_path17.default.join(workspacePath, name);
    const isNestedRepo = isDirectory(candidate) && isDirectory(import_node_path17.default.join(candidate, ".git"));
    if (!isNestedRepo) continue;
    await execAsync2("git add -A", gitExecOpts(candidate));
  }
}
|
|
8999
|
+
/**
 * Collect the files under `dir` into a map.
 *
 * @param {string} dir - Root directory; also the base the map keys are relative to.
 * @returns {Promise<Map<string, string>>} Map populated by `walkDir`
 *   (relative path → file content).
 */
async function captureSnapshot(dir) {
  const files = new Map();
  await walkDir(dir, dir, files);
  return files;
}
|
|
9004
|
+
/**
 * Recursively read the text files below `currentDir` into `snapshot`.
 *
 * Skipped: entries whose name is in `SNAPSHOT_EXCLUDE_DIRS`, files larger than
 * `SNAPSHOT_MAX_FILE_BYTES`, files whose decoded content contains a NUL
 * character, and anything that cannot be listed, stat'ed or read.
 *
 * Because `stat` follows symbolic links, a directory link that points back at
 * one of its own ancestors would otherwise be walked again and again. The
 * resolved path of every directory on the current recursion chain is tracked
 * and such links are not descended into.
 *
 * @param {string} rootDir - Base directory; keys are relative to it and use `/`.
 * @param {string} currentDir - Directory to scan in this call.
 * @param {Map<string, string>} snapshot - Receives relative path → UTF-8 content.
 * @param {Set<string>} [ancestorDirs] - Resolved paths of the directories
 *   currently being walked above `currentDir`; callers normally omit it.
 * @returns {Promise<void>}
 */
async function walkDir(rootDir, currentDir, snapshot, ancestorDirs = new Set()) {
  let realDir;
  let entries;
  try {
    realDir = await import_promises16.realpath(currentDir);
    // Already on the recursion chain: this is a symlink cycle, stop here.
    if (ancestorDirs.has(realDir)) return;
    entries = await import_promises16.readdir(currentDir);
  } catch {
    return;
  }
  // Copy instead of mutating so sibling directories are not affected.
  const childAncestors = new Set(ancestorDirs).add(realDir);

  for (const entry of entries) {
    if (SNAPSHOT_EXCLUDE_DIRS.has(entry)) continue;
    const fullPath = import_node_path17.default.join(currentDir, entry);

    let fileStat;
    try {
      fileStat = await import_promises16.stat(fullPath);
    } catch {
      continue;
    }

    if (fileStat.isDirectory()) {
      await walkDir(rootDir, fullPath, snapshot, childAncestors);
      continue;
    }
    if (!fileStat.isFile() || fileStat.size > SNAPSHOT_MAX_FILE_BYTES) continue;

    let content;
    try {
      content = await import_promises16.readFile(fullPath, "utf8");
    } catch {
      continue;
    }
    // A NUL character is treated as the marker of a binary file.
    if (content.includes("\0")) continue;

    const relativePath = import_node_path17.default.relative(rootDir, fullPath).replace(/\\/g, "/");
    snapshot.set(relativePath, content);
  }
}
|
|
9036
|
+
/**
 * Render `content` as a git-style "new file" diff for `relativePath`.
 *
 * A single trailing newline is treated as the terminator of the last line,
 * not as an extra empty line. Empty content yields only the two header lines:
 * emitting a hunk for it would claim one added blank line and make an empty
 * file indistinguishable from a file that contains just "\n".
 *
 * @param {string} relativePath - Path shown after `a/` and `b/` in the diff.
 * @param {string} content - Full text of the new file.
 * @returns {string} Diff text joined with "\n" (no trailing newline).
 */
function generateNewFileDiff(relativePath, content) {
  const header = [`diff --git a/${relativePath} b/${relativePath}`, "new file mode 100644"];
  if (content === "") {
    return header.join("\n");
  }
  const body = content.endsWith("\n") ? content.slice(0, -1) : content;
  const lines = body.split("\n");
  return [
    ...header,
    "--- /dev/null",
    `+++ b/${relativePath}`,
    `@@ -0,0 +1,${lines.length} @@`,
    ...lines.map((line) => `+${line}`)
  ].join("\n");
}
|
|
9048
|
+
/**
 * Describe every file found under `filesDir` as a "new file" diff and
 * concatenate the results.
 *
 * @param {string} filesDir - Directory to snapshot.
 * @param {string} [pathPrefix=""] - When non-empty, prepended (with a `/`) to
 *   each file's relative path in the generated diff.
 * @returns {Promise<string | undefined>} The diffs joined with "\n", or
 *   `undefined` when the snapshot fails or contains no files.
 */
async function captureSessionArtifacts(filesDir, pathPrefix = "") {
  let snapshot;
  try {
    snapshot = await captureSnapshot(filesDir);
  } catch {
    snapshot = void 0;
  }
  if (!snapshot || snapshot.size === 0) return void 0;

  const toDiff = ([relPath, content]) => {
    const shownPath = pathPrefix ? `${pathPrefix}/${relPath}` : relPath;
    return generateNewFileDiff(shownPath, content);
  };
  return Array.from(snapshot, toDiff).join("\n");
}
|
|
9058
|
+
|
|
8934
9059
|
// src/evaluation/providers/copilot-cli-log-tracker.ts
|
|
8935
9060
|
init_cjs_shims();
|
|
8936
9061
|
var GLOBAL_LOGS_KEY3 = Symbol.for("agentv.copilotCliLogs");
|
|
@@ -8988,9 +9113,9 @@ function subscribeToCopilotCliLogEntries(listener) {
|
|
|
8988
9113
|
// src/evaluation/providers/copilot-utils.ts
|
|
8989
9114
|
init_cjs_shims();
|
|
8990
9115
|
var import_node_crypto4 = require("crypto");
|
|
8991
|
-
var
|
|
9116
|
+
var import_node_fs8 = require("fs");
|
|
8992
9117
|
var import_node_os2 = require("os");
|
|
8993
|
-
var
|
|
9118
|
+
var import_node_path18 = __toESM(require("path"), 1);
|
|
8994
9119
|
var import_node_url3 = require("url");
|
|
8995
9120
|
var import_meta = {};
|
|
8996
9121
|
function resolvePlatformCliPath() {
|
|
@@ -9015,30 +9140,30 @@ function resolvePlatformCliPath() {
|
|
|
9015
9140
|
try {
|
|
9016
9141
|
const resolved = import_meta.resolve(`${packageName}/package.json`);
|
|
9017
9142
|
const packageJsonPath = resolved.startsWith("file:") ? (0, import_node_url3.fileURLToPath)(resolved) : resolved;
|
|
9018
|
-
const binaryPath =
|
|
9019
|
-
if ((0,
|
|
9143
|
+
const binaryPath = import_node_path18.default.join(import_node_path18.default.dirname(packageJsonPath), binaryName);
|
|
9144
|
+
if ((0, import_node_fs8.existsSync)(binaryPath)) {
|
|
9020
9145
|
return binaryPath;
|
|
9021
9146
|
}
|
|
9022
9147
|
} catch {
|
|
9023
9148
|
}
|
|
9024
9149
|
let searchDir = process.cwd();
|
|
9025
9150
|
for (let i = 0; i < 10; i++) {
|
|
9026
|
-
const standardPath =
|
|
9151
|
+
const standardPath = import_node_path18.default.join(
|
|
9027
9152
|
searchDir,
|
|
9028
9153
|
"node_modules",
|
|
9029
9154
|
...packageName.split("/"),
|
|
9030
9155
|
binaryName
|
|
9031
9156
|
);
|
|
9032
|
-
if ((0,
|
|
9157
|
+
if ((0, import_node_fs8.existsSync)(standardPath)) {
|
|
9033
9158
|
return standardPath;
|
|
9034
9159
|
}
|
|
9035
|
-
const bunDir =
|
|
9160
|
+
const bunDir = import_node_path18.default.join(searchDir, "node_modules", ".bun");
|
|
9036
9161
|
const prefix = `@github+copilot-${osPart}-${archPart}@`;
|
|
9037
9162
|
try {
|
|
9038
|
-
const entries = (0,
|
|
9163
|
+
const entries = (0, import_node_fs8.readdirSync)(bunDir);
|
|
9039
9164
|
for (const entry of entries) {
|
|
9040
9165
|
if (entry.startsWith(prefix)) {
|
|
9041
|
-
const candidate =
|
|
9166
|
+
const candidate = import_node_path18.default.join(
|
|
9042
9167
|
bunDir,
|
|
9043
9168
|
entry,
|
|
9044
9169
|
"node_modules",
|
|
@@ -9046,23 +9171,23 @@ function resolvePlatformCliPath() {
|
|
|
9046
9171
|
`copilot-${osPart}-${archPart}`,
|
|
9047
9172
|
binaryName
|
|
9048
9173
|
);
|
|
9049
|
-
if ((0,
|
|
9174
|
+
if ((0, import_node_fs8.existsSync)(candidate)) {
|
|
9050
9175
|
return candidate;
|
|
9051
9176
|
}
|
|
9052
9177
|
}
|
|
9053
9178
|
}
|
|
9054
9179
|
} catch {
|
|
9055
9180
|
}
|
|
9056
|
-
const parent =
|
|
9181
|
+
const parent = import_node_path18.default.dirname(searchDir);
|
|
9057
9182
|
if (parent === searchDir) break;
|
|
9058
9183
|
searchDir = parent;
|
|
9059
9184
|
}
|
|
9060
9185
|
for (const root of globalNpmRoots()) {
|
|
9061
|
-
const hoisted =
|
|
9062
|
-
if ((0,
|
|
9186
|
+
const hoisted = import_node_path18.default.join(root, "@github", `copilot-${osPart}-${archPart}`, binaryName);
|
|
9187
|
+
if ((0, import_node_fs8.existsSync)(hoisted)) {
|
|
9063
9188
|
return hoisted;
|
|
9064
9189
|
}
|
|
9065
|
-
const nested =
|
|
9190
|
+
const nested = import_node_path18.default.join(
|
|
9066
9191
|
root,
|
|
9067
9192
|
"@github",
|
|
9068
9193
|
"copilot",
|
|
@@ -9071,7 +9196,7 @@ function resolvePlatformCliPath() {
|
|
|
9071
9196
|
`copilot-${osPart}-${archPart}`,
|
|
9072
9197
|
binaryName
|
|
9073
9198
|
);
|
|
9074
|
-
if ((0,
|
|
9199
|
+
if ((0, import_node_fs8.existsSync)(nested)) {
|
|
9075
9200
|
return nested;
|
|
9076
9201
|
}
|
|
9077
9202
|
}
|
|
@@ -9083,20 +9208,20 @@ function globalNpmRoots() {
|
|
|
9083
9208
|
const home = (0, import_node_os2.homedir)();
|
|
9084
9209
|
if (os4 === "win32") {
|
|
9085
9210
|
if (process.env.APPDATA) {
|
|
9086
|
-
roots.push(
|
|
9211
|
+
roots.push(import_node_path18.default.join(process.env.APPDATA, "npm", "node_modules"));
|
|
9087
9212
|
}
|
|
9088
|
-
roots.push(
|
|
9213
|
+
roots.push(import_node_path18.default.join(home, "AppData", "Roaming", "npm", "node_modules"));
|
|
9089
9214
|
} else {
|
|
9090
9215
|
roots.push("/opt/homebrew/lib/node_modules");
|
|
9091
9216
|
roots.push("/usr/local/lib/node_modules");
|
|
9092
9217
|
roots.push("/usr/lib/node_modules");
|
|
9093
|
-
roots.push(
|
|
9094
|
-
roots.push(
|
|
9218
|
+
roots.push(import_node_path18.default.join(home, ".npm-global", "lib", "node_modules"));
|
|
9219
|
+
roots.push(import_node_path18.default.join(home, ".local", "lib", "node_modules"));
|
|
9095
9220
|
}
|
|
9096
9221
|
if (process.env.npm_config_prefix) {
|
|
9097
9222
|
const prefix = process.env.npm_config_prefix;
|
|
9098
9223
|
roots.push(
|
|
9099
|
-
os4 === "win32" ?
|
|
9224
|
+
os4 === "win32" ? import_node_path18.default.join(prefix, "node_modules") : import_node_path18.default.join(prefix, "lib", "node_modules")
|
|
9100
9225
|
);
|
|
9101
9226
|
}
|
|
9102
9227
|
return Array.from(new Set(roots));
|
|
@@ -9143,14 +9268,22 @@ var CopilotStreamLogger = class _CopilotStreamLogger {
|
|
|
9143
9268
|
startedAt = Date.now();
|
|
9144
9269
|
format;
|
|
9145
9270
|
summarize;
|
|
9146
|
-
|
|
9271
|
+
chunkExtractor;
|
|
9272
|
+
pendingText = "";
|
|
9273
|
+
constructor(filePath, format, summarize, chunkExtractor) {
|
|
9147
9274
|
this.filePath = filePath;
|
|
9148
9275
|
this.format = format;
|
|
9149
9276
|
this.summarize = summarize;
|
|
9150
|
-
this.
|
|
9277
|
+
this.chunkExtractor = chunkExtractor;
|
|
9278
|
+
this.stream = (0, import_node_fs8.createWriteStream)(filePath, { flags: "a" });
|
|
9151
9279
|
}
|
|
9152
9280
|
static async create(options, summarize) {
|
|
9153
|
-
const logger = new _CopilotStreamLogger(
|
|
9281
|
+
const logger = new _CopilotStreamLogger(
|
|
9282
|
+
options.filePath,
|
|
9283
|
+
options.format,
|
|
9284
|
+
summarize,
|
|
9285
|
+
options.chunkExtractor
|
|
9286
|
+
);
|
|
9154
9287
|
const header = [
|
|
9155
9288
|
`# ${options.headerLabel} stream log`,
|
|
9156
9289
|
`# target: ${options.targetName}`,
|
|
@@ -9166,19 +9299,42 @@ var CopilotStreamLogger = class _CopilotStreamLogger {
|
|
|
9166
9299
|
return logger;
|
|
9167
9300
|
}
|
|
9168
9301
|
handleEvent(eventType, data) {
|
|
9169
|
-
const elapsed = formatElapsed4(this.startedAt);
|
|
9170
9302
|
if (this.format === "json") {
|
|
9171
|
-
|
|
9172
|
-
|
|
9173
|
-
} else {
|
|
9174
|
-
const summary = this.summarize(eventType, data);
|
|
9175
|
-
if (summary) {
|
|
9176
|
-
this.stream.write(`[+${elapsed}] [${eventType}] ${summary}
|
|
9303
|
+
const elapsed2 = formatElapsed4(this.startedAt);
|
|
9304
|
+
this.stream.write(`${JSON.stringify({ time: elapsed2, event: eventType, data })}
|
|
9177
9305
|
`);
|
|
9306
|
+
return;
|
|
9307
|
+
}
|
|
9308
|
+
if (this.chunkExtractor) {
|
|
9309
|
+
const chunkText = this.chunkExtractor(eventType, data);
|
|
9310
|
+
if (chunkText === null) {
|
|
9311
|
+
this.pendingText = "";
|
|
9312
|
+
return;
|
|
9313
|
+
}
|
|
9314
|
+
if (chunkText !== void 0) {
|
|
9315
|
+
this.pendingText += chunkText;
|
|
9316
|
+
return;
|
|
9178
9317
|
}
|
|
9318
|
+
this.flushPendingText();
|
|
9319
|
+
}
|
|
9320
|
+
const elapsed = formatElapsed4(this.startedAt);
|
|
9321
|
+
const summary = this.summarize(eventType, data);
|
|
9322
|
+
if (summary) {
|
|
9323
|
+
this.stream.write(`[+${elapsed}] [${eventType}] ${summary}
|
|
9324
|
+
`);
|
|
9179
9325
|
}
|
|
9180
9326
|
}
|
|
9327
|
+
flushPendingText() {
|
|
9328
|
+
if (!this.pendingText) return;
|
|
9329
|
+
const elapsed = formatElapsed4(this.startedAt);
|
|
9330
|
+
this.stream.write(`[+${elapsed}] [assistant_message] ${this.pendingText}
|
|
9331
|
+
`);
|
|
9332
|
+
this.pendingText = "";
|
|
9333
|
+
}
|
|
9181
9334
|
async close() {
|
|
9335
|
+
if (this.format !== "json") {
|
|
9336
|
+
this.flushPendingText();
|
|
9337
|
+
}
|
|
9182
9338
|
await new Promise((resolve, reject) => {
|
|
9183
9339
|
this.stream.once("error", reject);
|
|
9184
9340
|
this.stream.end(() => resolve());
|
|
@@ -9215,7 +9371,7 @@ var CopilotCliProvider = class {
|
|
|
9215
9371
|
const logger = await this.createStreamLogger(request).catch(() => void 0);
|
|
9216
9372
|
const executable = this.resolveExecutable();
|
|
9217
9373
|
const args = this.buildCliArgs();
|
|
9218
|
-
const agentProcess = (0,
|
|
9374
|
+
const agentProcess = (0, import_node_child_process4.spawn)(executable, args, {
|
|
9219
9375
|
stdio: ["pipe", "pipe", "inherit"]
|
|
9220
9376
|
});
|
|
9221
9377
|
await waitForProcessSpawn(agentProcess, executable, this.targetName);
|
|
@@ -9387,6 +9543,10 @@ var CopilotCliProvider = class {
|
|
|
9387
9543
|
content: finalContent
|
|
9388
9544
|
});
|
|
9389
9545
|
}
|
|
9546
|
+
const sessionId = session.sessionId;
|
|
9547
|
+
const fileChanges = sessionId ? await captureSessionArtifacts(
|
|
9548
|
+
import_node_path19.default.join((0, import_node_os3.homedir)(), ".copilot", "session-state", sessionId, "files")
|
|
9549
|
+
).catch(() => void 0) : void 0;
|
|
9390
9550
|
return {
|
|
9391
9551
|
raw: {
|
|
9392
9552
|
model: this.config.model,
|
|
@@ -9398,7 +9558,8 @@ var CopilotCliProvider = class {
|
|
|
9398
9558
|
costUsd,
|
|
9399
9559
|
durationMs,
|
|
9400
9560
|
startTime,
|
|
9401
|
-
endTime
|
|
9561
|
+
endTime,
|
|
9562
|
+
...fileChanges ? { fileChanges } : {}
|
|
9402
9563
|
};
|
|
9403
9564
|
} finally {
|
|
9404
9565
|
await logger?.close();
|
|
@@ -9439,10 +9600,10 @@ var CopilotCliProvider = class {
|
|
|
9439
9600
|
}
|
|
9440
9601
|
resolveCwd(cwdOverride) {
|
|
9441
9602
|
if (cwdOverride) {
|
|
9442
|
-
return
|
|
9603
|
+
return import_node_path19.default.resolve(cwdOverride);
|
|
9443
9604
|
}
|
|
9444
9605
|
if (this.config.cwd) {
|
|
9445
|
-
return
|
|
9606
|
+
return import_node_path19.default.resolve(this.config.cwd);
|
|
9446
9607
|
}
|
|
9447
9608
|
return void 0;
|
|
9448
9609
|
}
|
|
@@ -9461,9 +9622,9 @@ var CopilotCliProvider = class {
|
|
|
9461
9622
|
return void 0;
|
|
9462
9623
|
}
|
|
9463
9624
|
if (this.config.logDir) {
|
|
9464
|
-
return
|
|
9625
|
+
return import_node_path19.default.resolve(this.config.logDir);
|
|
9465
9626
|
}
|
|
9466
|
-
return
|
|
9627
|
+
return import_node_path19.default.join(process.cwd(), ".agentv", "logs", "copilot-cli");
|
|
9467
9628
|
}
|
|
9468
9629
|
async createStreamLogger(request) {
|
|
9469
9630
|
const logDir = this.resolveLogDirectory();
|
|
@@ -9471,13 +9632,13 @@ var CopilotCliProvider = class {
|
|
|
9471
9632
|
return void 0;
|
|
9472
9633
|
}
|
|
9473
9634
|
try {
|
|
9474
|
-
await (0,
|
|
9635
|
+
await (0, import_promises17.mkdir)(logDir, { recursive: true });
|
|
9475
9636
|
} catch (error) {
|
|
9476
9637
|
const message = error instanceof Error ? error.message : String(error);
|
|
9477
9638
|
console.warn(`Skipping Copilot CLI stream logging (could not create ${logDir}): ${message}`);
|
|
9478
9639
|
return void 0;
|
|
9479
9640
|
}
|
|
9480
|
-
const filePath =
|
|
9641
|
+
const filePath = import_node_path19.default.join(logDir, buildLogFilename4(request, this.targetName, "copilot-cli"));
|
|
9481
9642
|
try {
|
|
9482
9643
|
const logger = await CopilotStreamLogger.create(
|
|
9483
9644
|
{
|
|
@@ -9486,7 +9647,8 @@ var CopilotCliProvider = class {
|
|
|
9486
9647
|
evalCaseId: request.evalCaseId,
|
|
9487
9648
|
attempt: request.attempt,
|
|
9488
9649
|
format: this.config.logFormat ?? "summary",
|
|
9489
|
-
headerLabel: "Copilot CLI (ACP)"
|
|
9650
|
+
headerLabel: "Copilot CLI (ACP)",
|
|
9651
|
+
chunkExtractor: extractAcpChunk
|
|
9490
9652
|
},
|
|
9491
9653
|
summarizeAcpEvent
|
|
9492
9654
|
);
|
|
@@ -9545,6 +9707,14 @@ Fix options:
|
|
|
9545
9707
|
- In .env: COPILOT_EXE=C:\\Users\\<you>\\AppData\\Roaming\\npm\\node_modules\\@github\\copilot-win32-x64\\copilot.exe
|
|
9546
9708
|
- In .agentv/targets.yaml: executable: \${{ COPILOT_EXE }}`;
|
|
9547
9709
|
}
|
|
9710
|
+
/**
 * Classify an ACP stream event for the chunk-buffering logger.
 *
 * @param {string} eventType - Event name.
 * @param {unknown} data - Event payload.
 * @returns {string | null | undefined}
 *   - `null` for `agent_thought_chunk` events;
 *   - the chunk text for `agent_message_chunk` events whose `data.content` is
 *     `{ type: "text", text: <string> }`;
 *   - `undefined` for every other event or payload shape.
 */
function extractAcpChunk(eventType, data) {
  switch (eventType) {
    case "agent_thought_chunk":
      return null;
    case "agent_message_chunk":
      break;
    default:
      return void 0;
  }
  if (!data || typeof data !== "object") {
    return void 0;
  }
  const { content } = data;
  if (content?.type !== "text" || typeof content.text !== "string") {
    return void 0;
  }
  return content.text;
}
|
|
9548
9718
|
function summarizeAcpEvent(eventType, data) {
|
|
9549
9719
|
if (!data || typeof data !== "object") {
|
|
9550
9720
|
return eventType;
|
|
@@ -9571,9 +9741,9 @@ function summarizeAcpEvent(eventType, data) {
|
|
|
9571
9741
|
|
|
9572
9742
|
// src/evaluation/providers/copilot-log.ts
|
|
9573
9743
|
init_cjs_shims();
|
|
9574
|
-
var
|
|
9575
|
-
var
|
|
9576
|
-
var
|
|
9744
|
+
var import_promises19 = require("fs/promises");
|
|
9745
|
+
var import_node_os5 = require("os");
|
|
9746
|
+
var import_node_path21 = __toESM(require("path"), 1);
|
|
9577
9747
|
|
|
9578
9748
|
// src/evaluation/providers/copilot-log-parser.ts
|
|
9579
9749
|
init_cjs_shims();
|
|
@@ -9707,32 +9877,32 @@ function parseCopilotEvents(eventsJsonl) {
|
|
|
9707
9877
|
|
|
9708
9878
|
// src/evaluation/providers/copilot-session-discovery.ts
|
|
9709
9879
|
init_cjs_shims();
|
|
9710
|
-
var
|
|
9711
|
-
var
|
|
9712
|
-
var
|
|
9880
|
+
var import_promises18 = require("fs/promises");
|
|
9881
|
+
var import_node_os4 = require("os");
|
|
9882
|
+
var import_node_path20 = __toESM(require("path"), 1);
|
|
9713
9883
|
var import_yaml7 = require("yaml");
|
|
9714
|
-
var DEFAULT_SESSION_STATE_DIR = () =>
|
|
9884
|
+
var DEFAULT_SESSION_STATE_DIR = () => import_node_path20.default.join((0, import_node_os4.homedir)(), ".copilot", "session-state");
|
|
9715
9885
|
async function discoverCopilotSessions(opts) {
|
|
9716
9886
|
const sessionStateDir = opts?.sessionStateDir ?? DEFAULT_SESSION_STATE_DIR();
|
|
9717
9887
|
const limit = opts?.limit ?? 10;
|
|
9718
9888
|
let entries;
|
|
9719
9889
|
try {
|
|
9720
|
-
entries = await (0,
|
|
9890
|
+
entries = await (0, import_promises18.readdir)(sessionStateDir);
|
|
9721
9891
|
} catch {
|
|
9722
9892
|
return [];
|
|
9723
9893
|
}
|
|
9724
9894
|
const sessions = [];
|
|
9725
9895
|
for (const entry of entries) {
|
|
9726
|
-
const sessionDir =
|
|
9727
|
-
const workspacePath =
|
|
9728
|
-
const eventsPath =
|
|
9896
|
+
const sessionDir = import_node_path20.default.join(sessionStateDir, entry);
|
|
9897
|
+
const workspacePath = import_node_path20.default.join(sessionDir, "workspace.yaml");
|
|
9898
|
+
const eventsPath = import_node_path20.default.join(sessionDir, "events.jsonl");
|
|
9729
9899
|
try {
|
|
9730
|
-
const workspaceContent = await (0,
|
|
9900
|
+
const workspaceContent = await (0, import_promises18.readFile)(workspacePath, "utf8");
|
|
9731
9901
|
const workspace = (0, import_yaml7.parse)(workspaceContent) ?? {};
|
|
9732
9902
|
const cwd = String(workspace.cwd ?? "");
|
|
9733
9903
|
let updatedAt;
|
|
9734
9904
|
try {
|
|
9735
|
-
const eventsStat = await (0,
|
|
9905
|
+
const eventsStat = await (0, import_promises18.stat)(eventsPath);
|
|
9736
9906
|
updatedAt = eventsStat.mtime;
|
|
9737
9907
|
} catch {
|
|
9738
9908
|
updatedAt = /* @__PURE__ */ new Date(0);
|
|
@@ -9786,21 +9956,24 @@ var CopilotLogProvider = class {
|
|
|
9786
9956
|
}
|
|
9787
9957
|
async invoke(_request) {
|
|
9788
9958
|
const sessionDir = await this.resolveSessionDir();
|
|
9789
|
-
const eventsPath =
|
|
9959
|
+
const eventsPath = import_node_path21.default.join(sessionDir, "events.jsonl");
|
|
9790
9960
|
let eventsContent;
|
|
9791
9961
|
try {
|
|
9792
|
-
eventsContent = await (0,
|
|
9962
|
+
eventsContent = await (0, import_promises19.readFile)(eventsPath, "utf8");
|
|
9793
9963
|
} catch (err) {
|
|
9794
9964
|
throw new Error(
|
|
9795
9965
|
`Failed to read Copilot session transcript at ${eventsPath}: ${err instanceof Error ? err.message : String(err)}`
|
|
9796
9966
|
);
|
|
9797
9967
|
}
|
|
9798
9968
|
const parsed = parseCopilotEvents(eventsContent);
|
|
9969
|
+
const filesDir = import_node_path21.default.join(sessionDir, "files");
|
|
9970
|
+
const fileChanges = await captureSessionArtifacts(filesDir).catch(() => void 0);
|
|
9799
9971
|
return {
|
|
9800
9972
|
output: parsed.messages,
|
|
9801
9973
|
tokenUsage: parsed.tokenUsage,
|
|
9802
9974
|
durationMs: parsed.durationMs,
|
|
9803
|
-
startTime: parsed.meta.startedAt
|
|
9975
|
+
startTime: parsed.meta.startedAt,
|
|
9976
|
+
...fileChanges ? { fileChanges } : {}
|
|
9804
9977
|
};
|
|
9805
9978
|
}
|
|
9806
9979
|
async resolveSessionDir() {
|
|
@@ -9808,8 +9981,8 @@ var CopilotLogProvider = class {
|
|
|
9808
9981
|
return this.config.sessionDir;
|
|
9809
9982
|
}
|
|
9810
9983
|
if (this.config.sessionId) {
|
|
9811
|
-
const stateDir = this.config.sessionStateDir ??
|
|
9812
|
-
return
|
|
9984
|
+
const stateDir = this.config.sessionStateDir ?? import_node_path21.default.join((0, import_node_os5.homedir)(), ".copilot", "session-state");
|
|
9985
|
+
return import_node_path21.default.join(stateDir, this.config.sessionId);
|
|
9813
9986
|
}
|
|
9814
9987
|
if (this.config.discover === "latest") {
|
|
9815
9988
|
const sessions = await discoverCopilotSessions({
|
|
@@ -9833,9 +10006,9 @@ var CopilotLogProvider = class {
|
|
|
9833
10006
|
// src/evaluation/providers/copilot-sdk.ts
|
|
9834
10007
|
init_cjs_shims();
|
|
9835
10008
|
var import_node_crypto6 = require("crypto");
|
|
9836
|
-
var
|
|
9837
|
-
var
|
|
9838
|
-
var
|
|
10009
|
+
var import_node_fs9 = require("fs");
|
|
10010
|
+
var import_promises20 = require("fs/promises");
|
|
10011
|
+
var import_node_path22 = __toESM(require("path"), 1);
|
|
9839
10012
|
|
|
9840
10013
|
// src/evaluation/providers/copilot-sdk-log-tracker.ts
|
|
9841
10014
|
init_cjs_shims();
|
|
@@ -10082,6 +10255,10 @@ var CopilotSdkProvider = class {
|
|
|
10082
10255
|
content: finalContent
|
|
10083
10256
|
});
|
|
10084
10257
|
}
|
|
10258
|
+
const sessionWorkspacePath = session.workspacePath;
|
|
10259
|
+
const fileChanges = sessionWorkspacePath ? await captureSessionArtifacts(import_node_path22.default.join(sessionWorkspacePath, "files")).catch(
|
|
10260
|
+
() => void 0
|
|
10261
|
+
) : void 0;
|
|
10085
10262
|
return {
|
|
10086
10263
|
raw: {
|
|
10087
10264
|
model: this.config.model,
|
|
@@ -10093,7 +10270,8 @@ var CopilotSdkProvider = class {
|
|
|
10093
10270
|
costUsd,
|
|
10094
10271
|
durationMs,
|
|
10095
10272
|
startTime,
|
|
10096
|
-
endTime
|
|
10273
|
+
endTime,
|
|
10274
|
+
...fileChanges ? { fileChanges } : {}
|
|
10097
10275
|
};
|
|
10098
10276
|
} finally {
|
|
10099
10277
|
unsubscribe();
|
|
@@ -10146,10 +10324,10 @@ var CopilotSdkProvider = class {
|
|
|
10146
10324
|
}
|
|
10147
10325
|
resolveCwd(cwdOverride) {
|
|
10148
10326
|
if (cwdOverride) {
|
|
10149
|
-
return
|
|
10327
|
+
return import_node_path22.default.resolve(cwdOverride);
|
|
10150
10328
|
}
|
|
10151
10329
|
if (this.config.cwd) {
|
|
10152
|
-
return
|
|
10330
|
+
return import_node_path22.default.resolve(this.config.cwd);
|
|
10153
10331
|
}
|
|
10154
10332
|
return void 0;
|
|
10155
10333
|
}
|
|
@@ -10158,9 +10336,9 @@ var CopilotSdkProvider = class {
|
|
|
10158
10336
|
return void 0;
|
|
10159
10337
|
}
|
|
10160
10338
|
if (this.config.logDir) {
|
|
10161
|
-
return
|
|
10339
|
+
return import_node_path22.default.resolve(this.config.logDir);
|
|
10162
10340
|
}
|
|
10163
|
-
return
|
|
10341
|
+
return import_node_path22.default.join(process.cwd(), ".agentv", "logs", "copilot-sdk");
|
|
10164
10342
|
}
|
|
10165
10343
|
async createStreamLogger(request) {
|
|
10166
10344
|
const logDir = this.resolveLogDirectory();
|
|
@@ -10168,13 +10346,13 @@ var CopilotSdkProvider = class {
|
|
|
10168
10346
|
return void 0;
|
|
10169
10347
|
}
|
|
10170
10348
|
try {
|
|
10171
|
-
await (0,
|
|
10349
|
+
await (0, import_promises20.mkdir)(logDir, { recursive: true });
|
|
10172
10350
|
} catch (error) {
|
|
10173
10351
|
const message = error instanceof Error ? error.message : String(error);
|
|
10174
10352
|
console.warn(`Skipping Copilot SDK stream logging (could not create ${logDir}): ${message}`);
|
|
10175
10353
|
return void 0;
|
|
10176
10354
|
}
|
|
10177
|
-
const filePath =
|
|
10355
|
+
const filePath = import_node_path22.default.join(logDir, buildLogFilename4(request, this.targetName, "copilot-sdk"));
|
|
10178
10356
|
try {
|
|
10179
10357
|
const logger = await CopilotStreamLogger.create(
|
|
10180
10358
|
{
|
|
@@ -10183,7 +10361,8 @@ var CopilotSdkProvider = class {
|
|
|
10183
10361
|
evalCaseId: request.evalCaseId,
|
|
10184
10362
|
attempt: request.attempt,
|
|
10185
10363
|
format: this.config.logFormat ?? "summary",
|
|
10186
|
-
headerLabel: "Copilot SDK"
|
|
10364
|
+
headerLabel: "Copilot SDK",
|
|
10365
|
+
chunkExtractor: extractSdkChunk
|
|
10187
10366
|
},
|
|
10188
10367
|
summarizeSdkEvent
|
|
10189
10368
|
);
|
|
@@ -10203,11 +10382,11 @@ var CopilotSdkProvider = class {
|
|
|
10203
10382
|
};
|
|
10204
10383
|
function resolveSkillDirectories(cwd) {
|
|
10205
10384
|
const candidates = [
|
|
10206
|
-
|
|
10207
|
-
|
|
10208
|
-
|
|
10385
|
+
import_node_path22.default.join(cwd, ".claude", "skills"),
|
|
10386
|
+
import_node_path22.default.join(cwd, ".agents", "skills"),
|
|
10387
|
+
import_node_path22.default.join(cwd, ".codex", "skills")
|
|
10209
10388
|
];
|
|
10210
|
-
return candidates.filter((dir) => (0,
|
|
10389
|
+
return candidates.filter((dir) => (0, import_node_fs9.existsSync)(dir));
|
|
10211
10390
|
}
|
|
10212
10391
|
function normalizeByokBaseUrl(baseUrl, type) {
|
|
10213
10392
|
const trimmed = baseUrl.trim().replace(/\/+$/, "");
|
|
@@ -10219,6 +10398,12 @@ function normalizeByokBaseUrl(baseUrl, type) {
|
|
|
10219
10398
|
}
|
|
10220
10399
|
return trimmed;
|
|
10221
10400
|
}
|
|
10401
|
+
function extractSdkChunk(eventType, data) {
|
|
10402
|
+
if (eventType !== "assistant.message_delta") return void 0;
|
|
10403
|
+
if (!data || typeof data !== "object") return void 0;
|
|
10404
|
+
const d = data;
|
|
10405
|
+
return typeof d.deltaContent === "string" ? d.deltaContent : void 0;
|
|
10406
|
+
}
|
|
10222
10407
|
function summarizeSdkEvent(eventType, data) {
|
|
10223
10408
|
if (!data || typeof data !== "object") {
|
|
10224
10409
|
return eventType;
|
|
@@ -10286,12 +10471,12 @@ var MockProvider = class {
|
|
|
10286
10471
|
|
|
10287
10472
|
// src/evaluation/providers/pi-cli.ts
|
|
10288
10473
|
init_cjs_shims();
|
|
10289
|
-
var
|
|
10474
|
+
var import_node_child_process5 = require("child_process");
|
|
10290
10475
|
var import_node_crypto7 = require("crypto");
|
|
10291
|
-
var
|
|
10292
|
-
var
|
|
10293
|
-
var
|
|
10294
|
-
var
|
|
10476
|
+
var import_node_fs10 = require("fs");
|
|
10477
|
+
var import_promises21 = require("fs/promises");
|
|
10478
|
+
var import_node_os6 = require("os");
|
|
10479
|
+
var import_node_path23 = __toESM(require("path"), 1);
|
|
10295
10480
|
|
|
10296
10481
|
// src/evaluation/providers/pi-log-tracker.ts
|
|
10297
10482
|
init_cjs_shims();
|
|
@@ -10500,8 +10685,8 @@ var PiCliProvider = class {
|
|
|
10500
10685
|
const cwd = this.resolveCwd(workspaceRoot, request.cwd);
|
|
10501
10686
|
const logger = await this.createStreamLogger(request).catch(() => void 0);
|
|
10502
10687
|
try {
|
|
10503
|
-
const promptFile =
|
|
10504
|
-
await (0,
|
|
10688
|
+
const promptFile = import_node_path23.default.join(cwd, PROMPT_FILENAME);
|
|
10689
|
+
await (0, import_promises21.writeFile)(promptFile, request.question, "utf8");
|
|
10505
10690
|
const args = this.buildPiArgs(request.question, inputFiles);
|
|
10506
10691
|
const result = await this.executePi(args, cwd, request.signal, logger);
|
|
10507
10692
|
if (result.timedOut) {
|
|
@@ -10563,10 +10748,10 @@ var PiCliProvider = class {
|
|
|
10563
10748
|
}
|
|
10564
10749
|
resolveCwd(workspaceRoot, cwdOverride) {
|
|
10565
10750
|
if (cwdOverride) {
|
|
10566
|
-
return
|
|
10751
|
+
return import_node_path23.default.resolve(cwdOverride);
|
|
10567
10752
|
}
|
|
10568
10753
|
if (this.config.cwd) {
|
|
10569
|
-
return
|
|
10754
|
+
return import_node_path23.default.resolve(this.config.cwd);
|
|
10570
10755
|
}
|
|
10571
10756
|
if (workspaceRoot) {
|
|
10572
10757
|
return workspaceRoot;
|
|
@@ -10672,19 +10857,19 @@ ${prompt}` : prompt;
|
|
|
10672
10857
|
return env;
|
|
10673
10858
|
}
|
|
10674
10859
|
async createWorkspace() {
|
|
10675
|
-
return await (0,
|
|
10860
|
+
return await (0, import_promises21.mkdtemp)(import_node_path23.default.join((0, import_node_os6.tmpdir)(), WORKSPACE_PREFIX));
|
|
10676
10861
|
}
|
|
10677
10862
|
async cleanupWorkspace(workspaceRoot) {
|
|
10678
10863
|
try {
|
|
10679
|
-
await (0,
|
|
10864
|
+
await (0, import_promises21.rm)(workspaceRoot, { recursive: true, force: true });
|
|
10680
10865
|
} catch {
|
|
10681
10866
|
}
|
|
10682
10867
|
}
|
|
10683
10868
|
resolveLogDirectory() {
|
|
10684
10869
|
if (this.config.logDir) {
|
|
10685
|
-
return
|
|
10870
|
+
return import_node_path23.default.resolve(this.config.logDir);
|
|
10686
10871
|
}
|
|
10687
|
-
return
|
|
10872
|
+
return import_node_path23.default.join(process.cwd(), ".agentv", "logs", "pi-cli");
|
|
10688
10873
|
}
|
|
10689
10874
|
async createStreamLogger(request) {
|
|
10690
10875
|
const logDir = this.resolveLogDirectory();
|
|
@@ -10692,13 +10877,13 @@ ${prompt}` : prompt;
|
|
|
10692
10877
|
return void 0;
|
|
10693
10878
|
}
|
|
10694
10879
|
try {
|
|
10695
|
-
await (0,
|
|
10880
|
+
await (0, import_promises21.mkdir)(logDir, { recursive: true });
|
|
10696
10881
|
} catch (error) {
|
|
10697
10882
|
const message = error instanceof Error ? error.message : String(error);
|
|
10698
10883
|
console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
|
|
10699
10884
|
return void 0;
|
|
10700
10885
|
}
|
|
10701
|
-
const filePath =
|
|
10886
|
+
const filePath = import_node_path23.default.join(logDir, buildLogFilename5(request, this.targetName));
|
|
10702
10887
|
try {
|
|
10703
10888
|
const logger = await PiStreamLogger.create({
|
|
10704
10889
|
filePath,
|
|
@@ -10731,7 +10916,7 @@ var PiStreamLogger = class _PiStreamLogger {
|
|
|
10731
10916
|
constructor(filePath, format) {
|
|
10732
10917
|
this.filePath = filePath;
|
|
10733
10918
|
this.format = format;
|
|
10734
|
-
this.stream = (0,
|
|
10919
|
+
this.stream = (0, import_node_fs10.createWriteStream)(filePath, { flags: "a" });
|
|
10735
10920
|
}
|
|
10736
10921
|
static async create(options) {
|
|
10737
10922
|
const logger = new _PiStreamLogger(options.filePath, options.format);
|
|
@@ -11160,19 +11345,19 @@ function resolveWindowsCmd(executable) {
|
|
|
11160
11345
|
if (lower.endsWith(".js") || lower.endsWith(".exe")) return [executable, []];
|
|
11161
11346
|
let fullPath;
|
|
11162
11347
|
try {
|
|
11163
|
-
fullPath = (0,
|
|
11348
|
+
fullPath = (0, import_node_child_process5.execSync)(`where ${executable}`, { encoding: "utf-8" }).trim().split(/\r?\n/)[0].trim();
|
|
11164
11349
|
} catch {
|
|
11165
11350
|
return [executable, []];
|
|
11166
11351
|
}
|
|
11167
11352
|
const cmdPath = fullPath.endsWith(".cmd") ? fullPath : `${fullPath}.cmd`;
|
|
11168
11353
|
try {
|
|
11169
|
-
const content = (0,
|
|
11354
|
+
const content = (0, import_node_fs10.readFileSync)(cmdPath, "utf-8");
|
|
11170
11355
|
const match = content.match(/"?%_prog%"?\s+"([^"]+\.js)"/);
|
|
11171
11356
|
if (match) {
|
|
11172
|
-
const dp0 =
|
|
11173
|
-
const scriptPath = match[1].replace(/%dp0%[/\\]?/gi, `${dp0}${
|
|
11357
|
+
const dp0 = import_node_path23.default.dirname(import_node_path23.default.resolve(cmdPath));
|
|
11358
|
+
const scriptPath = match[1].replace(/%dp0%[/\\]?/gi, `${dp0}${import_node_path23.default.sep}`);
|
|
11174
11359
|
try {
|
|
11175
|
-
(0,
|
|
11360
|
+
(0, import_node_fs10.accessSync)(scriptPath);
|
|
11176
11361
|
return ["node", [scriptPath]];
|
|
11177
11362
|
} catch {
|
|
11178
11363
|
}
|
|
@@ -11187,7 +11372,7 @@ async function defaultPiRunner(options) {
|
|
|
11187
11372
|
const [resolvedExe, prefixArgs] = resolveWindowsCmd(parts[0]);
|
|
11188
11373
|
const executableArgs = [...prefixArgs, ...parts.slice(1)];
|
|
11189
11374
|
const allArgs = [...executableArgs, ...options.args];
|
|
11190
|
-
const child = (0,
|
|
11375
|
+
const child = (0, import_node_child_process5.spawn)(resolvedExe, allArgs, {
|
|
11191
11376
|
cwd: options.cwd,
|
|
11192
11377
|
env: options.env,
|
|
11193
11378
|
stdio: ["pipe", "pipe", "pipe"]
|
|
@@ -11246,18 +11431,18 @@ async function defaultPiRunner(options) {
|
|
|
11246
11431
|
|
|
11247
11432
|
// src/evaluation/providers/pi-coding-agent.ts
|
|
11248
11433
|
init_cjs_shims();
|
|
11249
|
-
var
|
|
11434
|
+
var import_node_child_process6 = require("child_process");
|
|
11250
11435
|
var import_node_crypto8 = require("crypto");
|
|
11251
|
-
var
|
|
11252
|
-
var
|
|
11253
|
-
var
|
|
11436
|
+
var import_node_fs11 = require("fs");
|
|
11437
|
+
var import_promises22 = require("fs/promises");
|
|
11438
|
+
var import_node_path25 = __toESM(require("path"), 1);
|
|
11254
11439
|
var import_node_readline = require("readline");
|
|
11255
11440
|
var import_node_url4 = require("url");
|
|
11256
11441
|
|
|
11257
11442
|
// src/paths.ts
|
|
11258
11443
|
init_cjs_shims();
|
|
11259
|
-
var
|
|
11260
|
-
var
|
|
11444
|
+
var import_node_os7 = __toESM(require("os"), 1);
|
|
11445
|
+
var import_node_path24 = __toESM(require("path"), 1);
|
|
11261
11446
|
var logged = false;
|
|
11262
11447
|
function getAgentvHome() {
|
|
11263
11448
|
const envHome = process.env.AGENTV_HOME;
|
|
@@ -11268,19 +11453,19 @@ function getAgentvHome() {
|
|
|
11268
11453
|
}
|
|
11269
11454
|
return envHome;
|
|
11270
11455
|
}
|
|
11271
|
-
return
|
|
11456
|
+
return import_node_path24.default.join(import_node_os7.default.homedir(), ".agentv");
|
|
11272
11457
|
}
|
|
11273
11458
|
function getWorkspacesRoot() {
|
|
11274
|
-
return
|
|
11459
|
+
return import_node_path24.default.join(getAgentvHome(), "workspaces");
|
|
11275
11460
|
}
|
|
11276
11461
|
function getSubagentsRoot() {
|
|
11277
|
-
return
|
|
11462
|
+
return import_node_path24.default.join(getAgentvHome(), "subagents");
|
|
11278
11463
|
}
|
|
11279
11464
|
function getTraceStateRoot() {
|
|
11280
|
-
return
|
|
11465
|
+
return import_node_path24.default.join(getAgentvHome(), "trace-state");
|
|
11281
11466
|
}
|
|
11282
11467
|
function getWorkspacePoolRoot() {
|
|
11283
|
-
return
|
|
11468
|
+
return import_node_path24.default.join(getAgentvHome(), "workspace-pool");
|
|
11284
11469
|
}
|
|
11285
11470
|
|
|
11286
11471
|
// src/evaluation/providers/pi-coding-agent.ts
|
|
@@ -11302,11 +11487,11 @@ async function promptInstall() {
|
|
|
11302
11487
|
}
|
|
11303
11488
|
}
|
|
11304
11489
|
function findManagedSdkInstallRoot() {
|
|
11305
|
-
return
|
|
11490
|
+
return import_node_path25.default.join(getAgentvHome(), "deps", "pi-sdk");
|
|
11306
11491
|
}
|
|
11307
11492
|
function resolveGlobalNpmRoot() {
|
|
11308
11493
|
try {
|
|
11309
|
-
const root = (0,
|
|
11494
|
+
const root = (0, import_node_child_process6.execSync)("npm root -g", {
|
|
11310
11495
|
encoding: "utf-8",
|
|
11311
11496
|
stdio: ["ignore", "pipe", "ignore"]
|
|
11312
11497
|
}).trim();
|
|
@@ -11316,12 +11501,12 @@ function resolveGlobalNpmRoot() {
|
|
|
11316
11501
|
}
|
|
11317
11502
|
}
|
|
11318
11503
|
function buildGlobalModuleEntry(moduleName, globalNpmRoot) {
|
|
11319
|
-
return
|
|
11504
|
+
return import_node_path25.default.join(globalNpmRoot, ...moduleName.split("/"), "dist", "index.js");
|
|
11320
11505
|
}
|
|
11321
11506
|
function findAccessiblePath(paths) {
|
|
11322
11507
|
for (const candidate of paths) {
|
|
11323
11508
|
try {
|
|
11324
|
-
(0,
|
|
11509
|
+
(0, import_node_fs11.accessSync)(candidate);
|
|
11325
11510
|
return candidate;
|
|
11326
11511
|
} catch {
|
|
11327
11512
|
}
|
|
@@ -11342,11 +11527,11 @@ async function tryImportLocalSdkModules() {
|
|
|
11342
11527
|
async function tryImportManagedSdkModules() {
|
|
11343
11528
|
const managedRoot = findManagedSdkInstallRoot();
|
|
11344
11529
|
const piCodingAgentEntry = findAccessiblePath([
|
|
11345
|
-
|
|
11530
|
+
import_node_path25.default.join(managedRoot, "node_modules", "@mariozechner", "pi-coding-agent", "dist", "index.js")
|
|
11346
11531
|
]);
|
|
11347
11532
|
const piAiEntry = findAccessiblePath([
|
|
11348
|
-
|
|
11349
|
-
|
|
11533
|
+
import_node_path25.default.join(managedRoot, "node_modules", "@mariozechner", "pi-ai", "dist", "index.js"),
|
|
11534
|
+
import_node_path25.default.join(
|
|
11350
11535
|
managedRoot,
|
|
11351
11536
|
"node_modules",
|
|
11352
11537
|
"@mariozechner",
|
|
@@ -11377,7 +11562,7 @@ async function tryImportGlobalSdkModules() {
|
|
|
11377
11562
|
]);
|
|
11378
11563
|
const piAiEntry = findAccessiblePath([
|
|
11379
11564
|
buildGlobalModuleEntry("@mariozechner/pi-ai", globalNpmRoot),
|
|
11380
|
-
|
|
11565
|
+
import_node_path25.default.join(
|
|
11381
11566
|
globalNpmRoot,
|
|
11382
11567
|
"@mariozechner",
|
|
11383
11568
|
"pi-coding-agent",
|
|
@@ -11401,8 +11586,8 @@ async function tryImportGlobalSdkModules() {
|
|
|
11401
11586
|
}
|
|
11402
11587
|
function installSdkModules(installDir) {
|
|
11403
11588
|
console.error(`Installing @mariozechner/pi-coding-agent into ${installDir} via npm...`);
|
|
11404
|
-
(0,
|
|
11405
|
-
(0,
|
|
11589
|
+
(0, import_node_fs11.mkdirSync)(installDir, { recursive: true });
|
|
11590
|
+
(0, import_node_child_process6.execSync)("npm install --no-save --no-package-lock @mariozechner/pi-coding-agent", {
|
|
11406
11591
|
cwd: installDir,
|
|
11407
11592
|
stdio: "inherit"
|
|
11408
11593
|
});
|
|
@@ -11678,10 +11863,10 @@ ${fileList}`;
|
|
|
11678
11863
|
}
|
|
11679
11864
|
resolveCwd(cwdOverride) {
|
|
11680
11865
|
if (cwdOverride) {
|
|
11681
|
-
return
|
|
11866
|
+
return import_node_path25.default.resolve(cwdOverride);
|
|
11682
11867
|
}
|
|
11683
11868
|
if (this.config.cwd) {
|
|
11684
|
-
return
|
|
11869
|
+
return import_node_path25.default.resolve(this.config.cwd);
|
|
11685
11870
|
}
|
|
11686
11871
|
return process.cwd();
|
|
11687
11872
|
}
|
|
@@ -11700,9 +11885,9 @@ ${fileList}`;
|
|
|
11700
11885
|
}
|
|
11701
11886
|
resolveLogDirectory() {
|
|
11702
11887
|
if (this.config.logDir) {
|
|
11703
|
-
return
|
|
11888
|
+
return import_node_path25.default.resolve(this.config.logDir);
|
|
11704
11889
|
}
|
|
11705
|
-
return
|
|
11890
|
+
return import_node_path25.default.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
|
|
11706
11891
|
}
|
|
11707
11892
|
async createStreamLogger(request) {
|
|
11708
11893
|
const logDir = this.resolveLogDirectory();
|
|
@@ -11710,13 +11895,13 @@ ${fileList}`;
|
|
|
11710
11895
|
return void 0;
|
|
11711
11896
|
}
|
|
11712
11897
|
try {
|
|
11713
|
-
await (0,
|
|
11898
|
+
await (0, import_promises22.mkdir)(logDir, { recursive: true });
|
|
11714
11899
|
} catch (error) {
|
|
11715
11900
|
const message = error instanceof Error ? error.message : String(error);
|
|
11716
11901
|
console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
|
|
11717
11902
|
return void 0;
|
|
11718
11903
|
}
|
|
11719
|
-
const filePath =
|
|
11904
|
+
const filePath = import_node_path25.default.join(logDir, buildLogFilename6(request, this.targetName));
|
|
11720
11905
|
try {
|
|
11721
11906
|
const logger = await PiStreamLogger2.create({
|
|
11722
11907
|
filePath,
|
|
@@ -11747,7 +11932,7 @@ var PiStreamLogger2 = class _PiStreamLogger {
|
|
|
11747
11932
|
constructor(filePath, format) {
|
|
11748
11933
|
this.filePath = filePath;
|
|
11749
11934
|
this.format = format;
|
|
11750
|
-
this.stream = (0,
|
|
11935
|
+
this.stream = (0, import_node_fs11.createWriteStream)(filePath, { flags: "a" });
|
|
11751
11936
|
}
|
|
11752
11937
|
static async create(options) {
|
|
11753
11938
|
const logger = new _PiStreamLogger(options.filePath, options.format);
|
|
@@ -11931,7 +12116,7 @@ var ProviderRegistry = class {
|
|
|
11931
12116
|
|
|
11932
12117
|
// src/evaluation/providers/targets.ts
|
|
11933
12118
|
init_cjs_shims();
|
|
11934
|
-
var
|
|
12119
|
+
var import_node_path26 = __toESM(require("path"), 1);
|
|
11935
12120
|
var import_zod3 = require("zod");
|
|
11936
12121
|
var CliHealthcheckHttpInputSchema = import_zod3.z.object({
|
|
11937
12122
|
url: import_zod3.z.string().min(1, "healthcheck URL is required"),
|
|
@@ -12018,11 +12203,11 @@ function normalizeCliHealthcheck(input, env, targetName, evalFilePath) {
|
|
|
12018
12203
|
allowLiteral: true,
|
|
12019
12204
|
optionalEnv: true
|
|
12020
12205
|
});
|
|
12021
|
-
if (cwd && evalFilePath && !
|
|
12022
|
-
cwd =
|
|
12206
|
+
if (cwd && evalFilePath && !import_node_path26.default.isAbsolute(cwd)) {
|
|
12207
|
+
cwd = import_node_path26.default.resolve(import_node_path26.default.dirname(import_node_path26.default.resolve(evalFilePath)), cwd);
|
|
12023
12208
|
}
|
|
12024
12209
|
if (!cwd && evalFilePath) {
|
|
12025
|
-
cwd =
|
|
12210
|
+
cwd = import_node_path26.default.dirname(import_node_path26.default.resolve(evalFilePath));
|
|
12026
12211
|
}
|
|
12027
12212
|
return {
|
|
12028
12213
|
command,
|
|
@@ -12045,15 +12230,15 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
|
|
|
12045
12230
|
optionalEnv: true
|
|
12046
12231
|
}
|
|
12047
12232
|
);
|
|
12048
|
-
if (workspaceTemplate && evalFilePath && !
|
|
12049
|
-
workspaceTemplate =
|
|
12233
|
+
if (workspaceTemplate && evalFilePath && !import_node_path26.default.isAbsolute(workspaceTemplate)) {
|
|
12234
|
+
workspaceTemplate = import_node_path26.default.resolve(import_node_path26.default.dirname(import_node_path26.default.resolve(evalFilePath)), workspaceTemplate);
|
|
12050
12235
|
}
|
|
12051
12236
|
let cwd = resolveOptionalString(input.cwd, env, `${targetName} working directory`, {
|
|
12052
12237
|
allowLiteral: true,
|
|
12053
12238
|
optionalEnv: true
|
|
12054
12239
|
});
|
|
12055
|
-
if (cwd && evalFilePath && !
|
|
12056
|
-
cwd =
|
|
12240
|
+
if (cwd && evalFilePath && !import_node_path26.default.isAbsolute(cwd)) {
|
|
12241
|
+
cwd = import_node_path26.default.resolve(import_node_path26.default.dirname(import_node_path26.default.resolve(evalFilePath)), cwd);
|
|
12057
12242
|
}
|
|
12058
12243
|
if (cwd && workspaceTemplate) {
|
|
12059
12244
|
throw new Error(
|
|
@@ -12061,7 +12246,7 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
|
|
|
12061
12246
|
);
|
|
12062
12247
|
}
|
|
12063
12248
|
if (!cwd && !workspaceTemplate && evalFilePath) {
|
|
12064
|
-
cwd =
|
|
12249
|
+
cwd = import_node_path26.default.dirname(import_node_path26.default.resolve(evalFilePath));
|
|
12065
12250
|
}
|
|
12066
12251
|
const timeoutSeconds = input.timeout_seconds;
|
|
12067
12252
|
const timeoutMs = timeoutSeconds !== void 0 ? Math.floor(timeoutSeconds * 1e3) : void 0;
|
|
@@ -12613,8 +12798,8 @@ function resolveCodexConfig(target, env, evalFilePath) {
|
|
|
12613
12798
|
optionalEnv: true
|
|
12614
12799
|
}
|
|
12615
12800
|
);
|
|
12616
|
-
if (workspaceTemplate && evalFilePath && !
|
|
12617
|
-
workspaceTemplate =
|
|
12801
|
+
if (workspaceTemplate && evalFilePath && !import_node_path26.default.isAbsolute(workspaceTemplate)) {
|
|
12802
|
+
workspaceTemplate = import_node_path26.default.resolve(import_node_path26.default.dirname(import_node_path26.default.resolve(evalFilePath)), workspaceTemplate);
|
|
12618
12803
|
}
|
|
12619
12804
|
if (cwd && workspaceTemplate) {
|
|
12620
12805
|
throw new Error(
|
|
@@ -12698,8 +12883,8 @@ function resolveCopilotSdkConfig(target, env, evalFilePath) {
|
|
|
12698
12883
|
optionalEnv: true
|
|
12699
12884
|
}
|
|
12700
12885
|
);
|
|
12701
|
-
if (workspaceTemplate && evalFilePath && !
|
|
12702
|
-
workspaceTemplate =
|
|
12886
|
+
if (workspaceTemplate && evalFilePath && !import_node_path26.default.isAbsolute(workspaceTemplate)) {
|
|
12887
|
+
workspaceTemplate = import_node_path26.default.resolve(import_node_path26.default.dirname(import_node_path26.default.resolve(evalFilePath)), workspaceTemplate);
|
|
12703
12888
|
}
|
|
12704
12889
|
if (cwd && workspaceTemplate) {
|
|
12705
12890
|
throw new Error(
|
|
@@ -12815,8 +13000,8 @@ function resolveCopilotCliConfig(target, env, evalFilePath) {
|
|
|
12815
13000
|
optionalEnv: true
|
|
12816
13001
|
}
|
|
12817
13002
|
);
|
|
12818
|
-
if (workspaceTemplate && evalFilePath && !
|
|
12819
|
-
workspaceTemplate =
|
|
13003
|
+
if (workspaceTemplate && evalFilePath && !import_node_path26.default.isAbsolute(workspaceTemplate)) {
|
|
13004
|
+
workspaceTemplate = import_node_path26.default.resolve(import_node_path26.default.dirname(import_node_path26.default.resolve(evalFilePath)), workspaceTemplate);
|
|
12820
13005
|
}
|
|
12821
13006
|
if (cwd && workspaceTemplate) {
|
|
12822
13007
|
throw new Error(
|
|
@@ -12909,8 +13094,8 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
|
|
|
12909
13094
|
optionalEnv: true
|
|
12910
13095
|
}
|
|
12911
13096
|
);
|
|
12912
|
-
if (workspaceTemplate && evalFilePath && !
|
|
12913
|
-
workspaceTemplate =
|
|
13097
|
+
if (workspaceTemplate && evalFilePath && !import_node_path26.default.isAbsolute(workspaceTemplate)) {
|
|
13098
|
+
workspaceTemplate = import_node_path26.default.resolve(import_node_path26.default.dirname(import_node_path26.default.resolve(evalFilePath)), workspaceTemplate);
|
|
12914
13099
|
}
|
|
12915
13100
|
if (cwd && workspaceTemplate) {
|
|
12916
13101
|
throw new Error(
|
|
@@ -12995,8 +13180,8 @@ function resolvePiCliConfig(target, env, evalFilePath) {
|
|
|
12995
13180
|
`${target.name} pi-cli workspace template`,
|
|
12996
13181
|
{ allowLiteral: true, optionalEnv: true }
|
|
12997
13182
|
);
|
|
12998
|
-
if (workspaceTemplate && evalFilePath && !
|
|
12999
|
-
workspaceTemplate =
|
|
13183
|
+
if (workspaceTemplate && evalFilePath && !import_node_path26.default.isAbsolute(workspaceTemplate)) {
|
|
13184
|
+
workspaceTemplate = import_node_path26.default.resolve(import_node_path26.default.dirname(import_node_path26.default.resolve(evalFilePath)), workspaceTemplate);
|
|
13000
13185
|
}
|
|
13001
13186
|
if (cwd && workspaceTemplate) {
|
|
13002
13187
|
throw new Error(`${target.name}: 'cwd' and 'workspace_template' are mutually exclusive.`);
|
|
@@ -13050,8 +13235,8 @@ function resolveClaudeConfig(target, env, evalFilePath) {
|
|
|
13050
13235
|
optionalEnv: true
|
|
13051
13236
|
}
|
|
13052
13237
|
);
|
|
13053
|
-
if (workspaceTemplate && evalFilePath && !
|
|
13054
|
-
workspaceTemplate =
|
|
13238
|
+
if (workspaceTemplate && evalFilePath && !import_node_path26.default.isAbsolute(workspaceTemplate)) {
|
|
13239
|
+
workspaceTemplate = import_node_path26.default.resolve(import_node_path26.default.dirname(import_node_path26.default.resolve(evalFilePath)), workspaceTemplate);
|
|
13055
13240
|
}
|
|
13056
13241
|
if (cwd && workspaceTemplate) {
|
|
13057
13242
|
throw new Error(
|
|
@@ -13107,8 +13292,8 @@ function resolveVSCodeConfig(target, env, insiders, evalFilePath) {
|
|
|
13107
13292
|
optionalEnv: true
|
|
13108
13293
|
}
|
|
13109
13294
|
) : void 0;
|
|
13110
|
-
if (workspaceTemplate && evalFilePath && !
|
|
13111
|
-
workspaceTemplate =
|
|
13295
|
+
if (workspaceTemplate && evalFilePath && !import_node_path26.default.isAbsolute(workspaceTemplate)) {
|
|
13296
|
+
workspaceTemplate = import_node_path26.default.resolve(import_node_path26.default.dirname(import_node_path26.default.resolve(evalFilePath)), workspaceTemplate);
|
|
13112
13297
|
}
|
|
13113
13298
|
const executableSource = target.executable;
|
|
13114
13299
|
const waitSource = target.wait;
|
|
@@ -13171,11 +13356,11 @@ function resolveDiscoveredProviderConfig(target, providerKind, env, evalFilePath
|
|
|
13171
13356
|
allowLiteral: true,
|
|
13172
13357
|
optionalEnv: true
|
|
13173
13358
|
});
|
|
13174
|
-
if (cwd && evalFilePath && !
|
|
13175
|
-
cwd =
|
|
13359
|
+
if (cwd && evalFilePath && !import_node_path26.default.isAbsolute(cwd)) {
|
|
13360
|
+
cwd = import_node_path26.default.resolve(import_node_path26.default.dirname(import_node_path26.default.resolve(evalFilePath)), cwd);
|
|
13176
13361
|
}
|
|
13177
13362
|
if (!cwd && evalFilePath) {
|
|
13178
|
-
cwd =
|
|
13363
|
+
cwd = import_node_path26.default.dirname(import_node_path26.default.resolve(evalFilePath));
|
|
13179
13364
|
}
|
|
13180
13365
|
return {
|
|
13181
13366
|
command,
|
|
@@ -13396,46 +13581,46 @@ function resolveOptionalNumberArray(source, description) {
|
|
|
13396
13581
|
|
|
13397
13582
|
// src/evaluation/providers/vscode-provider.ts
|
|
13398
13583
|
init_cjs_shims();
|
|
13399
|
-
var
|
|
13400
|
-
var
|
|
13401
|
-
var
|
|
13402
|
-
var
|
|
13584
|
+
var import_node_child_process8 = require("child_process");
|
|
13585
|
+
var import_promises29 = require("fs/promises");
|
|
13586
|
+
var import_node_path37 = __toESM(require("path"), 1);
|
|
13587
|
+
var import_node_util4 = require("util");
|
|
13403
13588
|
|
|
13404
13589
|
// src/evaluation/providers/vscode/index.ts
|
|
13405
13590
|
init_cjs_shims();
|
|
13406
13591
|
|
|
13407
13592
|
// src/evaluation/providers/vscode/dispatch/agentDispatch.ts
|
|
13408
13593
|
init_cjs_shims();
|
|
13409
|
-
var
|
|
13410
|
-
var
|
|
13594
|
+
var import_promises27 = require("fs/promises");
|
|
13595
|
+
var import_node_path35 = __toESM(require("path"), 1);
|
|
13411
13596
|
|
|
13412
13597
|
// src/evaluation/providers/vscode/utils/fs.ts
|
|
13413
13598
|
init_cjs_shims();
|
|
13414
|
-
var
|
|
13415
|
-
var
|
|
13416
|
-
var
|
|
13599
|
+
var import_node_fs12 = require("fs");
|
|
13600
|
+
var import_promises23 = require("fs/promises");
|
|
13601
|
+
var import_node_path27 = __toESM(require("path"), 1);
|
|
13417
13602
|
async function pathExists(target) {
|
|
13418
13603
|
try {
|
|
13419
|
-
await (0,
|
|
13604
|
+
await (0, import_promises23.access)(target, import_node_fs12.constants.F_OK);
|
|
13420
13605
|
return true;
|
|
13421
13606
|
} catch {
|
|
13422
13607
|
return false;
|
|
13423
13608
|
}
|
|
13424
13609
|
}
|
|
13425
13610
|
async function ensureDir(target) {
|
|
13426
|
-
await (0,
|
|
13611
|
+
await (0, import_promises23.mkdir)(target, { recursive: true });
|
|
13427
13612
|
}
|
|
13428
13613
|
async function readDirEntries(target) {
|
|
13429
|
-
const entries = await (0,
|
|
13614
|
+
const entries = await (0, import_promises23.readdir)(target, { withFileTypes: true });
|
|
13430
13615
|
return entries.map((entry) => ({
|
|
13431
13616
|
name: entry.name,
|
|
13432
|
-
absolutePath:
|
|
13617
|
+
absolutePath: import_node_path27.default.join(target, entry.name),
|
|
13433
13618
|
isDirectory: entry.isDirectory()
|
|
13434
13619
|
}));
|
|
13435
13620
|
}
|
|
13436
13621
|
async function removeIfExists(target) {
|
|
13437
13622
|
try {
|
|
13438
|
-
await (0,
|
|
13623
|
+
await (0, import_promises23.rm)(target, { force: true, recursive: false });
|
|
13439
13624
|
} catch (error) {
|
|
13440
13625
|
if (error.code !== "ENOENT") {
|
|
13441
13626
|
throw error;
|
|
@@ -13445,9 +13630,9 @@ async function removeIfExists(target) {
|
|
|
13445
13630
|
|
|
13446
13631
|
// src/evaluation/providers/vscode/utils/path.ts
|
|
13447
13632
|
init_cjs_shims();
|
|
13448
|
-
var
|
|
13633
|
+
var import_node_path28 = __toESM(require("path"), 1);
|
|
13449
13634
|
function pathToFileUri2(filePath) {
|
|
13450
|
-
const absolutePath =
|
|
13635
|
+
const absolutePath = import_node_path28.default.isAbsolute(filePath) ? filePath : import_node_path28.default.resolve(filePath);
|
|
13451
13636
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
13452
13637
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
13453
13638
|
return `file:///${normalizedPath}`;
|
|
@@ -13457,7 +13642,7 @@ function pathToFileUri2(filePath) {
|
|
|
13457
13642
|
|
|
13458
13643
|
// src/evaluation/providers/vscode/dispatch/promptBuilder.ts
|
|
13459
13644
|
init_cjs_shims();
|
|
13460
|
-
var
|
|
13645
|
+
var import_node_path29 = __toESM(require("path"), 1);
|
|
13461
13646
|
|
|
13462
13647
|
// src/evaluation/providers/vscode/utils/template.ts
|
|
13463
13648
|
init_cjs_shims();
|
|
@@ -13551,8 +13736,8 @@ function createBatchRequestPrompt(userQuery, responseFileTmp, responseFileFinal,
|
|
|
13551
13736
|
});
|
|
13552
13737
|
}
|
|
13553
13738
|
function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateContent) {
|
|
13554
|
-
const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${
|
|
13555
|
-
const responseList = responseFiles.map((file) => `"${
|
|
13739
|
+
const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${import_node_path29.default.basename(file)}`).join("\n");
|
|
13740
|
+
const responseList = responseFiles.map((file) => `"${import_node_path29.default.basename(file)}"`).join(", ");
|
|
13556
13741
|
return renderTemplate2(templateContent, {
|
|
13557
13742
|
requestFiles: requestLines,
|
|
13558
13743
|
responseList
|
|
@@ -13561,8 +13746,8 @@ function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateCont
|
|
|
13561
13746
|
|
|
13562
13747
|
// src/evaluation/providers/vscode/dispatch/responseWaiter.ts
|
|
13563
13748
|
init_cjs_shims();
|
|
13564
|
-
var
|
|
13565
|
-
var
|
|
13749
|
+
var import_promises24 = require("fs/promises");
|
|
13750
|
+
var import_node_path30 = __toESM(require("path"), 1);
|
|
13566
13751
|
|
|
13567
13752
|
// src/evaluation/providers/vscode/utils/time.ts
|
|
13568
13753
|
init_cjs_shims();
|
|
@@ -13601,7 +13786,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
|
|
|
13601
13786
|
const maxAttempts = 10;
|
|
13602
13787
|
while (attempts < maxAttempts) {
|
|
13603
13788
|
try {
|
|
13604
|
-
const content = await (0,
|
|
13789
|
+
const content = await (0, import_promises24.readFile)(responseFileFinal, { encoding: "utf8" });
|
|
13605
13790
|
if (!silent) {
|
|
13606
13791
|
process.stdout.write(`${content}
|
|
13607
13792
|
`);
|
|
@@ -13622,7 +13807,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
|
|
|
13622
13807
|
}
|
|
13623
13808
|
async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, silent = false, timeoutMs = DEFAULT_TIMEOUT_MS) {
|
|
13624
13809
|
if (!silent) {
|
|
13625
|
-
const fileList = responseFilesFinal.map((file) =>
|
|
13810
|
+
const fileList = responseFilesFinal.map((file) => import_node_path30.default.basename(file)).join(", ");
|
|
13626
13811
|
console.error(`waiting for ${responseFilesFinal.length} batch response(s): ${fileList}`);
|
|
13627
13812
|
}
|
|
13628
13813
|
const deadline = Date.now() + timeoutMs;
|
|
@@ -13631,7 +13816,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
13631
13816
|
while (pending.size > 0) {
|
|
13632
13817
|
if (Date.now() >= deadline) {
|
|
13633
13818
|
if (!silent) {
|
|
13634
|
-
const remaining = [...pending].map((f) =>
|
|
13819
|
+
const remaining = [...pending].map((f) => import_node_path30.default.basename(f)).join(", ");
|
|
13635
13820
|
console.error(
|
|
13636
13821
|
`error: timed out after ${Math.round(timeoutMs / 1e3)}s waiting for batch responses. Still pending: ${remaining}`
|
|
13637
13822
|
);
|
|
@@ -13658,7 +13843,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
13658
13843
|
const maxAttempts = 10;
|
|
13659
13844
|
while (attempts < maxAttempts) {
|
|
13660
13845
|
try {
|
|
13661
|
-
const content = await (0,
|
|
13846
|
+
const content = await (0, import_promises24.readFile)(file, { encoding: "utf8" });
|
|
13662
13847
|
if (!silent) {
|
|
13663
13848
|
process.stdout.write(`${content}
|
|
13664
13849
|
`);
|
|
@@ -13681,24 +13866,24 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
13681
13866
|
|
|
13682
13867
|
// src/evaluation/providers/vscode/dispatch/vscodeProcess.ts
|
|
13683
13868
|
init_cjs_shims();
|
|
13684
|
-
var
|
|
13685
|
-
var
|
|
13686
|
-
var
|
|
13687
|
-
var
|
|
13869
|
+
var import_node_child_process7 = require("child_process");
|
|
13870
|
+
var import_promises25 = require("fs/promises");
|
|
13871
|
+
var import_node_path32 = __toESM(require("path"), 1);
|
|
13872
|
+
var import_node_util3 = require("util");
|
|
13688
13873
|
|
|
13689
13874
|
// src/evaluation/providers/vscode/dispatch/constants.ts
|
|
13690
13875
|
init_cjs_shims();
|
|
13691
|
-
var
|
|
13876
|
+
var import_node_path31 = __toESM(require("path"), 1);
|
|
13692
13877
|
var DEFAULT_LOCK_NAME = "subagent.lock";
|
|
13693
13878
|
var DEFAULT_ALIVE_FILENAME = ".alive";
|
|
13694
13879
|
function getDefaultSubagentRoot(vscodeCmd = "code") {
|
|
13695
13880
|
const folder = vscodeCmd === "code-insiders" ? "vscode-insiders-agents" : "vscode-agents";
|
|
13696
|
-
return
|
|
13881
|
+
return import_node_path31.default.join(getSubagentsRoot(), folder);
|
|
13697
13882
|
}
|
|
13698
13883
|
var DEFAULT_SUBAGENT_ROOT = getDefaultSubagentRoot();
|
|
13699
13884
|
|
|
13700
13885
|
// src/evaluation/providers/vscode/dispatch/vscodeProcess.ts
|
|
13701
|
-
var
|
|
13886
|
+
var execAsync3 = (0, import_node_util3.promisify)(import_node_child_process7.exec);
|
|
13702
13887
|
function shellQuote(cmd) {
|
|
13703
13888
|
return cmd.includes(" ") ? `"${cmd}"` : cmd;
|
|
13704
13889
|
}
|
|
@@ -13709,7 +13894,7 @@ model: Grok Code Fast 1 (copilot)
|
|
|
13709
13894
|
function spawnVsCode(vscodeCmd, args, options) {
|
|
13710
13895
|
const useShell = options?.shell ?? true;
|
|
13711
13896
|
const command = useShell ? shellQuote(vscodeCmd) : vscodeCmd;
|
|
13712
|
-
const child = (0,
|
|
13897
|
+
const child = (0, import_node_child_process7.spawn)(command, args, {
|
|
13713
13898
|
windowsHide: true,
|
|
13714
13899
|
shell: useShell,
|
|
13715
13900
|
detached: false
|
|
@@ -13744,7 +13929,7 @@ async function raceSpawnError(child, graceMs = 200) {
|
|
|
13744
13929
|
}
|
|
13745
13930
|
async function checkWorkspaceOpened(workspaceName, vscodeCmd) {
|
|
13746
13931
|
try {
|
|
13747
|
-
const { stdout } = await
|
|
13932
|
+
const { stdout } = await execAsync3(`${shellQuote(vscodeCmd)} --status`, {
|
|
13748
13933
|
timeout: 1e4,
|
|
13749
13934
|
windowsHide: true
|
|
13750
13935
|
});
|
|
@@ -13760,12 +13945,12 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
13760
13945
|
await raceSpawnError(child);
|
|
13761
13946
|
return true;
|
|
13762
13947
|
}
|
|
13763
|
-
const aliveFile =
|
|
13948
|
+
const aliveFile = import_node_path32.default.join(subagentDir, DEFAULT_ALIVE_FILENAME);
|
|
13764
13949
|
await removeIfExists(aliveFile);
|
|
13765
|
-
const githubAgentsDir =
|
|
13766
|
-
await (0,
|
|
13767
|
-
const wakeupDst =
|
|
13768
|
-
await (0,
|
|
13950
|
+
const githubAgentsDir = import_node_path32.default.join(subagentDir, ".github", "agents");
|
|
13951
|
+
await (0, import_promises25.mkdir)(githubAgentsDir, { recursive: true });
|
|
13952
|
+
const wakeupDst = import_node_path32.default.join(githubAgentsDir, "wakeup.md");
|
|
13953
|
+
await (0, import_promises25.writeFile)(wakeupDst, DEFAULT_WAKEUP_CONTENT, "utf8");
|
|
13769
13954
|
const workspaceChild = spawnVsCode(vscodeCmd, [workspacePath], {
|
|
13770
13955
|
label: "open-workspace"
|
|
13771
13956
|
});
|
|
@@ -13777,7 +13962,7 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
13777
13962
|
"chat",
|
|
13778
13963
|
"-m",
|
|
13779
13964
|
wakeupChatId,
|
|
13780
|
-
`create a file named .alive in the ${
|
|
13965
|
+
`create a file named .alive in the ${import_node_path32.default.basename(subagentDir)} folder`
|
|
13781
13966
|
];
|
|
13782
13967
|
const wakeupChild = spawnVsCode(vscodeCmd, chatArgs, { label: "send-wakeup-chat" });
|
|
13783
13968
|
await raceSpawnError(wakeupChild);
|
|
@@ -13792,27 +13977,27 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
13792
13977
|
return true;
|
|
13793
13978
|
}
|
|
13794
13979
|
async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, requestInstructions, timestamp, vscodeCmd) {
|
|
13795
|
-
const workspacePath =
|
|
13796
|
-
const messagesDir =
|
|
13797
|
-
await (0,
|
|
13798
|
-
const reqFile =
|
|
13799
|
-
await (0,
|
|
13980
|
+
const workspacePath = import_node_path32.default.join(subagentDir, `${import_node_path32.default.basename(subagentDir)}.code-workspace`);
|
|
13981
|
+
const messagesDir = import_node_path32.default.join(subagentDir, "messages");
|
|
13982
|
+
await (0, import_promises25.mkdir)(messagesDir, { recursive: true });
|
|
13983
|
+
const reqFile = import_node_path32.default.join(messagesDir, `${timestamp}_req.md`);
|
|
13984
|
+
await (0, import_promises25.writeFile)(reqFile, requestInstructions, { encoding: "utf8" });
|
|
13800
13985
|
const reqUri = pathToFileUri2(reqFile);
|
|
13801
13986
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
13802
13987
|
for (const attachment of attachmentPaths) {
|
|
13803
13988
|
chatArgs.push("-a", attachment);
|
|
13804
13989
|
}
|
|
13805
13990
|
chatArgs.push("-a", reqFile);
|
|
13806
|
-
chatArgs.push(`Follow instructions in [${
|
|
13991
|
+
chatArgs.push(`Follow instructions in [${import_node_path32.default.basename(reqFile)}](${reqUri})`);
|
|
13807
13992
|
const workspaceReady = await ensureWorkspaceFocused(
|
|
13808
13993
|
workspacePath,
|
|
13809
|
-
|
|
13994
|
+
import_node_path32.default.basename(subagentDir),
|
|
13810
13995
|
subagentDir,
|
|
13811
13996
|
vscodeCmd
|
|
13812
13997
|
);
|
|
13813
13998
|
if (!workspaceReady) {
|
|
13814
13999
|
throw new Error(
|
|
13815
|
-
`VS Code workspace '${
|
|
14000
|
+
`VS Code workspace '${import_node_path32.default.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
|
|
13816
14001
|
);
|
|
13817
14002
|
}
|
|
13818
14003
|
await sleep2(500);
|
|
@@ -13820,9 +14005,9 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
|
|
|
13820
14005
|
await raceSpawnError(child);
|
|
13821
14006
|
}
|
|
13822
14007
|
async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, chatInstruction, vscodeCmd) {
|
|
13823
|
-
const workspacePath =
|
|
13824
|
-
const messagesDir =
|
|
13825
|
-
await (0,
|
|
14008
|
+
const workspacePath = import_node_path32.default.join(subagentDir, `${import_node_path32.default.basename(subagentDir)}.code-workspace`);
|
|
14009
|
+
const messagesDir = import_node_path32.default.join(subagentDir, "messages");
|
|
14010
|
+
await (0, import_promises25.mkdir)(messagesDir, { recursive: true });
|
|
13826
14011
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
13827
14012
|
for (const attachment of attachmentPaths) {
|
|
13828
14013
|
chatArgs.push("-a", attachment);
|
|
@@ -13830,13 +14015,13 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
|
|
|
13830
14015
|
chatArgs.push(chatInstruction);
|
|
13831
14016
|
const workspaceReady = await ensureWorkspaceFocused(
|
|
13832
14017
|
workspacePath,
|
|
13833
|
-
|
|
14018
|
+
import_node_path32.default.basename(subagentDir),
|
|
13834
14019
|
subagentDir,
|
|
13835
14020
|
vscodeCmd
|
|
13836
14021
|
);
|
|
13837
14022
|
if (!workspaceReady) {
|
|
13838
14023
|
throw new Error(
|
|
13839
|
-
`VS Code workspace '${
|
|
14024
|
+
`VS Code workspace '${import_node_path32.default.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
|
|
13840
14025
|
);
|
|
13841
14026
|
}
|
|
13842
14027
|
await sleep2(500);
|
|
@@ -13846,12 +14031,12 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
|
|
|
13846
14031
|
|
|
13847
14032
|
// src/evaluation/providers/vscode/dispatch/workspaceManager.ts
|
|
13848
14033
|
init_cjs_shims();
|
|
13849
|
-
var
|
|
13850
|
-
var
|
|
14034
|
+
var import_promises26 = require("fs/promises");
|
|
14035
|
+
var import_node_path34 = __toESM(require("path"), 1);
|
|
13851
14036
|
|
|
13852
14037
|
// src/evaluation/providers/vscode/utils/workspace.ts
|
|
13853
14038
|
init_cjs_shims();
|
|
13854
|
-
var
|
|
14039
|
+
var import_node_path33 = __toESM(require("path"), 1);
|
|
13855
14040
|
var import_json5 = __toESM(require("json5"), 1);
|
|
13856
14041
|
function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
13857
14042
|
let workspace;
|
|
@@ -13868,10 +14053,10 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
13868
14053
|
}
|
|
13869
14054
|
const transformedFolders = workspace.folders.map((folder) => {
|
|
13870
14055
|
const folderPath = folder.path;
|
|
13871
|
-
if (
|
|
14056
|
+
if (import_node_path33.default.isAbsolute(folderPath)) {
|
|
13872
14057
|
return folder;
|
|
13873
14058
|
}
|
|
13874
|
-
const absolutePath =
|
|
14059
|
+
const absolutePath = import_node_path33.default.resolve(templateDir, folderPath);
|
|
13875
14060
|
return {
|
|
13876
14061
|
...folder,
|
|
13877
14062
|
path: absolutePath
|
|
@@ -13893,19 +14078,19 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
13893
14078
|
if (locationMap && typeof locationMap === "object") {
|
|
13894
14079
|
const transformedMap = {};
|
|
13895
14080
|
for (const [locationPath, value] of Object.entries(locationMap)) {
|
|
13896
|
-
const isAbsolute =
|
|
14081
|
+
const isAbsolute = import_node_path33.default.isAbsolute(locationPath);
|
|
13897
14082
|
if (isAbsolute) {
|
|
13898
14083
|
transformedMap[locationPath] = value;
|
|
13899
14084
|
} else {
|
|
13900
14085
|
const firstGlobIndex = locationPath.search(/[*]/);
|
|
13901
14086
|
if (firstGlobIndex === -1) {
|
|
13902
|
-
const resolvedPath =
|
|
14087
|
+
const resolvedPath = import_node_path33.default.resolve(templateDir, locationPath).replace(/\\/g, "/");
|
|
13903
14088
|
transformedMap[resolvedPath] = value;
|
|
13904
14089
|
} else {
|
|
13905
14090
|
const basePathEnd = locationPath.lastIndexOf("/", firstGlobIndex);
|
|
13906
14091
|
const basePath = basePathEnd !== -1 ? locationPath.substring(0, basePathEnd) : ".";
|
|
13907
14092
|
const patternPath = locationPath.substring(basePathEnd !== -1 ? basePathEnd : 0);
|
|
13908
|
-
const resolvedPath = (
|
|
14093
|
+
const resolvedPath = (import_node_path33.default.resolve(templateDir, basePath) + patternPath).replace(
|
|
13909
14094
|
/\\/g,
|
|
13910
14095
|
"/"
|
|
13911
14096
|
);
|
|
@@ -13946,7 +14131,7 @@ async function findUnlockedSubagent(subagentRoot) {
|
|
|
13946
14131
|
number: Number.parseInt(entry.name.split("-")[1] ?? "", 10)
|
|
13947
14132
|
})).filter((entry) => Number.isInteger(entry.number)).sort((a, b) => a.number - b.number);
|
|
13948
14133
|
for (const subagent of subagents) {
|
|
13949
|
-
const lockFile =
|
|
14134
|
+
const lockFile = import_node_path34.default.join(subagent.absolutePath, DEFAULT_LOCK_NAME);
|
|
13950
14135
|
if (!await pathExists(lockFile)) {
|
|
13951
14136
|
return subagent.absolutePath;
|
|
13952
14137
|
}
|
|
@@ -13956,26 +14141,26 @@ async function findUnlockedSubagent(subagentRoot) {
|
|
|
13956
14141
|
async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
13957
14142
|
let workspaceContent;
|
|
13958
14143
|
if (workspaceTemplate) {
|
|
13959
|
-
const workspaceSrc =
|
|
14144
|
+
const workspaceSrc = import_node_path34.default.resolve(workspaceTemplate);
|
|
13960
14145
|
if (!await pathExists(workspaceSrc)) {
|
|
13961
14146
|
throw new Error(`workspace template not found: ${workspaceSrc}`);
|
|
13962
14147
|
}
|
|
13963
|
-
const stats = await (0,
|
|
14148
|
+
const stats = await (0, import_promises26.stat)(workspaceSrc);
|
|
13964
14149
|
if (!stats.isFile()) {
|
|
13965
14150
|
throw new Error(`workspace template must be a file, not a directory: ${workspaceSrc}`);
|
|
13966
14151
|
}
|
|
13967
|
-
const templateText = await (0,
|
|
14152
|
+
const templateText = await (0, import_promises26.readFile)(workspaceSrc, "utf8");
|
|
13968
14153
|
workspaceContent = JSON.parse(templateText);
|
|
13969
14154
|
} else {
|
|
13970
14155
|
workspaceContent = DEFAULT_WORKSPACE_TEMPLATE;
|
|
13971
14156
|
}
|
|
13972
|
-
const workspaceName = `${
|
|
13973
|
-
const workspaceDst =
|
|
13974
|
-
const templateDir = workspaceTemplate ?
|
|
14157
|
+
const workspaceName = `${import_node_path34.default.basename(subagentDir)}.code-workspace`;
|
|
14158
|
+
const workspaceDst = import_node_path34.default.join(subagentDir, workspaceName);
|
|
14159
|
+
const templateDir = workspaceTemplate ? import_node_path34.default.dirname(import_node_path34.default.resolve(workspaceTemplate)) : subagentDir;
|
|
13975
14160
|
const workspaceJson = JSON.stringify(workspaceContent, null, 2);
|
|
13976
14161
|
let transformedContent = transformWorkspacePaths(workspaceJson, templateDir);
|
|
13977
14162
|
if (cwd) {
|
|
13978
|
-
const absCwd =
|
|
14163
|
+
const absCwd = import_node_path34.default.resolve(cwd);
|
|
13979
14164
|
const parsed = JSON.parse(transformedContent);
|
|
13980
14165
|
const alreadyPresent = parsed.folders.some((f) => f.path === absCwd);
|
|
13981
14166
|
if (!alreadyPresent) {
|
|
@@ -13983,36 +14168,36 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
|
13983
14168
|
transformedContent = JSON.stringify(parsed, null, 2);
|
|
13984
14169
|
}
|
|
13985
14170
|
}
|
|
13986
|
-
await (0,
|
|
13987
|
-
const messagesDir =
|
|
13988
|
-
await (0,
|
|
14171
|
+
await (0, import_promises26.writeFile)(workspaceDst, transformedContent, "utf8");
|
|
14172
|
+
const messagesDir = import_node_path34.default.join(subagentDir, "messages");
|
|
14173
|
+
await (0, import_promises26.mkdir)(messagesDir, { recursive: true });
|
|
13989
14174
|
return { workspace: workspaceDst, messagesDir };
|
|
13990
14175
|
}
|
|
13991
14176
|
async function createSubagentLock(subagentDir) {
|
|
13992
|
-
const messagesDir =
|
|
14177
|
+
const messagesDir = import_node_path34.default.join(subagentDir, "messages");
|
|
13993
14178
|
if (await pathExists(messagesDir)) {
|
|
13994
|
-
const files = await (0,
|
|
14179
|
+
const files = await (0, import_promises26.readdir)(messagesDir);
|
|
13995
14180
|
await Promise.all(
|
|
13996
14181
|
files.map(async (file) => {
|
|
13997
|
-
const target =
|
|
14182
|
+
const target = import_node_path34.default.join(messagesDir, file);
|
|
13998
14183
|
await removeIfExists(target);
|
|
13999
14184
|
})
|
|
14000
14185
|
);
|
|
14001
14186
|
}
|
|
14002
|
-
const githubAgentsDir =
|
|
14187
|
+
const githubAgentsDir = import_node_path34.default.join(subagentDir, ".github", "agents");
|
|
14003
14188
|
if (await pathExists(githubAgentsDir)) {
|
|
14004
|
-
const agentFiles = await (0,
|
|
14189
|
+
const agentFiles = await (0, import_promises26.readdir)(githubAgentsDir);
|
|
14005
14190
|
const preservedFiles = /* @__PURE__ */ new Set(["wakeup.md", "subagent.md"]);
|
|
14006
14191
|
await Promise.all(
|
|
14007
|
-
agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(
|
|
14192
|
+
agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(import_node_path34.default.join(githubAgentsDir, file)))
|
|
14008
14193
|
);
|
|
14009
14194
|
}
|
|
14010
|
-
const lockFile =
|
|
14011
|
-
await (0,
|
|
14195
|
+
const lockFile = import_node_path34.default.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
14196
|
+
await (0, import_promises26.writeFile)(lockFile, "", { encoding: "utf8" });
|
|
14012
14197
|
return lockFile;
|
|
14013
14198
|
}
|
|
14014
14199
|
async function removeSubagentLock(subagentDir) {
|
|
14015
|
-
const lockFile =
|
|
14200
|
+
const lockFile = import_node_path34.default.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
14016
14201
|
await removeIfExists(lockFile);
|
|
14017
14202
|
}
|
|
14018
14203
|
async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspaceTemplate, dryRun, cwd) {
|
|
@@ -14032,11 +14217,11 @@ async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspa
|
|
|
14032
14217
|
return 1;
|
|
14033
14218
|
}
|
|
14034
14219
|
if (promptFile) {
|
|
14035
|
-
const githubAgentsDir =
|
|
14036
|
-
await (0,
|
|
14037
|
-
const agentFile =
|
|
14220
|
+
const githubAgentsDir = import_node_path34.default.join(subagentDir, ".github", "agents");
|
|
14221
|
+
await (0, import_promises26.mkdir)(githubAgentsDir, { recursive: true });
|
|
14222
|
+
const agentFile = import_node_path34.default.join(githubAgentsDir, `${chatId}.md`);
|
|
14038
14223
|
try {
|
|
14039
|
-
await (0,
|
|
14224
|
+
await (0, import_promises26.copyFile)(promptFile, agentFile);
|
|
14040
14225
|
} catch (error) {
|
|
14041
14226
|
console.error(`error: Failed to copy prompt file to agent mode: ${error.message}`);
|
|
14042
14227
|
return 1;
|
|
@@ -14053,11 +14238,11 @@ async function resolvePromptFile(promptFile) {
|
|
|
14053
14238
|
if (!promptFile) {
|
|
14054
14239
|
return void 0;
|
|
14055
14240
|
}
|
|
14056
|
-
const resolvedPrompt =
|
|
14241
|
+
const resolvedPrompt = import_node_path35.default.resolve(promptFile);
|
|
14057
14242
|
if (!await pathExists(resolvedPrompt)) {
|
|
14058
14243
|
throw new Error(`Prompt file not found: ${resolvedPrompt}`);
|
|
14059
14244
|
}
|
|
14060
|
-
const promptStats = await (0,
|
|
14245
|
+
const promptStats = await (0, import_promises27.stat)(resolvedPrompt);
|
|
14061
14246
|
if (!promptStats.isFile()) {
|
|
14062
14247
|
throw new Error(`Prompt file must be a file, not a directory: ${resolvedPrompt}`);
|
|
14063
14248
|
}
|
|
@@ -14069,7 +14254,7 @@ async function resolveAttachments(extraAttachments) {
|
|
|
14069
14254
|
}
|
|
14070
14255
|
const resolved = [];
|
|
14071
14256
|
for (const attachment of extraAttachments) {
|
|
14072
|
-
const resolvedPath =
|
|
14257
|
+
const resolvedPath = import_node_path35.default.resolve(attachment);
|
|
14073
14258
|
if (!await pathExists(resolvedPath)) {
|
|
14074
14259
|
throw new Error(`Attachment not found: ${resolvedPath}`);
|
|
14075
14260
|
}
|
|
@@ -14111,7 +14296,7 @@ async function dispatchAgentSession(options) {
|
|
|
14111
14296
|
error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
|
|
14112
14297
|
};
|
|
14113
14298
|
}
|
|
14114
|
-
const subagentName =
|
|
14299
|
+
const subagentName = import_node_path35.default.basename(subagentDir);
|
|
14115
14300
|
const chatId = Math.random().toString(16).slice(2, 10);
|
|
14116
14301
|
const preparationResult = await prepareSubagentDirectory(
|
|
14117
14302
|
subagentDir,
|
|
@@ -14139,9 +14324,9 @@ async function dispatchAgentSession(options) {
|
|
|
14139
14324
|
};
|
|
14140
14325
|
}
|
|
14141
14326
|
const timestamp = generateTimestamp();
|
|
14142
|
-
const messagesDir =
|
|
14143
|
-
const responseFileTmp =
|
|
14144
|
-
const responseFileFinal =
|
|
14327
|
+
const messagesDir = import_node_path35.default.join(subagentDir, "messages");
|
|
14328
|
+
const responseFileTmp = import_node_path35.default.join(messagesDir, `${timestamp}_res.tmp.md`);
|
|
14329
|
+
const responseFileFinal = import_node_path35.default.join(messagesDir, `${timestamp}_res.md`);
|
|
14145
14330
|
const requestInstructions = createRequestPrompt(
|
|
14146
14331
|
userQuery,
|
|
14147
14332
|
responseFileTmp,
|
|
@@ -14246,7 +14431,7 @@ async function dispatchBatchAgent(options) {
|
|
|
14246
14431
|
error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
|
|
14247
14432
|
};
|
|
14248
14433
|
}
|
|
14249
|
-
subagentName =
|
|
14434
|
+
subagentName = import_node_path35.default.basename(subagentDir);
|
|
14250
14435
|
const chatId = Math.random().toString(16).slice(2, 10);
|
|
14251
14436
|
const preparationResult = await prepareSubagentDirectory(
|
|
14252
14437
|
subagentDir,
|
|
@@ -14277,24 +14462,24 @@ async function dispatchBatchAgent(options) {
|
|
|
14277
14462
|
};
|
|
14278
14463
|
}
|
|
14279
14464
|
const timestamp = generateTimestamp();
|
|
14280
|
-
const messagesDir =
|
|
14465
|
+
const messagesDir = import_node_path35.default.join(subagentDir, "messages");
|
|
14281
14466
|
requestFiles = userQueries.map(
|
|
14282
|
-
(_, index) =>
|
|
14467
|
+
(_, index) => import_node_path35.default.join(messagesDir, `${timestamp}_${index}_req.md`)
|
|
14283
14468
|
);
|
|
14284
14469
|
const responseTmpFiles = userQueries.map(
|
|
14285
|
-
(_, index) =>
|
|
14470
|
+
(_, index) => import_node_path35.default.join(messagesDir, `${timestamp}_${index}_res.tmp.md`)
|
|
14286
14471
|
);
|
|
14287
14472
|
responseFilesFinal = userQueries.map(
|
|
14288
|
-
(_, index) =>
|
|
14473
|
+
(_, index) => import_node_path35.default.join(messagesDir, `${timestamp}_${index}_res.md`)
|
|
14289
14474
|
);
|
|
14290
|
-
const orchestratorFile =
|
|
14475
|
+
const orchestratorFile = import_node_path35.default.join(messagesDir, `${timestamp}_orchestrator.md`);
|
|
14291
14476
|
if (!dryRun) {
|
|
14292
14477
|
await Promise.all(
|
|
14293
14478
|
userQueries.map((query, index) => {
|
|
14294
14479
|
const reqFile = requestFiles[index];
|
|
14295
14480
|
const tmpFile = responseTmpFiles[index];
|
|
14296
14481
|
const finalFile = responseFilesFinal[index];
|
|
14297
|
-
return (0,
|
|
14482
|
+
return (0, import_promises27.writeFile)(
|
|
14298
14483
|
reqFile,
|
|
14299
14484
|
createBatchRequestPrompt(query, tmpFile, finalFile, batchRequestTemplateContent),
|
|
14300
14485
|
{ encoding: "utf8" }
|
|
@@ -14306,7 +14491,7 @@ async function dispatchBatchAgent(options) {
|
|
|
14306
14491
|
responseFilesFinal,
|
|
14307
14492
|
orchestratorTemplateContent
|
|
14308
14493
|
);
|
|
14309
|
-
await (0,
|
|
14494
|
+
await (0, import_promises27.writeFile)(orchestratorFile, orchestratorContent, { encoding: "utf8" });
|
|
14310
14495
|
}
|
|
14311
14496
|
const chatAttachments = [orchestratorFile, ...attachments];
|
|
14312
14497
|
const orchestratorUri = pathToFileUri2(orchestratorFile);
|
|
@@ -14373,8 +14558,8 @@ async function dispatchBatchAgent(options) {
|
|
|
14373
14558
|
|
|
14374
14559
|
// src/evaluation/providers/vscode/dispatch/provision.ts
|
|
14375
14560
|
init_cjs_shims();
|
|
14376
|
-
var
|
|
14377
|
-
var
|
|
14561
|
+
var import_promises28 = require("fs/promises");
|
|
14562
|
+
var import_node_path36 = __toESM(require("path"), 1);
|
|
14378
14563
|
var DEFAULT_WORKSPACE_TEMPLATE2 = {
|
|
14379
14564
|
folders: [
|
|
14380
14565
|
{
|
|
@@ -14405,7 +14590,7 @@ async function provisionSubagents(options) {
|
|
|
14405
14590
|
if (!Number.isInteger(subagents) || subagents < 1) {
|
|
14406
14591
|
throw new Error("subagents must be a positive integer");
|
|
14407
14592
|
}
|
|
14408
|
-
const targetPath =
|
|
14593
|
+
const targetPath = import_node_path36.default.resolve(targetRoot);
|
|
14409
14594
|
if (!dryRun) {
|
|
14410
14595
|
await ensureDir(targetPath);
|
|
14411
14596
|
}
|
|
@@ -14425,7 +14610,7 @@ async function provisionSubagents(options) {
|
|
|
14425
14610
|
continue;
|
|
14426
14611
|
}
|
|
14427
14612
|
highestNumber = Math.max(highestNumber, parsed);
|
|
14428
|
-
const lockFile =
|
|
14613
|
+
const lockFile = import_node_path36.default.join(entry.absolutePath, lockName);
|
|
14429
14614
|
const locked = await pathExists(lockFile);
|
|
14430
14615
|
if (locked) {
|
|
14431
14616
|
lockedSubagents.add(entry.absolutePath);
|
|
@@ -14442,10 +14627,10 @@ async function provisionSubagents(options) {
|
|
|
14442
14627
|
break;
|
|
14443
14628
|
}
|
|
14444
14629
|
const subagentDir = subagent.absolutePath;
|
|
14445
|
-
const githubAgentsDir =
|
|
14446
|
-
const lockFile =
|
|
14447
|
-
const workspaceDst =
|
|
14448
|
-
const wakeupDst =
|
|
14630
|
+
const githubAgentsDir = import_node_path36.default.join(subagentDir, ".github", "agents");
|
|
14631
|
+
const lockFile = import_node_path36.default.join(subagentDir, lockName);
|
|
14632
|
+
const workspaceDst = import_node_path36.default.join(subagentDir, `${import_node_path36.default.basename(subagentDir)}.code-workspace`);
|
|
14633
|
+
const wakeupDst = import_node_path36.default.join(githubAgentsDir, "wakeup.md");
|
|
14449
14634
|
const isLocked = await pathExists(lockFile);
|
|
14450
14635
|
if (isLocked && !force) {
|
|
14451
14636
|
continue;
|
|
@@ -14454,8 +14639,8 @@ async function provisionSubagents(options) {
|
|
|
14454
14639
|
if (!dryRun) {
|
|
14455
14640
|
await removeIfExists(lockFile);
|
|
14456
14641
|
await ensureDir(githubAgentsDir);
|
|
14457
|
-
await (0,
|
|
14458
|
-
await (0,
|
|
14642
|
+
await (0, import_promises28.writeFile)(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
|
|
14643
|
+
await (0, import_promises28.writeFile)(wakeupDst, wakeupContent, "utf8");
|
|
14459
14644
|
}
|
|
14460
14645
|
created.push(subagentDir);
|
|
14461
14646
|
lockedSubagents.delete(subagentDir);
|
|
@@ -14465,8 +14650,8 @@ async function provisionSubagents(options) {
|
|
|
14465
14650
|
if (!isLocked && force) {
|
|
14466
14651
|
if (!dryRun) {
|
|
14467
14652
|
await ensureDir(githubAgentsDir);
|
|
14468
|
-
await (0,
|
|
14469
|
-
await (0,
|
|
14653
|
+
await (0, import_promises28.writeFile)(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
|
|
14654
|
+
await (0, import_promises28.writeFile)(wakeupDst, wakeupContent, "utf8");
|
|
14470
14655
|
}
|
|
14471
14656
|
created.push(subagentDir);
|
|
14472
14657
|
subagentsProvisioned += 1;
|
|
@@ -14474,8 +14659,8 @@ async function provisionSubagents(options) {
|
|
|
14474
14659
|
}
|
|
14475
14660
|
if (!dryRun && !await pathExists(workspaceDst)) {
|
|
14476
14661
|
await ensureDir(githubAgentsDir);
|
|
14477
|
-
await (0,
|
|
14478
|
-
await (0,
|
|
14662
|
+
await (0, import_promises28.writeFile)(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
|
|
14663
|
+
await (0, import_promises28.writeFile)(wakeupDst, wakeupContent, "utf8");
|
|
14479
14664
|
}
|
|
14480
14665
|
skippedExisting.push(subagentDir);
|
|
14481
14666
|
subagentsProvisioned += 1;
|
|
@@ -14483,15 +14668,15 @@ async function provisionSubagents(options) {
|
|
|
14483
14668
|
let nextIndex = highestNumber;
|
|
14484
14669
|
while (subagentsProvisioned < subagents) {
|
|
14485
14670
|
nextIndex += 1;
|
|
14486
|
-
const subagentDir =
|
|
14487
|
-
const githubAgentsDir =
|
|
14488
|
-
const workspaceDst =
|
|
14489
|
-
const wakeupDst =
|
|
14671
|
+
const subagentDir = import_node_path36.default.join(targetPath, `subagent-${nextIndex}`);
|
|
14672
|
+
const githubAgentsDir = import_node_path36.default.join(subagentDir, ".github", "agents");
|
|
14673
|
+
const workspaceDst = import_node_path36.default.join(subagentDir, `${import_node_path36.default.basename(subagentDir)}.code-workspace`);
|
|
14674
|
+
const wakeupDst = import_node_path36.default.join(githubAgentsDir, "wakeup.md");
|
|
14490
14675
|
if (!dryRun) {
|
|
14491
14676
|
await ensureDir(subagentDir);
|
|
14492
14677
|
await ensureDir(githubAgentsDir);
|
|
14493
|
-
await (0,
|
|
14494
|
-
await (0,
|
|
14678
|
+
await (0, import_promises28.writeFile)(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
|
|
14679
|
+
await (0, import_promises28.writeFile)(wakeupDst, wakeupContent, "utf8");
|
|
14495
14680
|
}
|
|
14496
14681
|
created.push(subagentDir);
|
|
14497
14682
|
subagentsProvisioned += 1;
|
|
@@ -14534,7 +14719,7 @@ var AGENTV_BATCH_REQUEST_TEMPLATE = `[[ ## task ## ]]
|
|
|
14534
14719
|
`;
|
|
14535
14720
|
|
|
14536
14721
|
// src/evaluation/providers/vscode-provider.ts
|
|
14537
|
-
var
|
|
14722
|
+
var execAsync4 = (0, import_node_util4.promisify)(import_node_child_process8.exec);
|
|
14538
14723
|
var VSCodeProvider = class {
|
|
14539
14724
|
id;
|
|
14540
14725
|
kind;
|
|
@@ -14677,9 +14862,9 @@ var VSCodeProvider = class {
|
|
|
14677
14862
|
async function locateVSCodeExecutable(candidate) {
|
|
14678
14863
|
const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
|
|
14679
14864
|
if (includesPathSeparator) {
|
|
14680
|
-
const resolved =
|
|
14865
|
+
const resolved = import_node_path37.default.isAbsolute(candidate) ? candidate : import_node_path37.default.resolve(candidate);
|
|
14681
14866
|
try {
|
|
14682
|
-
await (0,
|
|
14867
|
+
await (0, import_promises29.access)(resolved, import_promises29.constants.F_OK);
|
|
14683
14868
|
return resolved;
|
|
14684
14869
|
} catch {
|
|
14685
14870
|
throw new Error(
|
|
@@ -14689,10 +14874,10 @@ async function locateVSCodeExecutable(candidate) {
|
|
|
14689
14874
|
}
|
|
14690
14875
|
const locator = process.platform === "win32" ? "where" : "which";
|
|
14691
14876
|
try {
|
|
14692
|
-
const { stdout } = await
|
|
14877
|
+
const { stdout } = await execAsync4(`${locator} ${candidate}`);
|
|
14693
14878
|
const lines = stdout.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.length > 0);
|
|
14694
14879
|
if (lines.length > 0 && lines[0]) {
|
|
14695
|
-
await (0,
|
|
14880
|
+
await (0, import_promises29.access)(lines[0], import_promises29.constants.F_OK);
|
|
14696
14881
|
return lines[0];
|
|
14697
14882
|
}
|
|
14698
14883
|
} catch {
|
|
@@ -14706,7 +14891,7 @@ async function resolveWorkspaceTemplateFile(template) {
|
|
|
14706
14891
|
return void 0;
|
|
14707
14892
|
}
|
|
14708
14893
|
try {
|
|
14709
|
-
const stats = await (0,
|
|
14894
|
+
const stats = await (0, import_promises29.stat)(import_node_path37.default.resolve(template));
|
|
14710
14895
|
return stats.isFile() ? template : void 0;
|
|
14711
14896
|
} catch {
|
|
14712
14897
|
return template;
|
|
@@ -14730,7 +14915,7 @@ function buildMandatoryPrereadBlock2(attachmentFiles) {
|
|
|
14730
14915
|
return "";
|
|
14731
14916
|
}
|
|
14732
14917
|
const buildList = (files) => files.map((absolutePath) => {
|
|
14733
|
-
const fileName =
|
|
14918
|
+
const fileName = import_node_path37.default.basename(absolutePath);
|
|
14734
14919
|
const fileUri = pathToFileUri3(absolutePath);
|
|
14735
14920
|
return `* [${fileName}](${fileUri})`;
|
|
14736
14921
|
});
|
|
@@ -14751,7 +14936,7 @@ function collectAttachmentFiles(attachments) {
|
|
|
14751
14936
|
}
|
|
14752
14937
|
const unique = /* @__PURE__ */ new Map();
|
|
14753
14938
|
for (const attachment of attachments) {
|
|
14754
|
-
const absolutePath =
|
|
14939
|
+
const absolutePath = import_node_path37.default.resolve(attachment);
|
|
14755
14940
|
if (!unique.has(absolutePath)) {
|
|
14756
14941
|
unique.set(absolutePath, absolutePath);
|
|
14757
14942
|
}
|
|
@@ -14759,7 +14944,7 @@ function collectAttachmentFiles(attachments) {
|
|
|
14759
14944
|
return Array.from(unique.values());
|
|
14760
14945
|
}
|
|
14761
14946
|
function pathToFileUri3(filePath) {
|
|
14762
|
-
const absolutePath =
|
|
14947
|
+
const absolutePath = import_node_path37.default.isAbsolute(filePath) ? filePath : import_node_path37.default.resolve(filePath);
|
|
14763
14948
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
14764
14949
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
14765
14950
|
return `file:///${normalizedPath}`;
|
|
@@ -14772,7 +14957,7 @@ function normalizeAttachments(attachments) {
|
|
|
14772
14957
|
}
|
|
14773
14958
|
const deduped = /* @__PURE__ */ new Set();
|
|
14774
14959
|
for (const attachment of attachments) {
|
|
14775
|
-
deduped.add(
|
|
14960
|
+
deduped.add(import_node_path37.default.resolve(attachment));
|
|
14776
14961
|
}
|
|
14777
14962
|
return Array.from(deduped);
|
|
14778
14963
|
}
|
|
@@ -14781,7 +14966,7 @@ function mergeAttachments(all) {
|
|
|
14781
14966
|
for (const list of all) {
|
|
14782
14967
|
if (!list) continue;
|
|
14783
14968
|
for (const inputFile of list) {
|
|
14784
|
-
deduped.add(
|
|
14969
|
+
deduped.add(import_node_path37.default.resolve(inputFile));
|
|
14785
14970
|
}
|
|
14786
14971
|
}
|
|
14787
14972
|
return deduped.size > 0 ? Array.from(deduped) : void 0;
|
|
@@ -14874,9 +15059,9 @@ function isAgentProvider(provider) {
|
|
|
14874
15059
|
|
|
14875
15060
|
// src/evaluation/providers/targets-file.ts
|
|
14876
15061
|
init_cjs_shims();
|
|
14877
|
-
var
|
|
14878
|
-
var
|
|
14879
|
-
var
|
|
15062
|
+
var import_node_fs13 = require("fs");
|
|
15063
|
+
var import_promises30 = require("fs/promises");
|
|
15064
|
+
var import_node_path38 = __toESM(require("path"), 1);
|
|
14880
15065
|
var import_yaml8 = require("yaml");
|
|
14881
15066
|
function isRecord(value) {
|
|
14882
15067
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
@@ -14909,18 +15094,18 @@ function assertTargetDefinition(value, index, filePath) {
|
|
|
14909
15094
|
}
|
|
14910
15095
|
async function fileExists3(filePath) {
|
|
14911
15096
|
try {
|
|
14912
|
-
await (0,
|
|
15097
|
+
await (0, import_promises30.access)(filePath, import_node_fs13.constants.F_OK);
|
|
14913
15098
|
return true;
|
|
14914
15099
|
} catch {
|
|
14915
15100
|
return false;
|
|
14916
15101
|
}
|
|
14917
15102
|
}
|
|
14918
15103
|
async function readTargetDefinitions(filePath) {
|
|
14919
|
-
const absolutePath =
|
|
15104
|
+
const absolutePath = import_node_path38.default.resolve(filePath);
|
|
14920
15105
|
if (!await fileExists3(absolutePath)) {
|
|
14921
15106
|
throw new Error(`targets.yaml not found at ${absolutePath}`);
|
|
14922
15107
|
}
|
|
14923
|
-
const raw = await (0,
|
|
15108
|
+
const raw = await (0, import_promises30.readFile)(absolutePath, "utf8");
|
|
14924
15109
|
const parsed = (0, import_yaml8.parse)(raw);
|
|
14925
15110
|
if (!isRecord(parsed)) {
|
|
14926
15111
|
throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
|
|
@@ -14937,16 +15122,16 @@ function listTargetNames(definitions) {
|
|
|
14937
15122
|
|
|
14938
15123
|
// src/evaluation/providers/provider-discovery.ts
|
|
14939
15124
|
init_cjs_shims();
|
|
14940
|
-
var
|
|
15125
|
+
var import_node_path39 = __toESM(require("path"), 1);
|
|
14941
15126
|
var import_fast_glob2 = __toESM(require("fast-glob"), 1);
|
|
14942
15127
|
async function discoverProviders(registry, baseDir) {
|
|
14943
15128
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
14944
15129
|
const candidateDirs = [];
|
|
14945
|
-
let dir =
|
|
14946
|
-
const root =
|
|
15130
|
+
let dir = import_node_path39.default.resolve(baseDir);
|
|
15131
|
+
const root = import_node_path39.default.parse(dir).root;
|
|
14947
15132
|
while (dir !== root) {
|
|
14948
|
-
candidateDirs.push(
|
|
14949
|
-
dir =
|
|
15133
|
+
candidateDirs.push(import_node_path39.default.join(dir, ".agentv", "providers"));
|
|
15134
|
+
dir = import_node_path39.default.dirname(dir);
|
|
14950
15135
|
}
|
|
14951
15136
|
let files = [];
|
|
14952
15137
|
for (const providersDir of candidateDirs) {
|
|
@@ -14962,7 +15147,7 @@ async function discoverProviders(registry, baseDir) {
|
|
|
14962
15147
|
}
|
|
14963
15148
|
const discoveredKinds = [];
|
|
14964
15149
|
for (const filePath of files) {
|
|
14965
|
-
const basename =
|
|
15150
|
+
const basename = import_node_path39.default.basename(filePath);
|
|
14966
15151
|
const kindName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
14967
15152
|
if (registry.has(kindName)) {
|
|
14968
15153
|
continue;
|
|
@@ -15092,9 +15277,9 @@ function negateScore(score) {
|
|
|
15092
15277
|
|
|
15093
15278
|
// src/evaluation/evaluators/code-evaluator.ts
|
|
15094
15279
|
init_cjs_shims();
|
|
15095
|
-
var
|
|
15096
|
-
var
|
|
15097
|
-
var
|
|
15280
|
+
var import_promises31 = require("fs/promises");
|
|
15281
|
+
var import_node_os8 = require("os");
|
|
15282
|
+
var import_node_path40 = require("path");
|
|
15098
15283
|
init_exec();
|
|
15099
15284
|
|
|
15100
15285
|
// src/runtime/target-proxy.ts
|
|
@@ -15429,8 +15614,8 @@ async function materializeContentForGrader(messages, getWorkDir) {
|
|
|
15429
15614
|
const [, mediaType, base64Data] = match;
|
|
15430
15615
|
const ext = mediaType.split("/")[1] === "jpeg" ? "jpg" : mediaType.split("/")[1] ?? "bin";
|
|
15431
15616
|
const dir = await getWorkDir();
|
|
15432
|
-
const filePath = (0,
|
|
15433
|
-
await (0,
|
|
15617
|
+
const filePath = (0, import_node_path40.join)(dir, `img-${counter++}.${ext}`);
|
|
15618
|
+
await (0, import_promises31.writeFile)(filePath, Buffer.from(base64Data, "base64"));
|
|
15434
15619
|
blocks.push({ type: "image", media_type: img.media_type, path: filePath });
|
|
15435
15620
|
} else {
|
|
15436
15621
|
blocks.push({ type: "image", media_type: img.media_type, path: img.source });
|
|
@@ -15458,7 +15643,7 @@ var CodeEvaluator = class {
|
|
|
15458
15643
|
let imageTmpDir;
|
|
15459
15644
|
const getImageDir = async () => {
|
|
15460
15645
|
if (!imageTmpDir) {
|
|
15461
|
-
imageTmpDir = await (0,
|
|
15646
|
+
imageTmpDir = await (0, import_promises31.mkdtemp)((0, import_node_path40.join)((0, import_node_os8.tmpdir)(), "agentv-img-"));
|
|
15462
15647
|
}
|
|
15463
15648
|
return imageTmpDir;
|
|
15464
15649
|
};
|
|
@@ -15471,9 +15656,9 @@ var CodeEvaluator = class {
|
|
|
15471
15656
|
if (outputForPayload) {
|
|
15472
15657
|
const serialized = JSON.stringify(outputForPayload);
|
|
15473
15658
|
if (serialized.length > FILE_BACKED_OUTPUT_THRESHOLD) {
|
|
15474
|
-
const tmpDir = await (0,
|
|
15475
|
-
outputPath = (0,
|
|
15476
|
-
await (0,
|
|
15659
|
+
const tmpDir = await (0, import_promises31.mkdtemp)((0, import_node_path40.join)((0, import_node_os8.tmpdir)(), "agentv-grader-"));
|
|
15660
|
+
outputPath = (0, import_node_path40.join)(tmpDir, "output.json");
|
|
15661
|
+
await (0, import_promises31.writeFile)(outputPath, serialized);
|
|
15477
15662
|
outputForPayload = null;
|
|
15478
15663
|
}
|
|
15479
15664
|
}
|
|
@@ -15603,11 +15788,11 @@ var CodeEvaluator = class {
|
|
|
15603
15788
|
await proxyShutdown();
|
|
15604
15789
|
}
|
|
15605
15790
|
if (outputPath) {
|
|
15606
|
-
await (0,
|
|
15791
|
+
await (0, import_promises31.rm)((0, import_node_path40.dirname)(outputPath), { recursive: true, force: true }).catch(() => {
|
|
15607
15792
|
});
|
|
15608
15793
|
}
|
|
15609
15794
|
if (imageTmpDir) {
|
|
15610
|
-
await (0,
|
|
15795
|
+
await (0, import_promises31.rm)(imageTmpDir, { recursive: true, force: true }).catch(() => {
|
|
15611
15796
|
});
|
|
15612
15797
|
}
|
|
15613
15798
|
}
|
|
@@ -15640,8 +15825,8 @@ var import_ai3 = require("ai");
|
|
|
15640
15825
|
|
|
15641
15826
|
// src/evaluation/evaluators/llm-grader.ts
|
|
15642
15827
|
init_cjs_shims();
|
|
15643
|
-
var
|
|
15644
|
-
var
|
|
15828
|
+
var import_promises32 = __toESM(require("fs/promises"), 1);
|
|
15829
|
+
var import_node_path41 = __toESM(require("path"), 1);
|
|
15645
15830
|
var import_ai2 = require("ai");
|
|
15646
15831
|
var import_zod4 = require("zod");
|
|
15647
15832
|
var DEFAULT_MAX_STEPS = 10;
|
|
@@ -16675,8 +16860,8 @@ function toAiSdkImageParts(images) {
|
|
|
16675
16860
|
}));
|
|
16676
16861
|
}
|
|
16677
16862
|
function resolveSandboxed(basePath, relativePath) {
|
|
16678
|
-
const resolved =
|
|
16679
|
-
if (!resolved.startsWith(basePath +
|
|
16863
|
+
const resolved = import_node_path41.default.resolve(basePath, relativePath);
|
|
16864
|
+
if (!resolved.startsWith(basePath + import_node_path41.default.sep) && resolved !== basePath) {
|
|
16680
16865
|
throw new Error(`Path '${relativePath}' is outside the workspace`);
|
|
16681
16866
|
}
|
|
16682
16867
|
return resolved;
|
|
@@ -16691,7 +16876,7 @@ function createFilesystemTools(workspacePath) {
|
|
|
16691
16876
|
execute: async (input) => {
|
|
16692
16877
|
try {
|
|
16693
16878
|
const resolved = resolveSandboxed(workspacePath, input.path);
|
|
16694
|
-
const entries = await
|
|
16879
|
+
const entries = await import_promises32.default.readdir(resolved, { withFileTypes: true });
|
|
16695
16880
|
return entries.map((e) => ({
|
|
16696
16881
|
name: e.name,
|
|
16697
16882
|
type: e.isDirectory() ? "directory" : "file"
|
|
@@ -16709,20 +16894,20 @@ function createFilesystemTools(workspacePath) {
|
|
|
16709
16894
|
execute: async (input) => {
|
|
16710
16895
|
try {
|
|
16711
16896
|
const resolved = resolveSandboxed(workspacePath, input.path);
|
|
16712
|
-
const
|
|
16713
|
-
if (
|
|
16897
|
+
const stat13 = await import_promises32.default.stat(resolved);
|
|
16898
|
+
if (stat13.isDirectory()) {
|
|
16714
16899
|
return { error: `'${input.path}' is a directory, not a file` };
|
|
16715
16900
|
}
|
|
16716
|
-
const buffer = Buffer.alloc(Math.min(
|
|
16717
|
-
const fd = await
|
|
16901
|
+
const buffer = Buffer.alloc(Math.min(stat13.size, MAX_FILE_SIZE));
|
|
16902
|
+
const fd = await import_promises32.default.open(resolved, "r");
|
|
16718
16903
|
try {
|
|
16719
16904
|
await fd.read(buffer, 0, buffer.length, 0);
|
|
16720
16905
|
} finally {
|
|
16721
16906
|
await fd.close();
|
|
16722
16907
|
}
|
|
16723
16908
|
const content = buffer.toString("utf-8");
|
|
16724
|
-
const truncated =
|
|
16725
|
-
return { content, truncated, size:
|
|
16909
|
+
const truncated = stat13.size > MAX_FILE_SIZE;
|
|
16910
|
+
return { content, truncated, size: stat13.size };
|
|
16726
16911
|
} catch (error) {
|
|
16727
16912
|
return { error: error instanceof Error ? error.message : String(error) };
|
|
16728
16913
|
}
|
|
@@ -16759,30 +16944,30 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
|
|
|
16759
16944
|
if (matches.length >= MAX_SEARCH_MATCHES) return;
|
|
16760
16945
|
let entries;
|
|
16761
16946
|
try {
|
|
16762
|
-
entries = await
|
|
16947
|
+
entries = await import_promises32.default.readdir(dirPath, { withFileTypes: true });
|
|
16763
16948
|
} catch {
|
|
16764
16949
|
return;
|
|
16765
16950
|
}
|
|
16766
16951
|
for (const entry of entries) {
|
|
16767
16952
|
if (matches.length >= MAX_SEARCH_MATCHES) return;
|
|
16768
16953
|
if (SEARCH_SKIP_DIRS.has(entry.name)) continue;
|
|
16769
|
-
const fullPath =
|
|
16954
|
+
const fullPath = import_node_path41.default.join(dirPath, entry.name);
|
|
16770
16955
|
if (entry.isDirectory()) {
|
|
16771
16956
|
await searchDirectory(fullPath, workspacePath, regex, matches);
|
|
16772
16957
|
} else if (entry.isFile()) {
|
|
16773
|
-
const ext =
|
|
16958
|
+
const ext = import_node_path41.default.extname(entry.name).toLowerCase();
|
|
16774
16959
|
if (BINARY_EXTENSIONS.has(ext)) continue;
|
|
16775
16960
|
try {
|
|
16776
|
-
const
|
|
16777
|
-
if (
|
|
16778
|
-
const content = await
|
|
16961
|
+
const stat13 = await import_promises32.default.stat(fullPath);
|
|
16962
|
+
if (stat13.size > MAX_FILE_SIZE) continue;
|
|
16963
|
+
const content = await import_promises32.default.readFile(fullPath, "utf-8");
|
|
16779
16964
|
const lines = content.split("\n");
|
|
16780
16965
|
for (let i = 0; i < lines.length; i++) {
|
|
16781
16966
|
if (matches.length >= MAX_SEARCH_MATCHES) return;
|
|
16782
16967
|
regex.lastIndex = 0;
|
|
16783
16968
|
if (regex.test(lines[i])) {
|
|
16784
16969
|
matches.push({
|
|
16785
|
-
file:
|
|
16970
|
+
file: import_node_path41.default.relative(workspacePath, fullPath),
|
|
16786
16971
|
line: i + 1,
|
|
16787
16972
|
text: lines[i].substring(0, 200)
|
|
16788
16973
|
});
|
|
@@ -18752,7 +18937,7 @@ function runEqualsAssertion(output, value) {
|
|
|
18752
18937
|
init_cjs_shims();
|
|
18753
18938
|
var import_node_crypto11 = require("crypto");
|
|
18754
18939
|
var import_node_fs16 = require("fs");
|
|
18755
|
-
var
|
|
18940
|
+
var import_promises36 = require("fs/promises");
|
|
18756
18941
|
var import_node_path49 = __toESM(require("path"), 1);
|
|
18757
18942
|
var import_micromatch3 = __toESM(require("micromatch"), 1);
|
|
18758
18943
|
|
|
@@ -18980,7 +19165,7 @@ var InlineAssertEvaluator = class {
|
|
|
18980
19165
|
|
|
18981
19166
|
// src/evaluation/evaluators/prompt-resolution.ts
|
|
18982
19167
|
init_cjs_shims();
|
|
18983
|
-
var
|
|
19168
|
+
var import_node_path42 = __toESM(require("path"), 1);
|
|
18984
19169
|
async function resolveCustomPrompt(promptConfig, context2, timeoutMs) {
|
|
18985
19170
|
if (promptConfig.resolvedPromptScript && promptConfig.resolvedPromptScript.length > 0) {
|
|
18986
19171
|
if (!context2) {
|
|
@@ -19032,7 +19217,7 @@ async function executePromptTemplate(script, context2, config, timeoutMs) {
|
|
|
19032
19217
|
};
|
|
19033
19218
|
const inputJson = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
|
|
19034
19219
|
const scriptPath = script[script.length - 1];
|
|
19035
|
-
const cwd =
|
|
19220
|
+
const cwd = import_node_path42.default.dirname(scriptPath);
|
|
19036
19221
|
try {
|
|
19037
19222
|
const stdout = await executeScript(script, inputJson, timeoutMs, cwd);
|
|
19038
19223
|
const prompt = stdout.trim();
|
|
@@ -19316,16 +19501,16 @@ function createBuiltinRegistry() {
|
|
|
19316
19501
|
|
|
19317
19502
|
// src/evaluation/registry/assertion-discovery.ts
|
|
19318
19503
|
init_cjs_shims();
|
|
19319
|
-
var
|
|
19504
|
+
var import_node_path43 = __toESM(require("path"), 1);
|
|
19320
19505
|
var import_fast_glob3 = __toESM(require("fast-glob"), 1);
|
|
19321
19506
|
async function discoverAssertions(registry, baseDir) {
|
|
19322
19507
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
19323
19508
|
const candidateDirs = [];
|
|
19324
|
-
let dir =
|
|
19325
|
-
const root =
|
|
19509
|
+
let dir = import_node_path43.default.resolve(baseDir);
|
|
19510
|
+
const root = import_node_path43.default.parse(dir).root;
|
|
19326
19511
|
while (dir !== root) {
|
|
19327
|
-
candidateDirs.push(
|
|
19328
|
-
dir =
|
|
19512
|
+
candidateDirs.push(import_node_path43.default.join(dir, ".agentv", "assertions"));
|
|
19513
|
+
dir = import_node_path43.default.dirname(dir);
|
|
19329
19514
|
}
|
|
19330
19515
|
let files = [];
|
|
19331
19516
|
for (const assertionsDir of candidateDirs) {
|
|
@@ -19341,7 +19526,7 @@ async function discoverAssertions(registry, baseDir) {
|
|
|
19341
19526
|
}
|
|
19342
19527
|
const discoveredTypes = [];
|
|
19343
19528
|
for (const filePath of files) {
|
|
19344
|
-
const basename =
|
|
19529
|
+
const basename = import_node_path43.default.basename(filePath);
|
|
19345
19530
|
const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
19346
19531
|
if (registry.has(typeName)) {
|
|
19347
19532
|
continue;
|
|
@@ -19360,17 +19545,17 @@ async function discoverAssertions(registry, baseDir) {
|
|
|
19360
19545
|
|
|
19361
19546
|
// src/evaluation/registry/grader-discovery.ts
|
|
19362
19547
|
init_cjs_shims();
|
|
19363
|
-
var
|
|
19548
|
+
var import_node_path44 = __toESM(require("path"), 1);
|
|
19364
19549
|
var import_fast_glob4 = __toESM(require("fast-glob"), 1);
|
|
19365
19550
|
async function discoverGraders(registry, baseDir) {
|
|
19366
19551
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
19367
19552
|
const candidateDirs = [];
|
|
19368
|
-
let dir =
|
|
19369
|
-
const root =
|
|
19553
|
+
let dir = import_node_path44.default.resolve(baseDir);
|
|
19554
|
+
const root = import_node_path44.default.parse(dir).root;
|
|
19370
19555
|
while (dir !== root) {
|
|
19371
|
-
candidateDirs.push(
|
|
19372
|
-
candidateDirs.push(
|
|
19373
|
-
dir =
|
|
19556
|
+
candidateDirs.push(import_node_path44.default.join(dir, ".agentv", "graders"));
|
|
19557
|
+
candidateDirs.push(import_node_path44.default.join(dir, ".agentv", "judges"));
|
|
19558
|
+
dir = import_node_path44.default.dirname(dir);
|
|
19374
19559
|
}
|
|
19375
19560
|
let files = [];
|
|
19376
19561
|
for (const gradersDir of candidateDirs) {
|
|
@@ -19386,7 +19571,7 @@ async function discoverGraders(registry, baseDir) {
|
|
|
19386
19571
|
}
|
|
19387
19572
|
const discoveredTypes = [];
|
|
19388
19573
|
for (const filePath of files) {
|
|
19389
|
-
const basename =
|
|
19574
|
+
const basename = import_node_path44.default.basename(filePath);
|
|
19390
19575
|
const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
19391
19576
|
if (registry.has(typeName)) {
|
|
19392
19577
|
continue;
|
|
@@ -19544,59 +19729,9 @@ function getTCritical(df) {
|
|
|
19544
19729
|
return T_TABLE_95[df - 1];
|
|
19545
19730
|
}
|
|
19546
19731
|
|
|
19547
|
-
// src/evaluation/workspace/file-changes.ts
|
|
19548
|
-
init_cjs_shims();
|
|
19549
|
-
var import_node_child_process8 = require("child_process");
|
|
19550
|
-
var import_node_fs13 = require("fs");
|
|
19551
|
-
var import_node_path44 = __toESM(require("path"), 1);
|
|
19552
|
-
var import_node_util4 = require("util");
|
|
19553
|
-
var execAsync4 = (0, import_node_util4.promisify)(import_node_child_process8.exec);
|
|
19554
|
-
function gitExecOpts(workspacePath) {
|
|
19555
|
-
const { GIT_DIR: _, GIT_WORK_TREE: __, ...env } = process.env;
|
|
19556
|
-
return { cwd: workspacePath, env };
|
|
19557
|
-
}
|
|
19558
|
-
async function initializeBaseline(workspacePath) {
|
|
19559
|
-
const opts = gitExecOpts(workspacePath);
|
|
19560
|
-
await execAsync4("git init", opts);
|
|
19561
|
-
await execAsync4("git add -A", opts);
|
|
19562
|
-
await execAsync4(
|
|
19563
|
-
'git -c user.email=agentv@localhost -c user.name=agentv commit --allow-empty -m "agentv-baseline"',
|
|
19564
|
-
opts
|
|
19565
|
-
);
|
|
19566
|
-
const { stdout } = await execAsync4("git rev-parse HEAD", opts);
|
|
19567
|
-
return stdout.trim();
|
|
19568
|
-
}
|
|
19569
|
-
async function captureFileChanges(workspacePath, baselineCommit) {
|
|
19570
|
-
const opts = gitExecOpts(workspacePath);
|
|
19571
|
-
await stageNestedRepoChanges(workspacePath);
|
|
19572
|
-
await execAsync4("git add -A", opts);
|
|
19573
|
-
const { stdout } = await execAsync4(`git diff ${baselineCommit} --submodule=diff`, opts);
|
|
19574
|
-
return stdout.trim();
|
|
19575
|
-
}
|
|
19576
|
-
async function stageNestedRepoChanges(workspacePath) {
|
|
19577
|
-
let entries;
|
|
19578
|
-
try {
|
|
19579
|
-
entries = (0, import_node_fs13.readdirSync)(workspacePath);
|
|
19580
|
-
} catch {
|
|
19581
|
-
return;
|
|
19582
|
-
}
|
|
19583
|
-
for (const entry of entries) {
|
|
19584
|
-
if (entry === ".git" || entry === "node_modules") continue;
|
|
19585
|
-
const childPath = import_node_path44.default.join(workspacePath, entry);
|
|
19586
|
-
try {
|
|
19587
|
-
if (!(0, import_node_fs13.statSync)(childPath).isDirectory()) continue;
|
|
19588
|
-
if (!(0, import_node_fs13.statSync)(import_node_path44.default.join(childPath, ".git")).isDirectory()) continue;
|
|
19589
|
-
} catch {
|
|
19590
|
-
continue;
|
|
19591
|
-
}
|
|
19592
|
-
const childOpts = gitExecOpts(childPath);
|
|
19593
|
-
await execAsync4("git add -A", childOpts);
|
|
19594
|
-
}
|
|
19595
|
-
}
|
|
19596
|
-
|
|
19597
19732
|
// src/evaluation/workspace/manager.ts
|
|
19598
19733
|
init_cjs_shims();
|
|
19599
|
-
var
|
|
19734
|
+
var import_promises33 = require("fs/promises");
|
|
19600
19735
|
var import_node_path45 = __toESM(require("path"), 1);
|
|
19601
19736
|
var TemplateNotFoundError = class extends Error {
|
|
19602
19737
|
constructor(templatePath) {
|
|
@@ -19619,7 +19754,7 @@ var WorkspaceCreationError = class extends Error {
|
|
|
19619
19754
|
};
|
|
19620
19755
|
async function isDirectory(filePath) {
|
|
19621
19756
|
try {
|
|
19622
|
-
const stats = await (0,
|
|
19757
|
+
const stats = await (0, import_promises33.stat)(filePath);
|
|
19623
19758
|
return stats.isDirectory();
|
|
19624
19759
|
} catch {
|
|
19625
19760
|
return false;
|
|
@@ -19630,8 +19765,8 @@ function getWorkspacePath(evalRunId, caseId, workspaceRoot) {
|
|
|
19630
19765
|
return import_node_path45.default.join(root, evalRunId, caseId);
|
|
19631
19766
|
}
|
|
19632
19767
|
async function copyDirectoryRecursive(src, dest) {
|
|
19633
|
-
await (0,
|
|
19634
|
-
const entries = await (0,
|
|
19768
|
+
await (0, import_promises33.mkdir)(dest, { recursive: true });
|
|
19769
|
+
const entries = await (0, import_promises33.readdir)(src, { withFileTypes: true });
|
|
19635
19770
|
for (const entry of entries) {
|
|
19636
19771
|
const srcPath = import_node_path45.default.join(src, entry.name);
|
|
19637
19772
|
const destPath = import_node_path45.default.join(dest, entry.name);
|
|
@@ -19641,7 +19776,7 @@ async function copyDirectoryRecursive(src, dest) {
|
|
|
19641
19776
|
if (entry.isDirectory()) {
|
|
19642
19777
|
await copyDirectoryRecursive(srcPath, destPath);
|
|
19643
19778
|
} else {
|
|
19644
|
-
await (0,
|
|
19779
|
+
await (0, import_promises33.cp)(srcPath, destPath, { preserveTimestamps: true });
|
|
19645
19780
|
}
|
|
19646
19781
|
}
|
|
19647
19782
|
}
|
|
@@ -19656,7 +19791,7 @@ async function createTempWorkspace(templatePath, evalRunId, caseId, workspaceRoo
|
|
|
19656
19791
|
const workspacePath = getWorkspacePath(evalRunId, caseId, workspaceRoot);
|
|
19657
19792
|
try {
|
|
19658
19793
|
if (await fileExists2(workspacePath)) {
|
|
19659
|
-
await (0,
|
|
19794
|
+
await (0, import_promises33.rm)(workspacePath, { recursive: true, force: true });
|
|
19660
19795
|
}
|
|
19661
19796
|
await copyDirectoryRecursive(resolvedTemplatePath, workspacePath);
|
|
19662
19797
|
return workspacePath;
|
|
@@ -19690,14 +19825,14 @@ async function createTempWorkspace(templatePath, evalRunId, caseId, workspaceRoo
|
|
|
19690
19825
|
}
|
|
19691
19826
|
async function cleanupWorkspace(workspacePath) {
|
|
19692
19827
|
if (await fileExists2(workspacePath)) {
|
|
19693
|
-
await (0,
|
|
19828
|
+
await (0, import_promises33.rm)(workspacePath, { recursive: true, force: true });
|
|
19694
19829
|
}
|
|
19695
19830
|
}
|
|
19696
19831
|
async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
|
|
19697
19832
|
const root = workspaceRoot ?? getWorkspacesRoot();
|
|
19698
19833
|
const evalDir = import_node_path45.default.join(root, evalRunId);
|
|
19699
19834
|
if (await fileExists2(evalDir)) {
|
|
19700
|
-
await (0,
|
|
19835
|
+
await (0, import_promises33.rm)(evalDir, { recursive: true, force: true });
|
|
19701
19836
|
}
|
|
19702
19837
|
}
|
|
19703
19838
|
|
|
@@ -19706,7 +19841,7 @@ init_cjs_shims();
|
|
|
19706
19841
|
var import_node_child_process9 = require("child_process");
|
|
19707
19842
|
var import_node_crypto10 = require("crypto");
|
|
19708
19843
|
var import_node_fs14 = require("fs");
|
|
19709
|
-
var
|
|
19844
|
+
var import_promises34 = require("fs/promises");
|
|
19710
19845
|
var import_node_path46 = __toESM(require("path"), 1);
|
|
19711
19846
|
var import_node_util5 = require("util");
|
|
19712
19847
|
var execFileAsync = (0, import_node_util5.promisify)(import_node_child_process9.execFile);
|
|
@@ -19760,8 +19895,8 @@ function computeWorkspaceFingerprint(repos) {
|
|
|
19760
19895
|
return (0, import_node_crypto10.createHash)("sha256").update(JSON.stringify(canonical)).digest("hex");
|
|
19761
19896
|
}
|
|
19762
19897
|
async function copyDirectoryRecursive2(src, dest, skipDirs) {
|
|
19763
|
-
await (0,
|
|
19764
|
-
const entries = await (0,
|
|
19898
|
+
await (0, import_promises34.mkdir)(dest, { recursive: true });
|
|
19899
|
+
const entries = await (0, import_promises34.readdir)(src, { withFileTypes: true });
|
|
19765
19900
|
for (const entry of entries) {
|
|
19766
19901
|
const srcPath = import_node_path46.default.join(src, entry.name);
|
|
19767
19902
|
const destPath = import_node_path46.default.join(dest, entry.name);
|
|
@@ -19774,7 +19909,7 @@ async function copyDirectoryRecursive2(src, dest, skipDirs) {
|
|
|
19774
19909
|
}
|
|
19775
19910
|
await copyDirectoryRecursive2(srcPath, destPath, skipDirs);
|
|
19776
19911
|
} else {
|
|
19777
|
-
await (0,
|
|
19912
|
+
await (0, import_promises34.cp)(srcPath, destPath, { preserveTimestamps: true, force: true });
|
|
19778
19913
|
}
|
|
19779
19914
|
}
|
|
19780
19915
|
}
|
|
@@ -19798,7 +19933,7 @@ var WorkspacePoolManager = class {
|
|
|
19798
19933
|
const { templatePath, repos, maxSlots, repoManager, poolReset } = options;
|
|
19799
19934
|
const fingerprint = computeWorkspaceFingerprint(repos);
|
|
19800
19935
|
const poolDir = import_node_path46.default.join(this.poolRoot, fingerprint);
|
|
19801
|
-
await (0,
|
|
19936
|
+
await (0, import_promises34.mkdir)(poolDir, { recursive: true });
|
|
19802
19937
|
const drifted = await this.checkDrift(poolDir, fingerprint);
|
|
19803
19938
|
if (drifted) {
|
|
19804
19939
|
console.warn(
|
|
@@ -19825,7 +19960,7 @@ var WorkspacePoolManager = class {
|
|
|
19825
19960
|
poolDir
|
|
19826
19961
|
};
|
|
19827
19962
|
}
|
|
19828
|
-
await (0,
|
|
19963
|
+
await (0, import_promises34.mkdir)(slotPath, { recursive: true });
|
|
19829
19964
|
if (templatePath) {
|
|
19830
19965
|
await copyDirectoryRecursive2(templatePath, slotPath);
|
|
19831
19966
|
}
|
|
@@ -19849,7 +19984,7 @@ var WorkspacePoolManager = class {
|
|
|
19849
19984
|
/** Remove lock file to release a slot. */
|
|
19850
19985
|
async releaseSlot(slot) {
|
|
19851
19986
|
try {
|
|
19852
|
-
await (0,
|
|
19987
|
+
await (0, import_promises34.unlink)(slot.lockPath);
|
|
19853
19988
|
} catch {
|
|
19854
19989
|
}
|
|
19855
19990
|
}
|
|
@@ -19862,21 +19997,21 @@ var WorkspacePoolManager = class {
|
|
|
19862
19997
|
async tryLock(lockPath) {
|
|
19863
19998
|
for (let attempt = 0; attempt < 3; attempt++) {
|
|
19864
19999
|
try {
|
|
19865
|
-
await (0,
|
|
20000
|
+
await (0, import_promises34.writeFile)(lockPath, String(process.pid), { flag: "wx" });
|
|
19866
20001
|
return true;
|
|
19867
20002
|
} catch (err) {
|
|
19868
20003
|
if (err.code !== "EEXIST") {
|
|
19869
20004
|
throw err;
|
|
19870
20005
|
}
|
|
19871
20006
|
try {
|
|
19872
|
-
const pidStr = await (0,
|
|
20007
|
+
const pidStr = await (0, import_promises34.readFile)(lockPath, "utf-8");
|
|
19873
20008
|
const pid = Number.parseInt(pidStr.trim(), 10);
|
|
19874
20009
|
if (!Number.isNaN(pid)) {
|
|
19875
20010
|
try {
|
|
19876
20011
|
process.kill(pid, 0);
|
|
19877
20012
|
return false;
|
|
19878
20013
|
} catch {
|
|
19879
|
-
await (0,
|
|
20014
|
+
await (0, import_promises34.unlink)(lockPath).catch(() => {
|
|
19880
20015
|
});
|
|
19881
20016
|
continue;
|
|
19882
20017
|
}
|
|
@@ -19896,7 +20031,7 @@ var WorkspacePoolManager = class {
|
|
|
19896
20031
|
async checkDrift(poolDir, fingerprint) {
|
|
19897
20032
|
const metadataPath = import_node_path46.default.join(poolDir, "metadata.json");
|
|
19898
20033
|
try {
|
|
19899
|
-
const raw = await (0,
|
|
20034
|
+
const raw = await (0, import_promises34.readFile)(metadataPath, "utf-8");
|
|
19900
20035
|
const metadata = JSON.parse(raw);
|
|
19901
20036
|
return metadata.fingerprint !== fingerprint;
|
|
19902
20037
|
} catch {
|
|
@@ -19911,17 +20046,17 @@ var WorkspacePoolManager = class {
|
|
|
19911
20046
|
repos,
|
|
19912
20047
|
createdAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
19913
20048
|
};
|
|
19914
|
-
await (0,
|
|
20049
|
+
await (0, import_promises34.writeFile)(import_node_path46.default.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
|
|
19915
20050
|
}
|
|
19916
20051
|
/** Remove all slot directories and their lock files from a pool directory. */
|
|
19917
20052
|
async removeAllSlots(poolDir) {
|
|
19918
|
-
const entries = await (0,
|
|
20053
|
+
const entries = await (0, import_promises34.readdir)(poolDir);
|
|
19919
20054
|
for (const entry of entries) {
|
|
19920
20055
|
if (entry.startsWith("slot-") && !entry.endsWith(".lock")) {
|
|
19921
20056
|
const lockPath = import_node_path46.default.join(poolDir, `${entry}.lock`);
|
|
19922
20057
|
if ((0, import_node_fs14.existsSync)(lockPath)) {
|
|
19923
20058
|
try {
|
|
19924
|
-
const pidStr = await (0,
|
|
20059
|
+
const pidStr = await (0, import_promises34.readFile)(lockPath, "utf-8");
|
|
19925
20060
|
const pid = Number.parseInt(pidStr.trim(), 10);
|
|
19926
20061
|
if (!Number.isNaN(pid)) {
|
|
19927
20062
|
try {
|
|
@@ -19934,12 +20069,12 @@ var WorkspacePoolManager = class {
|
|
|
19934
20069
|
} catch {
|
|
19935
20070
|
}
|
|
19936
20071
|
}
|
|
19937
|
-
await (0,
|
|
19938
|
-
await (0,
|
|
20072
|
+
await (0, import_promises34.rm)(import_node_path46.default.join(poolDir, entry), { recursive: true, force: true });
|
|
20073
|
+
await (0, import_promises34.rm)(lockPath, { force: true }).catch(() => {
|
|
19939
20074
|
});
|
|
19940
20075
|
}
|
|
19941
20076
|
}
|
|
19942
|
-
await (0,
|
|
20077
|
+
await (0, import_promises34.rm)(import_node_path46.default.join(poolDir, "metadata.json"), { force: true }).catch(() => {
|
|
19943
20078
|
});
|
|
19944
20079
|
}
|
|
19945
20080
|
/**
|
|
@@ -20198,14 +20333,14 @@ ${lines.join("\n")}`;
|
|
|
20198
20333
|
|
|
20199
20334
|
// src/evaluation/workspace/resolve.ts
|
|
20200
20335
|
init_cjs_shims();
|
|
20201
|
-
var
|
|
20336
|
+
var import_promises35 = require("fs/promises");
|
|
20202
20337
|
var import_node_path48 = __toESM(require("path"), 1);
|
|
20203
20338
|
async function resolveWorkspaceTemplate(templatePath) {
|
|
20204
20339
|
if (!templatePath) {
|
|
20205
20340
|
return void 0;
|
|
20206
20341
|
}
|
|
20207
20342
|
const resolved = import_node_path48.default.resolve(templatePath);
|
|
20208
|
-
const stats = await (0,
|
|
20343
|
+
const stats = await (0, import_promises35.stat)(resolved);
|
|
20209
20344
|
if (stats.isFile()) {
|
|
20210
20345
|
return {
|
|
20211
20346
|
dir: import_node_path48.default.dirname(resolved),
|
|
@@ -20215,7 +20350,7 @@ async function resolveWorkspaceTemplate(templatePath) {
|
|
|
20215
20350
|
if (!stats.isDirectory()) {
|
|
20216
20351
|
throw new Error(`workspace template is neither a file nor a directory: ${resolved}`);
|
|
20217
20352
|
}
|
|
20218
|
-
const entries = await (0,
|
|
20353
|
+
const entries = await (0, import_promises35.readdir)(resolved);
|
|
20219
20354
|
const workspaceFiles = entries.filter((e) => e.endsWith(".code-workspace"));
|
|
20220
20355
|
if (workspaceFiles.length === 1) {
|
|
20221
20356
|
return {
|
|
@@ -20323,6 +20458,100 @@ function getWorkspaceTemplate(target) {
|
|
|
20323
20458
|
}
|
|
20324
20459
|
return void 0;
|
|
20325
20460
|
}
|
|
20461
|
+
function validateDependencyGraph(tests) {
|
|
20462
|
+
const ids = /* @__PURE__ */ new Set();
|
|
20463
|
+
for (const test of tests) {
|
|
20464
|
+
if (ids.has(test.id)) {
|
|
20465
|
+
throw new Error(`Duplicate test ID '${test.id}' \u2014 each test must have a unique ID`);
|
|
20466
|
+
}
|
|
20467
|
+
ids.add(test.id);
|
|
20468
|
+
}
|
|
20469
|
+
for (const test of tests) {
|
|
20470
|
+
if (!test.depends_on) continue;
|
|
20471
|
+
for (const dep of test.depends_on) {
|
|
20472
|
+
if (!ids.has(dep)) {
|
|
20473
|
+
throw new Error(
|
|
20474
|
+
`Test '${test.id}' depends on '${dep}', but no test with that ID exists in this suite`
|
|
20475
|
+
);
|
|
20476
|
+
}
|
|
20477
|
+
if (dep === test.id) {
|
|
20478
|
+
throw new Error(`Test '${test.id}' depends on itself`);
|
|
20479
|
+
}
|
|
20480
|
+
}
|
|
20481
|
+
}
|
|
20482
|
+
const depMap = /* @__PURE__ */ new Map();
|
|
20483
|
+
for (const test of tests) {
|
|
20484
|
+
if (test.depends_on && test.depends_on.length > 0) {
|
|
20485
|
+
depMap.set(test.id, test.depends_on);
|
|
20486
|
+
}
|
|
20487
|
+
}
|
|
20488
|
+
const visited = /* @__PURE__ */ new Set();
|
|
20489
|
+
const visiting = /* @__PURE__ */ new Set();
|
|
20490
|
+
function visit(id, path56) {
|
|
20491
|
+
if (visiting.has(id)) {
|
|
20492
|
+
const cycle = [...path56.slice(path56.indexOf(id)), id];
|
|
20493
|
+
throw new Error(`Circular dependency detected: ${cycle.join(" \u2192 ")}`);
|
|
20494
|
+
}
|
|
20495
|
+
if (visited.has(id)) return;
|
|
20496
|
+
visiting.add(id);
|
|
20497
|
+
path56.push(id);
|
|
20498
|
+
for (const dep of depMap.get(id) ?? []) {
|
|
20499
|
+
visit(dep, path56);
|
|
20500
|
+
}
|
|
20501
|
+
path56.pop();
|
|
20502
|
+
visiting.delete(id);
|
|
20503
|
+
visited.add(id);
|
|
20504
|
+
}
|
|
20505
|
+
for (const test of tests) {
|
|
20506
|
+
visit(test.id, []);
|
|
20507
|
+
}
|
|
20508
|
+
}
|
|
20509
|
+
/**
 * Groups tests into ordered execution "waves" using their `depends_on` lists.
 *
 * Wave 0 contains every test with no outstanding dependency; each following
 * wave contains the tests whose last outstanding dependency was placed in the
 * previous wave. When no test declares a dependency, a single wave holding a
 * shallow copy of `tests` is returned.
 *
 * @param {Array<{id: string, depends_on?: string[]}>} tests - Tests to schedule.
 * @returns {Array<Array<object>>} Waves, in the order they can be executed.
 * @throws {Error} If some tests can never be scheduled (for example because
 *   of a cycle that was not rejected beforehand).
 */
function computeWaves(tests) {
  const anyDependencies = tests.some((t) => t.depends_on && t.depends_on.length > 0);
  if (!anyDependencies) {
    return [tests.slice()];
  }

  const testById = new Map();
  const outstanding = new Map(); // test ID -> number of dependencies not yet scheduled
  const dependents = new Map(); // test ID -> IDs of the tests that depend on it

  for (const test of tests) {
    testById.set(test.id, test);
    outstanding.set(test.id, 0);
  }
  for (const test of tests) {
    const required = test.depends_on;
    if (required) {
      outstanding.set(test.id, required.length);
      for (const dep of required) {
        let waiting = dependents.get(dep);
        if (waiting === undefined || waiting === null) {
          waiting = [];
        }
        waiting.push(test.id);
        dependents.set(dep, waiting);
      }
    }
  }

  const waves = [];
  let scheduledCount = 0;
  let frontier = tests.filter((t) => (outstanding.get(t.id) ?? 0) === 0);
  while (frontier.length !== 0) {
    waves.push(frontier);
    scheduledCount += frontier.length;
    const unlocked = [];
    for (const finished of frontier) {
      for (const waitingId of dependents.get(finished.id) ?? []) {
        const left = (outstanding.get(waitingId) ?? 1) - 1;
        outstanding.set(waitingId, left);
        if (left !== 0) continue;
        const unlockedTest = testById.get(waitingId);
        if (unlockedTest) {
          unlocked.push(unlockedTest);
        }
      }
    }
    frontier = unlocked;
  }

  // Anything left unscheduled never reached zero outstanding dependencies.
  if (scheduledCount !== tests.length) {
    throw new Error(
      `Internal error: ${tests.length - scheduledCount} tests were not scheduled (possible undetected cycle)`
    );
  }
  return waves;
}
|
|
20326
20555
|
async function runEvaluation(options) {
|
|
20327
20556
|
const {
|
|
20328
20557
|
testFilePath: evalFilePath,
|
|
@@ -20590,14 +20819,14 @@ async function runEvaluation(options) {
|
|
|
20590
20819
|
let staticMaterialised = false;
|
|
20591
20820
|
const isYamlConfiguredPath = !cliWorkspacePath && !!yamlWorkspacePath;
|
|
20592
20821
|
if (useStaticWorkspace && configuredStaticPath) {
|
|
20593
|
-
const dirExists = await (0,
|
|
20822
|
+
const dirExists = await (0, import_promises36.stat)(configuredStaticPath).then(
|
|
20594
20823
|
(s) => s.isDirectory(),
|
|
20595
20824
|
() => false
|
|
20596
20825
|
);
|
|
20597
|
-
const isEmpty = dirExists ? (await (0,
|
|
20826
|
+
const isEmpty = dirExists ? (await (0, import_promises36.readdir)(configuredStaticPath)).length === 0 : false;
|
|
20598
20827
|
if (isYamlConfiguredPath && (!dirExists || isEmpty)) {
|
|
20599
20828
|
if (!dirExists) {
|
|
20600
|
-
await (0,
|
|
20829
|
+
await (0, import_promises36.mkdir)(configuredStaticPath, { recursive: true });
|
|
20601
20830
|
}
|
|
20602
20831
|
if (workspaceTemplate) {
|
|
20603
20832
|
await copyDirectoryRecursive(workspaceTemplate, configuredStaticPath);
|
|
@@ -20642,14 +20871,45 @@ async function runEvaluation(options) {
|
|
|
20642
20871
|
}
|
|
20643
20872
|
} else if (suiteWorkspace?.hooks || suiteWorkspace?.repos?.length && !isPerTestIsolation) {
|
|
20644
20873
|
sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
|
|
20645
|
-
await (0,
|
|
20874
|
+
await (0, import_promises36.mkdir)(sharedWorkspacePath, { recursive: true });
|
|
20646
20875
|
setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
|
|
20647
20876
|
}
|
|
20648
20877
|
try {
|
|
20878
|
+
// Converts a completed evaluation result into the compact summary that is
// handed to tests depending on it.
let toDependencyResult2 = function(r) {
  const outputText = extractLastAssistantContent(r.output);
  let details;
  if (r.scores) {
    details = Object.fromEntries(
      r.scores.map((entry) => [entry.name, { score: entry.score, verdict: entry.verdict }])
    );
  }
  let status;
  switch (r.executionStatus) {
    case "ok":
      status = "passed";
      break;
    case "execution_error":
      status = "error";
      break;
    default:
      status = "failed";
  }
  return {
    score: r.score,
    output: outputText,
    workspace_path: r.workspacePath,
    details,
    status
  };
};
// Collects the summaries of an eval case's dependencies from `completedResults`.
// `ok` is false when any dependency has no recorded result or ended with
// executionStatus "execution_error".
let checkDependencies2 = function(evalCase) {
  const depResults = {};
  const required = evalCase.depends_on;
  if (!required || required.length === 0) {
    return { ok: true, depResults };
  }
  let allPassed = true;
  for (const depId of required) {
    const finished = completedResults.get(depId);
    if (!finished) {
      allPassed = false;
      continue;
    }
    depResults[depId] = toDependencyResult2(finished);
    if (finished.executionStatus === "execution_error") {
      allPassed = false;
    }
  }
  return { ok: allPassed, depResults };
};
|
|
20908
|
+
var toDependencyResult = toDependencyResult2, checkDependencies = checkDependencies2;
|
|
20649
20909
|
if (suiteWorkspaceFile && sharedWorkspacePath) {
|
|
20650
20910
|
const copiedWorkspaceFile = import_node_path49.default.join(sharedWorkspacePath, import_node_path49.default.basename(suiteWorkspaceFile));
|
|
20651
20911
|
try {
|
|
20652
|
-
await (0,
|
|
20912
|
+
await (0, import_promises36.stat)(copiedWorkspaceFile);
|
|
20653
20913
|
suiteWorkspaceFile = copiedWorkspaceFile;
|
|
20654
20914
|
} catch {
|
|
20655
20915
|
}
|
|
@@ -20756,8 +21016,9 @@ async function runEvaluation(options) {
|
|
|
20756
21016
|
try {
|
|
20757
21017
|
sharedBaselineCommit = await initializeBaseline(sharedWorkspacePath);
|
|
20758
21018
|
setupLog(`shared baseline initialized: ${sharedBaselineCommit}`);
|
|
20759
|
-
} catch {
|
|
20760
|
-
|
|
21019
|
+
} catch (error) {
|
|
21020
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
21021
|
+
setupLog(`shared baseline initialization failed (file_changes unavailable): ${message}`);
|
|
20761
21022
|
}
|
|
20762
21023
|
}
|
|
20763
21024
|
if (availablePoolSlots.length > 0) {
|
|
@@ -20766,8 +21027,11 @@ async function runEvaluation(options) {
|
|
|
20766
21027
|
const baseline = await initializeBaseline(slot.path);
|
|
20767
21028
|
poolSlotBaselines.set(slot.path, baseline);
|
|
20768
21029
|
setupLog(`pool slot ${slot.index} baseline initialized: ${baseline}`);
|
|
20769
|
-
} catch {
|
|
20770
|
-
|
|
21030
|
+
} catch (error) {
|
|
21031
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
21032
|
+
setupLog(
|
|
21033
|
+
`pool slot ${slot.index} baseline initialization failed (file_changes unavailable): ${message}`
|
|
21034
|
+
);
|
|
20771
21035
|
}
|
|
20772
21036
|
}
|
|
20773
21037
|
}
|
|
@@ -20777,204 +21041,259 @@ async function runEvaluation(options) {
|
|
|
20777
21041
|
let cumulativeBudgetCost = 0;
|
|
20778
21042
|
let budgetExhausted = false;
|
|
20779
21043
|
let failOnErrorTriggered = false;
|
|
20780
|
-
|
|
20781
|
-
|
|
20782
|
-
|
|
20783
|
-
|
|
20784
|
-
|
|
20785
|
-
|
|
20786
|
-
|
|
21044
|
+
validateDependencyGraph(filteredEvalCases);
|
|
21045
|
+
const waves = computeWaves(filteredEvalCases);
|
|
21046
|
+
const completedResults = /* @__PURE__ */ new Map();
|
|
21047
|
+
const results = [];
|
|
21048
|
+
async function dispatchTest(evalCase, depResults) {
|
|
21049
|
+
const workerId = nextWorkerId++;
|
|
21050
|
+
workerIdByEvalId.set(evalCase.id, workerId);
|
|
21051
|
+
if (totalBudgetUsd !== void 0 && budgetExhausted) {
|
|
21052
|
+
const budgetResult = {
|
|
21053
|
+
timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
21054
|
+
testId: evalCase.id,
|
|
21055
|
+
suite: evalCase.suite,
|
|
21056
|
+
category: evalCase.category,
|
|
21057
|
+
score: 0,
|
|
21058
|
+
assertions: [],
|
|
21059
|
+
output: [],
|
|
21060
|
+
target: target.name,
|
|
21061
|
+
error: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
|
|
21062
|
+
budgetExceeded: true,
|
|
21063
|
+
executionStatus: "execution_error",
|
|
21064
|
+
failureStage: "setup",
|
|
21065
|
+
failureReasonCode: "budget_exceeded",
|
|
21066
|
+
executionError: {
|
|
21067
|
+
message: `Suite budget exceeded ($${cumulativeBudgetCost.toFixed(4)} / $${totalBudgetUsd.toFixed(4)})`,
|
|
21068
|
+
stage: "setup"
|
|
21069
|
+
}
|
|
21070
|
+
};
|
|
21071
|
+
if (onProgress) {
|
|
21072
|
+
await onProgress({
|
|
21073
|
+
workerId,
|
|
20787
21074
|
testId: evalCase.id,
|
|
20788
|
-
|
|
20789
|
-
|
|
20790
|
-
|
|
20791
|
-
|
|
20792
|
-
|
|
20793
|
-
|
|
20794
|
-
|
|
20795
|
-
|
|
20796
|
-
|
|
20797
|
-
|
|
20798
|
-
|
|
20799
|
-
|
|
20800
|
-
|
|
20801
|
-
|
|
21075
|
+
status: "failed",
|
|
21076
|
+
completedAt: Date.now(),
|
|
21077
|
+
error: budgetResult.error,
|
|
21078
|
+
score: budgetResult.score,
|
|
21079
|
+
executionStatus: budgetResult.executionStatus
|
|
21080
|
+
});
|
|
21081
|
+
}
|
|
21082
|
+
if (onResult) {
|
|
21083
|
+
await onResult(budgetResult);
|
|
21084
|
+
}
|
|
21085
|
+
return budgetResult;
|
|
21086
|
+
}
|
|
21087
|
+
if (failOnError === true && failOnErrorTriggered) {
|
|
21088
|
+
const errorMsg = "Halted: execution error encountered with fail_on_error enabled";
|
|
21089
|
+
const haltResult = {
|
|
21090
|
+
timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
21091
|
+
testId: evalCase.id,
|
|
21092
|
+
suite: evalCase.suite,
|
|
21093
|
+
category: evalCase.category,
|
|
21094
|
+
score: 0,
|
|
21095
|
+
assertions: [],
|
|
21096
|
+
output: [],
|
|
21097
|
+
target: target.name,
|
|
21098
|
+
error: errorMsg,
|
|
21099
|
+
executionStatus: "execution_error",
|
|
21100
|
+
failureStage: "setup",
|
|
21101
|
+
failureReasonCode: "error_threshold_exceeded",
|
|
21102
|
+
executionError: { message: errorMsg, stage: "setup" }
|
|
21103
|
+
};
|
|
21104
|
+
if (onProgress) {
|
|
21105
|
+
await onProgress({
|
|
21106
|
+
workerId,
|
|
21107
|
+
testId: evalCase.id,
|
|
21108
|
+
status: "failed",
|
|
21109
|
+
completedAt: Date.now(),
|
|
21110
|
+
error: haltResult.error,
|
|
21111
|
+
score: haltResult.score,
|
|
21112
|
+
executionStatus: haltResult.executionStatus
|
|
21113
|
+
});
|
|
21114
|
+
}
|
|
21115
|
+
if (onResult) {
|
|
21116
|
+
await onResult(haltResult);
|
|
21117
|
+
}
|
|
21118
|
+
return haltResult;
|
|
21119
|
+
}
|
|
21120
|
+
if (onProgress) {
|
|
21121
|
+
await onProgress({
|
|
21122
|
+
workerId,
|
|
21123
|
+
testId: evalCase.id,
|
|
21124
|
+
status: "running",
|
|
21125
|
+
startedAt: Date.now()
|
|
21126
|
+
});
|
|
21127
|
+
}
|
|
21128
|
+
const testPoolSlot = availablePoolSlots.length > 0 ? availablePoolSlots.pop() : void 0;
|
|
21129
|
+
const testWorkspacePath = testPoolSlot?.path ?? sharedWorkspacePath;
|
|
21130
|
+
const testBaselineCommit = testPoolSlot ? poolSlotBaselines.get(testPoolSlot.path) : sharedBaselineCommit;
|
|
21131
|
+
try {
|
|
21132
|
+
const graderProvider = await resolveGraderProvider(target);
|
|
21133
|
+
const runCaseOptions = {
|
|
21134
|
+
evalCase,
|
|
21135
|
+
provider: primaryProvider,
|
|
21136
|
+
target,
|
|
21137
|
+
evaluators: evaluatorRegistry,
|
|
21138
|
+
maxRetries,
|
|
21139
|
+
agentTimeoutMs,
|
|
21140
|
+
cache,
|
|
21141
|
+
useCache,
|
|
21142
|
+
now,
|
|
21143
|
+
graderProvider,
|
|
21144
|
+
targetResolver,
|
|
21145
|
+
availableTargets,
|
|
21146
|
+
evalRunId,
|
|
21147
|
+
keepWorkspaces,
|
|
21148
|
+
cleanupWorkspaces,
|
|
21149
|
+
retainOnSuccess: resolvedRetainOnSuccess,
|
|
21150
|
+
retainOnFailure: resolvedRetainOnFailure,
|
|
21151
|
+
sharedWorkspacePath: testWorkspacePath,
|
|
21152
|
+
sharedBaselineCommit: testBaselineCommit,
|
|
21153
|
+
suiteWorkspaceFile,
|
|
21154
|
+
streamCallbacks,
|
|
21155
|
+
typeRegistry,
|
|
21156
|
+
repoManager,
|
|
21157
|
+
evalDir,
|
|
21158
|
+
verbose,
|
|
21159
|
+
threshold: scoreThreshold,
|
|
21160
|
+
...depResults && Object.keys(depResults).length > 0 ? { dependencyResults: depResults } : {}
|
|
21161
|
+
};
|
|
21162
|
+
let result = trials && trials.count > 1 ? await runEvalCaseWithTrials(runCaseOptions, trials) : await runEvalCase(runCaseOptions);
|
|
21163
|
+
if (totalBudgetUsd !== void 0) {
|
|
21164
|
+
let caseCost;
|
|
21165
|
+
if (result.trials && result.trials.length > 0) {
|
|
21166
|
+
const trialCostSum = result.trials.reduce((sum, t) => sum + (t.costUsd ?? 0), 0);
|
|
21167
|
+
if (trialCostSum > 0) {
|
|
21168
|
+
caseCost = trialCostSum;
|
|
20802
21169
|
}
|
|
20803
|
-
}
|
|
20804
|
-
|
|
20805
|
-
await onProgress({
|
|
20806
|
-
workerId,
|
|
20807
|
-
testId: evalCase.id,
|
|
20808
|
-
status: "failed",
|
|
20809
|
-
completedAt: Date.now(),
|
|
20810
|
-
error: budgetResult.error,
|
|
20811
|
-
score: budgetResult.score,
|
|
20812
|
-
executionStatus: budgetResult.executionStatus
|
|
20813
|
-
});
|
|
21170
|
+
} else {
|
|
21171
|
+
caseCost = result.costUsd;
|
|
20814
21172
|
}
|
|
20815
|
-
if (
|
|
20816
|
-
|
|
21173
|
+
if (caseCost !== void 0) {
|
|
21174
|
+
cumulativeBudgetCost += caseCost;
|
|
21175
|
+
if (cumulativeBudgetCost >= totalBudgetUsd) {
|
|
21176
|
+
budgetExhausted = true;
|
|
21177
|
+
}
|
|
20817
21178
|
}
|
|
20818
|
-
return budgetResult;
|
|
20819
21179
|
}
|
|
20820
|
-
if (failOnError === true &&
|
|
20821
|
-
|
|
20822
|
-
|
|
20823
|
-
|
|
21180
|
+
if (failOnError === true && result.executionStatus === "execution_error") {
|
|
21181
|
+
failOnErrorTriggered = true;
|
|
21182
|
+
}
|
|
21183
|
+
if (beforeAllOutput && !beforeAllOutputAttached) {
|
|
21184
|
+
result = { ...result, beforeAllOutput };
|
|
21185
|
+
beforeAllOutputAttached = true;
|
|
21186
|
+
}
|
|
21187
|
+
if (onProgress) {
|
|
21188
|
+
await onProgress({
|
|
21189
|
+
workerId,
|
|
20824
21190
|
testId: evalCase.id,
|
|
20825
|
-
|
|
20826
|
-
|
|
20827
|
-
|
|
20828
|
-
|
|
20829
|
-
|
|
20830
|
-
|
|
20831
|
-
|
|
20832
|
-
|
|
20833
|
-
failureStage: "setup",
|
|
20834
|
-
failureReasonCode: "error_threshold_exceeded",
|
|
20835
|
-
executionError: { message: errorMsg, stage: "setup" }
|
|
20836
|
-
};
|
|
20837
|
-
if (onProgress) {
|
|
20838
|
-
await onProgress({
|
|
20839
|
-
workerId,
|
|
20840
|
-
testId: evalCase.id,
|
|
20841
|
-
status: "failed",
|
|
20842
|
-
completedAt: Date.now(),
|
|
20843
|
-
error: haltResult.error,
|
|
20844
|
-
score: haltResult.score,
|
|
20845
|
-
executionStatus: haltResult.executionStatus
|
|
20846
|
-
});
|
|
20847
|
-
}
|
|
20848
|
-
if (onResult) {
|
|
20849
|
-
await onResult(haltResult);
|
|
20850
|
-
}
|
|
20851
|
-
return haltResult;
|
|
21191
|
+
status: result.error ? "failed" : "completed",
|
|
21192
|
+
startedAt: 0,
|
|
21193
|
+
// Not used for completed status
|
|
21194
|
+
completedAt: Date.now(),
|
|
21195
|
+
error: result.error,
|
|
21196
|
+
score: result.score,
|
|
21197
|
+
executionStatus: result.executionStatus
|
|
21198
|
+
});
|
|
20852
21199
|
}
|
|
21200
|
+
if (onResult) {
|
|
21201
|
+
await onResult(result);
|
|
21202
|
+
}
|
|
21203
|
+
return result;
|
|
21204
|
+
} catch (error) {
|
|
20853
21205
|
if (onProgress) {
|
|
20854
21206
|
await onProgress({
|
|
20855
21207
|
workerId,
|
|
20856
21208
|
testId: evalCase.id,
|
|
20857
|
-
status: "
|
|
20858
|
-
|
|
21209
|
+
status: "failed",
|
|
21210
|
+
completedAt: Date.now(),
|
|
21211
|
+
error: error instanceof Error ? error.message : String(error)
|
|
20859
21212
|
});
|
|
20860
21213
|
}
|
|
20861
|
-
|
|
20862
|
-
|
|
20863
|
-
|
|
20864
|
-
|
|
20865
|
-
|
|
20866
|
-
|
|
20867
|
-
|
|
20868
|
-
|
|
20869
|
-
|
|
20870
|
-
|
|
20871
|
-
|
|
20872
|
-
|
|
20873
|
-
|
|
20874
|
-
|
|
20875
|
-
|
|
20876
|
-
|
|
20877
|
-
|
|
20878
|
-
|
|
20879
|
-
|
|
20880
|
-
|
|
20881
|
-
|
|
20882
|
-
|
|
20883
|
-
|
|
20884
|
-
|
|
20885
|
-
|
|
20886
|
-
|
|
20887
|
-
|
|
20888
|
-
|
|
20889
|
-
|
|
20890
|
-
|
|
20891
|
-
|
|
20892
|
-
|
|
20893
|
-
|
|
20894
|
-
|
|
20895
|
-
|
|
20896
|
-
|
|
20897
|
-
|
|
20898
|
-
|
|
20899
|
-
|
|
20900
|
-
|
|
20901
|
-
|
|
20902
|
-
|
|
20903
|
-
|
|
20904
|
-
|
|
20905
|
-
|
|
20906
|
-
|
|
20907
|
-
|
|
20908
|
-
|
|
21214
|
+
throw error;
|
|
21215
|
+
} finally {
|
|
21216
|
+
if (testPoolSlot) {
|
|
21217
|
+
availablePoolSlots.push(testPoolSlot);
|
|
21218
|
+
}
|
|
21219
|
+
}
|
|
21220
|
+
}
|
|
21221
|
+
for (const wave of waves) {
|
|
21222
|
+
const wavePromises = wave.map(
|
|
21223
|
+
(evalCase) => limit(async () => {
|
|
21224
|
+
if (evalCase.depends_on && evalCase.depends_on.length > 0) {
|
|
21225
|
+
const { ok, depResults } = checkDependencies2(evalCase);
|
|
21226
|
+
if (!ok) {
|
|
21227
|
+
const policy = evalCase.on_dependency_failure ?? "skip";
|
|
21228
|
+
if (policy === "skip" || policy === "fail") {
|
|
21229
|
+
const failedDeps = evalCase.depends_on.filter(
|
|
21230
|
+
(d) => completedResults.get(d)?.executionStatus === "execution_error"
|
|
21231
|
+
);
|
|
21232
|
+
const prefix = policy === "skip" ? "Skipped" : "Failed";
|
|
21233
|
+
const errorMsg = `${prefix}: dependency failed (${failedDeps.join(", ")})`;
|
|
21234
|
+
const depFailResult = {
|
|
21235
|
+
timestamp: (now ?? (() => /* @__PURE__ */ new Date()))().toISOString(),
|
|
21236
|
+
testId: evalCase.id,
|
|
21237
|
+
suite: evalCase.suite,
|
|
21238
|
+
category: evalCase.category,
|
|
21239
|
+
score: 0,
|
|
21240
|
+
assertions: [],
|
|
21241
|
+
output: [],
|
|
21242
|
+
target: target.name,
|
|
21243
|
+
error: errorMsg,
|
|
21244
|
+
executionStatus: "execution_error",
|
|
21245
|
+
failureStage: "setup",
|
|
21246
|
+
failureReasonCode: "dependency_failed",
|
|
21247
|
+
executionError: { message: errorMsg, stage: "setup" }
|
|
21248
|
+
};
|
|
21249
|
+
if (onProgress) {
|
|
21250
|
+
await onProgress({
|
|
21251
|
+
workerId: nextWorkerId++,
|
|
21252
|
+
testId: evalCase.id,
|
|
21253
|
+
status: "failed",
|
|
21254
|
+
completedAt: Date.now(),
|
|
21255
|
+
error: depFailResult.error,
|
|
21256
|
+
score: 0,
|
|
21257
|
+
executionStatus: depFailResult.executionStatus
|
|
21258
|
+
});
|
|
21259
|
+
}
|
|
21260
|
+
if (onResult) {
|
|
21261
|
+
await onResult(depFailResult);
|
|
21262
|
+
}
|
|
21263
|
+
return depFailResult;
|
|
20909
21264
|
}
|
|
20910
21265
|
}
|
|
21266
|
+
return dispatchTest(evalCase, depResults);
|
|
20911
21267
|
}
|
|
20912
|
-
|
|
20913
|
-
|
|
20914
|
-
|
|
20915
|
-
|
|
20916
|
-
|
|
20917
|
-
|
|
20918
|
-
|
|
20919
|
-
|
|
20920
|
-
|
|
20921
|
-
|
|
20922
|
-
|
|
20923
|
-
|
|
20924
|
-
|
|
20925
|
-
|
|
20926
|
-
|
|
20927
|
-
|
|
20928
|
-
|
|
20929
|
-
|
|
20930
|
-
|
|
20931
|
-
|
|
21268
|
+
return dispatchTest(evalCase);
|
|
21269
|
+
})
|
|
21270
|
+
);
|
|
21271
|
+
const settled = await Promise.allSettled(wavePromises);
|
|
21272
|
+
for (let i = 0; i < settled.length; i++) {
|
|
21273
|
+
const outcome = settled[i];
|
|
21274
|
+
const evalCase = wave[i];
|
|
21275
|
+
if (outcome.status === "fulfilled") {
|
|
21276
|
+
completedResults.set(evalCase.id, outcome.value);
|
|
21277
|
+
results.push(outcome.value);
|
|
21278
|
+
} else {
|
|
21279
|
+
const formattingMode = usesFileReferencePrompt(primaryProvider) ? "agent" : "lm";
|
|
21280
|
+
const promptInputs = await buildPromptInputs(evalCase, formattingMode);
|
|
21281
|
+
const errorResult = buildErrorResult(
|
|
21282
|
+
evalCase,
|
|
21283
|
+
target.name,
|
|
21284
|
+
(now ?? (() => /* @__PURE__ */ new Date()))(),
|
|
21285
|
+
outcome.reason,
|
|
21286
|
+
promptInputs,
|
|
21287
|
+
primaryProvider,
|
|
21288
|
+
"agent",
|
|
21289
|
+
"provider_error",
|
|
21290
|
+
verbose
|
|
21291
|
+
);
|
|
21292
|
+
completedResults.set(evalCase.id, errorResult);
|
|
21293
|
+
results.push(errorResult);
|
|
20932
21294
|
if (onResult) {
|
|
20933
|
-
await onResult(
|
|
21295
|
+
await onResult(errorResult);
|
|
20934
21296
|
}
|
|
20935
|
-
return result;
|
|
20936
|
-
} catch (error) {
|
|
20937
|
-
if (onProgress) {
|
|
20938
|
-
await onProgress({
|
|
20939
|
-
workerId,
|
|
20940
|
-
testId: evalCase.id,
|
|
20941
|
-
status: "failed",
|
|
20942
|
-
completedAt: Date.now(),
|
|
20943
|
-
error: error instanceof Error ? error.message : String(error)
|
|
20944
|
-
});
|
|
20945
|
-
}
|
|
20946
|
-
throw error;
|
|
20947
|
-
} finally {
|
|
20948
|
-
if (testPoolSlot) {
|
|
20949
|
-
availablePoolSlots.push(testPoolSlot);
|
|
20950
|
-
}
|
|
20951
|
-
}
|
|
20952
|
-
})
|
|
20953
|
-
);
|
|
20954
|
-
const settled = await Promise.allSettled(promises);
|
|
20955
|
-
const results = [];
|
|
20956
|
-
for (let i = 0; i < settled.length; i++) {
|
|
20957
|
-
const outcome = settled[i];
|
|
20958
|
-
if (outcome.status === "fulfilled") {
|
|
20959
|
-
results.push(outcome.value);
|
|
20960
|
-
} else {
|
|
20961
|
-
const evalCase = filteredEvalCases[i];
|
|
20962
|
-
const formattingMode = usesFileReferencePrompt(primaryProvider) ? "agent" : "lm";
|
|
20963
|
-
const promptInputs = await buildPromptInputs(evalCase, formattingMode);
|
|
20964
|
-
const errorResult = buildErrorResult(
|
|
20965
|
-
evalCase,
|
|
20966
|
-
target.name,
|
|
20967
|
-
(now ?? (() => /* @__PURE__ */ new Date()))(),
|
|
20968
|
-
outcome.reason,
|
|
20969
|
-
promptInputs,
|
|
20970
|
-
primaryProvider,
|
|
20971
|
-
"agent",
|
|
20972
|
-
"provider_error",
|
|
20973
|
-
verbose
|
|
20974
|
-
);
|
|
20975
|
-
results.push(errorResult);
|
|
20976
|
-
if (onResult) {
|
|
20977
|
-
await onResult(errorResult);
|
|
20978
21297
|
}
|
|
20979
21298
|
}
|
|
20980
21299
|
}
|
|
@@ -21219,7 +21538,8 @@ async function runEvalCase(options) {
|
|
|
21219
21538
|
repoManager,
|
|
21220
21539
|
evalDir,
|
|
21221
21540
|
verbose,
|
|
21222
|
-
threshold: caseThreshold
|
|
21541
|
+
threshold: caseThreshold,
|
|
21542
|
+
dependencyResults
|
|
21223
21543
|
} = options;
|
|
21224
21544
|
const setupDebug = process.env.AGENTV_SETUP_DEBUG === "1";
|
|
21225
21545
|
const formattingMode = usesFileReferencePrompt(provider) ? "agent" : "lm";
|
|
@@ -21263,7 +21583,7 @@ async function runEvalCase(options) {
|
|
|
21263
21583
|
if (caseWorkspaceFile && workspacePath) {
|
|
21264
21584
|
const copiedFile = import_node_path49.default.join(workspacePath, import_node_path49.default.basename(caseWorkspaceFile));
|
|
21265
21585
|
try {
|
|
21266
|
-
await (0,
|
|
21586
|
+
await (0, import_promises36.stat)(copiedFile);
|
|
21267
21587
|
caseWorkspaceFile = copiedFile;
|
|
21268
21588
|
} catch {
|
|
21269
21589
|
}
|
|
@@ -21271,7 +21591,7 @@ async function runEvalCase(options) {
|
|
|
21271
21591
|
}
|
|
21272
21592
|
if (!workspacePath && (evalCase.workspace?.hooks || evalCase.workspace?.repos?.length) && evalRunId) {
|
|
21273
21593
|
workspacePath = getWorkspacePath(evalRunId, evalCase.id);
|
|
21274
|
-
await (0,
|
|
21594
|
+
await (0, import_promises36.mkdir)(workspacePath, { recursive: true });
|
|
21275
21595
|
}
|
|
21276
21596
|
if (evalCase.workspace?.repos?.length && workspacePath) {
|
|
21277
21597
|
const localPathErrors = RepoManager.validateLocalPaths(evalCase.workspace.repos);
|
|
@@ -21326,8 +21646,8 @@ async function runEvalCase(options) {
|
|
|
21326
21646
|
const srcPath = import_node_path49.default.resolve(baseDir, relPath);
|
|
21327
21647
|
const destPath = import_node_path49.default.resolve(workspacePath, relPath);
|
|
21328
21648
|
try {
|
|
21329
|
-
await (0,
|
|
21330
|
-
await (0,
|
|
21649
|
+
await (0, import_promises36.mkdir)(import_node_path49.default.dirname(destPath), { recursive: true });
|
|
21650
|
+
await (0, import_promises36.copyFile)(srcPath, destPath);
|
|
21331
21651
|
} catch (error) {
|
|
21332
21652
|
const message = error instanceof Error ? error.message : String(error);
|
|
21333
21653
|
return buildErrorResult(
|
|
@@ -21427,7 +21747,11 @@ async function runEvalCase(options) {
|
|
|
21427
21747
|
if (!baselineCommit && workspacePath) {
|
|
21428
21748
|
try {
|
|
21429
21749
|
baselineCommit = await initializeBaseline(workspacePath);
|
|
21430
|
-
} catch {
|
|
21750
|
+
} catch (error) {
|
|
21751
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
21752
|
+
if (verbose) {
|
|
21753
|
+
console.warn(`[setup] test=${evalCase.id} baseline initialization failed: ${message}`);
|
|
21754
|
+
}
|
|
21431
21755
|
}
|
|
21432
21756
|
}
|
|
21433
21757
|
const caseStartMs = Date.now();
|
|
@@ -21538,6 +21862,11 @@ async function runEvalCase(options) {
|
|
|
21538
21862
|
} catch {
|
|
21539
21863
|
}
|
|
21540
21864
|
}
|
|
21865
|
+
const providerFileChanges = providerResponse?.fileChanges;
|
|
21866
|
+
if (providerFileChanges) {
|
|
21867
|
+
fileChanges = fileChanges ? `${fileChanges}
|
|
21868
|
+
${providerFileChanges}` : providerFileChanges;
|
|
21869
|
+
}
|
|
21541
21870
|
const providerError = extractProviderError(providerResponse);
|
|
21542
21871
|
if (caseHooksEnabled && repoManager && workspacePath && evalCase.workspace?.hooks?.after_each?.reset && evalCase.workspace.hooks.after_each.reset !== "none" && evalCase.workspace.repos) {
|
|
21543
21872
|
try {
|
|
@@ -21595,7 +21924,8 @@ async function runEvalCase(options) {
|
|
|
21595
21924
|
workspacePath,
|
|
21596
21925
|
dockerConfig: evalCase.workspace?.docker,
|
|
21597
21926
|
verbose,
|
|
21598
|
-
threshold: evalCase.threshold ?? caseThreshold
|
|
21927
|
+
threshold: evalCase.threshold ?? caseThreshold,
|
|
21928
|
+
dependencyResults
|
|
21599
21929
|
});
|
|
21600
21930
|
const effectiveThreshold = evalCase.threshold ?? caseThreshold;
|
|
21601
21931
|
const totalDurationMs = Date.now() - caseStartMs;
|
|
@@ -21788,7 +22118,8 @@ async function evaluateCandidate(options) {
|
|
|
21788
22118
|
fileChanges,
|
|
21789
22119
|
workspacePath,
|
|
21790
22120
|
dockerConfig,
|
|
21791
|
-
threshold: evalThreshold
|
|
22121
|
+
threshold: evalThreshold,
|
|
22122
|
+
dependencyResults
|
|
21792
22123
|
} = options;
|
|
21793
22124
|
const gradeTimestamp = nowFn();
|
|
21794
22125
|
const { score, scores } = await runEvaluatorsForCase({
|
|
@@ -21815,7 +22146,8 @@ async function evaluateCandidate(options) {
|
|
|
21815
22146
|
fileChanges,
|
|
21816
22147
|
workspacePath,
|
|
21817
22148
|
dockerConfig,
|
|
21818
|
-
threshold: evalThreshold
|
|
22149
|
+
threshold: evalThreshold,
|
|
22150
|
+
dependencyResults
|
|
21819
22151
|
});
|
|
21820
22152
|
const completedAt = nowFn();
|
|
21821
22153
|
let agentRequest;
|
|
@@ -21891,7 +22223,8 @@ async function runEvaluatorsForCase(options) {
|
|
|
21891
22223
|
fileChanges,
|
|
21892
22224
|
workspacePath,
|
|
21893
22225
|
dockerConfig,
|
|
21894
|
-
threshold
|
|
22226
|
+
threshold,
|
|
22227
|
+
dependencyResults
|
|
21895
22228
|
} = options;
|
|
21896
22229
|
if (evalCase.assertions && evalCase.assertions.length > 0) {
|
|
21897
22230
|
return runEvaluatorList({
|
|
@@ -21919,7 +22252,8 @@ async function runEvaluatorsForCase(options) {
|
|
|
21919
22252
|
fileChanges,
|
|
21920
22253
|
workspacePath,
|
|
21921
22254
|
dockerConfig,
|
|
21922
|
-
threshold
|
|
22255
|
+
threshold,
|
|
22256
|
+
dependencyResults
|
|
21923
22257
|
});
|
|
21924
22258
|
}
|
|
21925
22259
|
const evaluatorKind = evalCase.evaluator ?? "llm-grader";
|
|
@@ -21949,6 +22283,7 @@ async function runEvaluatorsForCase(options) {
|
|
|
21949
22283
|
fileChanges,
|
|
21950
22284
|
workspacePath,
|
|
21951
22285
|
dockerConfig,
|
|
22286
|
+
dependencyResults,
|
|
21952
22287
|
...implicitEvaluator ? { evaluator: implicitEvaluator } : {}
|
|
21953
22288
|
});
|
|
21954
22289
|
return { score };
|
|
@@ -21988,7 +22323,8 @@ async function runEvaluatorList(options) {
|
|
|
21988
22323
|
availableTargets,
|
|
21989
22324
|
fileChanges,
|
|
21990
22325
|
workspacePath,
|
|
21991
|
-
dockerConfig
|
|
22326
|
+
dockerConfig,
|
|
22327
|
+
dependencyResults
|
|
21992
22328
|
} = options;
|
|
21993
22329
|
const scored = [];
|
|
21994
22330
|
const scores = [];
|
|
@@ -22012,7 +22348,8 @@ async function runEvaluatorList(options) {
|
|
|
22012
22348
|
availableTargets,
|
|
22013
22349
|
fileChanges,
|
|
22014
22350
|
workspacePath,
|
|
22015
|
-
dockerConfig
|
|
22351
|
+
dockerConfig,
|
|
22352
|
+
dependencyResults
|
|
22016
22353
|
};
|
|
22017
22354
|
const evalFileDir = evalCase.file_paths[0] ? import_node_path49.default.dirname(evalCase.file_paths[0]) : process.cwd();
|
|
22018
22355
|
const dispatchContext = {
|
|
@@ -22742,7 +23079,7 @@ init_cjs_shims();
|
|
|
22742
23079
|
|
|
22743
23080
|
// src/evaluation/workspace/deps-scanner.ts
|
|
22744
23081
|
init_cjs_shims();
|
|
22745
|
-
var
|
|
23082
|
+
var import_promises37 = require("fs/promises");
|
|
22746
23083
|
var import_node_path51 = __toESM(require("path"), 1);
|
|
22747
23084
|
var import_yaml9 = require("yaml");
|
|
22748
23085
|
function normalizeGitUrl(url) {
|
|
@@ -22790,7 +23127,7 @@ async function scanRepoDeps(evalFilePaths) {
|
|
|
22790
23127
|
return { repos: [...seen.values()], errors };
|
|
22791
23128
|
}
|
|
22792
23129
|
async function extractReposFromEvalFile(filePath) {
|
|
22793
|
-
const content = await (0,
|
|
23130
|
+
const content = await (0, import_promises37.readFile)(filePath, "utf8");
|
|
22794
23131
|
const parsed = interpolateEnv((0, import_yaml9.parse)(content), process.env);
|
|
22795
23132
|
if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return [];
|
|
22796
23133
|
const obj = parsed;
|
|
@@ -22811,7 +23148,7 @@ async function extractReposFromEvalFile(filePath) {
|
|
|
22811
23148
|
async function extractReposFromWorkspaceRaw(raw, evalFileDir) {
|
|
22812
23149
|
if (typeof raw === "string") {
|
|
22813
23150
|
const workspaceFilePath = import_node_path51.default.resolve(evalFileDir, raw);
|
|
22814
|
-
const content = await (0,
|
|
23151
|
+
const content = await (0, import_promises37.readFile)(workspaceFilePath, "utf8");
|
|
22815
23152
|
const parsed = interpolateEnv((0, import_yaml9.parse)(content), process.env);
|
|
22816
23153
|
if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return [];
|
|
22817
23154
|
return extractReposFromObject(parsed);
|
|
@@ -22843,7 +23180,7 @@ init_docker_workspace();
|
|
|
22843
23180
|
|
|
22844
23181
|
// src/evaluation/cache/response-cache.ts
|
|
22845
23182
|
init_cjs_shims();
|
|
22846
|
-
var
|
|
23183
|
+
var import_promises38 = require("fs/promises");
|
|
22847
23184
|
var import_node_path52 = __toESM(require("path"), 1);
|
|
22848
23185
|
var DEFAULT_CACHE_PATH = ".agentv/cache";
|
|
22849
23186
|
var ResponseCache = class {
|
|
@@ -22854,7 +23191,7 @@ var ResponseCache = class {
|
|
|
22854
23191
|
async get(key) {
|
|
22855
23192
|
const filePath = this.keyToPath(key);
|
|
22856
23193
|
try {
|
|
22857
|
-
const data = await (0,
|
|
23194
|
+
const data = await (0, import_promises38.readFile)(filePath, "utf8");
|
|
22858
23195
|
return JSON.parse(data);
|
|
22859
23196
|
} catch {
|
|
22860
23197
|
return void 0;
|
|
@@ -22863,8 +23200,8 @@ var ResponseCache = class {
|
|
|
22863
23200
|
async set(key, value) {
|
|
22864
23201
|
const filePath = this.keyToPath(key);
|
|
22865
23202
|
const dir = import_node_path52.default.dirname(filePath);
|
|
22866
|
-
await (0,
|
|
22867
|
-
await (0,
|
|
23203
|
+
await (0, import_promises38.mkdir)(dir, { recursive: true });
|
|
23204
|
+
await (0, import_promises38.writeFile)(filePath, JSON.stringify(value, null, 2), "utf8");
|
|
22868
23205
|
}
|
|
22869
23206
|
keyToPath(key) {
|
|
22870
23207
|
const prefix = key.slice(0, 2);
|
|
@@ -22887,8 +23224,8 @@ function shouldSkipCacheForTemperature(targetConfig) {
|
|
|
22887
23224
|
init_cjs_shims();
|
|
22888
23225
|
var import_node_child_process11 = require("child_process");
|
|
22889
23226
|
var import_node_fs18 = require("fs");
|
|
22890
|
-
var
|
|
22891
|
-
var
|
|
23227
|
+
var import_promises39 = require("fs/promises");
|
|
23228
|
+
var import_node_os9 = __toESM(require("os"), 1);
|
|
22892
23229
|
var import_node_path53 = __toESM(require("path"), 1);
|
|
22893
23230
|
var import_node_util7 = require("util");
|
|
22894
23231
|
var execFileAsync3 = (0, import_node_util7.promisify)(import_node_child_process11.execFile);
|
|
@@ -23079,7 +23416,7 @@ async function prepareResultsRepoBranch(config, branchName) {
|
|
|
23079
23416
|
const cloneDir = await ensureResultsRepoClone(normalized);
|
|
23080
23417
|
const baseBranch = await resolveDefaultBranch(cloneDir);
|
|
23081
23418
|
await updateCacheRepo(cloneDir, baseBranch);
|
|
23082
|
-
const worktreeRoot = await (0,
|
|
23419
|
+
const worktreeRoot = await (0, import_promises39.mkdtemp)(import_node_path53.default.join(import_node_os9.default.tmpdir(), "agentv-results-repo-"));
|
|
23083
23420
|
const worktreeDir = import_node_path53.default.join(worktreeRoot, "repo");
|
|
23084
23421
|
await runGit(["worktree", "add", "-B", branchName, worktreeDir, `origin/${baseBranch}`], {
|
|
23085
23422
|
cwd: cloneDir
|
|
@@ -23092,7 +23429,7 @@ async function prepareResultsRepoBranch(config, branchName) {
|
|
|
23092
23429
|
try {
|
|
23093
23430
|
await runGit(["worktree", "remove", "--force", worktreeDir], { cwd: cloneDir });
|
|
23094
23431
|
} finally {
|
|
23095
|
-
await (0,
|
|
23432
|
+
await (0, import_promises39.rm)(worktreeRoot, { recursive: true, force: true }).catch(() => void 0);
|
|
23096
23433
|
}
|
|
23097
23434
|
}
|
|
23098
23435
|
};
|
|
@@ -23100,7 +23437,7 @@ async function prepareResultsRepoBranch(config, branchName) {
|
|
|
23100
23437
|
async function stageResultsArtifacts(params) {
|
|
23101
23438
|
(0, import_node_fs18.rmSync)(params.destinationDir, { recursive: true, force: true });
|
|
23102
23439
|
(0, import_node_fs18.mkdirSync)(import_node_path53.default.dirname(params.destinationDir), { recursive: true });
|
|
23103
|
-
await (0,
|
|
23440
|
+
await (0, import_promises39.cp)(params.sourceDir, params.destinationDir, { recursive: true });
|
|
23104
23441
|
}
|
|
23105
23442
|
function resolveResultsRepoRunsDir(config) {
|
|
23106
23443
|
const normalized = normalizeResultsExportConfig(config);
|
|
@@ -23110,12 +23447,12 @@ function resolveResultsRepoRunsDir(config) {
|
|
|
23110
23447
|
);
|
|
23111
23448
|
}
|
|
23112
23449
|
async function directorySizeBytes(targetPath) {
|
|
23113
|
-
const entry = await (0,
|
|
23450
|
+
const entry = await (0, import_promises39.stat)(targetPath);
|
|
23114
23451
|
if (entry.isFile()) {
|
|
23115
23452
|
return entry.size;
|
|
23116
23453
|
}
|
|
23117
23454
|
let total = 0;
|
|
23118
|
-
for (const child of await (0,
|
|
23455
|
+
for (const child of await (0, import_promises39.readdir)(targetPath, { withFileTypes: true })) {
|
|
23119
23456
|
total += await directorySizeBytes(import_node_path53.default.join(targetPath, child.name));
|
|
23120
23457
|
}
|
|
23121
23458
|
return total;
|
|
@@ -24198,17 +24535,17 @@ function extractResponseItemContent(content) {
|
|
|
24198
24535
|
|
|
24199
24536
|
// src/import/codex-session-discovery.ts
|
|
24200
24537
|
init_cjs_shims();
|
|
24201
|
-
var
|
|
24202
|
-
var
|
|
24538
|
+
var import_promises41 = require("fs/promises");
|
|
24539
|
+
var import_node_os10 = require("os");
|
|
24203
24540
|
var import_node_path56 = __toESM(require("path"), 1);
|
|
24204
|
-
var DEFAULT_SESSIONS_DIR = () => import_node_path56.default.join((0,
|
|
24541
|
+
var DEFAULT_SESSIONS_DIR = () => import_node_path56.default.join((0, import_node_os10.homedir)(), ".codex", "sessions");
|
|
24205
24542
|
async function discoverCodexSessions(opts) {
|
|
24206
24543
|
const sessionsDir = opts?.sessionsDir ?? DEFAULT_SESSIONS_DIR();
|
|
24207
24544
|
const limit = opts?.latest ? 1 : opts?.limit ?? 10;
|
|
24208
24545
|
const sessions = [];
|
|
24209
24546
|
let yearDirs;
|
|
24210
24547
|
try {
|
|
24211
|
-
yearDirs = await (0,
|
|
24548
|
+
yearDirs = await (0, import_promises41.readdir)(sessionsDir);
|
|
24212
24549
|
} catch {
|
|
24213
24550
|
return [];
|
|
24214
24551
|
}
|
|
@@ -24216,7 +24553,7 @@ async function discoverCodexSessions(opts) {
|
|
|
24216
24553
|
const yearPath = import_node_path56.default.join(sessionsDir, year);
|
|
24217
24554
|
let monthDirs;
|
|
24218
24555
|
try {
|
|
24219
|
-
monthDirs = await (0,
|
|
24556
|
+
monthDirs = await (0, import_promises41.readdir)(yearPath);
|
|
24220
24557
|
} catch {
|
|
24221
24558
|
continue;
|
|
24222
24559
|
}
|
|
@@ -24224,7 +24561,7 @@ async function discoverCodexSessions(opts) {
|
|
|
24224
24561
|
const monthPath = import_node_path56.default.join(yearPath, month);
|
|
24225
24562
|
let dayDirs;
|
|
24226
24563
|
try {
|
|
24227
|
-
dayDirs = await (0,
|
|
24564
|
+
dayDirs = await (0, import_promises41.readdir)(monthPath);
|
|
24228
24565
|
} catch {
|
|
24229
24566
|
continue;
|
|
24230
24567
|
}
|
|
@@ -24236,7 +24573,7 @@ async function discoverCodexSessions(opts) {
|
|
|
24236
24573
|
const dayPath = import_node_path56.default.join(monthPath, day);
|
|
24237
24574
|
let files;
|
|
24238
24575
|
try {
|
|
24239
|
-
files = await (0,
|
|
24576
|
+
files = await (0, import_promises41.readdir)(dayPath);
|
|
24240
24577
|
} catch {
|
|
24241
24578
|
continue;
|
|
24242
24579
|
}
|
|
@@ -24248,7 +24585,7 @@ async function discoverCodexSessions(opts) {
|
|
|
24248
24585
|
const sessionId = parts.length >= 6 ? parts.slice(-5).join("-") : nameWithoutExt;
|
|
24249
24586
|
let updatedAt;
|
|
24250
24587
|
try {
|
|
24251
|
-
const fileStat = await (0,
|
|
24588
|
+
const fileStat = await (0, import_promises41.stat)(filePath);
|
|
24252
24589
|
updatedAt = fileStat.mtime;
|
|
24253
24590
|
} catch {
|
|
24254
24591
|
updatedAt = /* @__PURE__ */ new Date(0);
|
|
@@ -24264,10 +24601,10 @@ async function discoverCodexSessions(opts) {
|
|
|
24264
24601
|
|
|
24265
24602
|
// src/import/session-discovery.ts
|
|
24266
24603
|
init_cjs_shims();
|
|
24267
|
-
var
|
|
24268
|
-
var
|
|
24604
|
+
var import_promises42 = require("fs/promises");
|
|
24605
|
+
var import_node_os11 = require("os");
|
|
24269
24606
|
var import_node_path57 = __toESM(require("path"), 1);
|
|
24270
|
-
var DEFAULT_PROJECTS_DIR = () => import_node_path57.default.join((0,
|
|
24607
|
+
var DEFAULT_PROJECTS_DIR = () => import_node_path57.default.join((0, import_node_os11.homedir)(), ".claude", "projects");
|
|
24271
24608
|
function encodeProjectPath(projectPath) {
|
|
24272
24609
|
return projectPath.replace(/\//g, "-");
|
|
24273
24610
|
}
|
|
@@ -24276,7 +24613,7 @@ async function discoverClaudeSessions(opts) {
|
|
|
24276
24613
|
const limit = opts?.latest ? 1 : opts?.limit ?? 10;
|
|
24277
24614
|
let projectDirs;
|
|
24278
24615
|
try {
|
|
24279
|
-
projectDirs = await (0,
|
|
24616
|
+
projectDirs = await (0, import_promises42.readdir)(projectsDir);
|
|
24280
24617
|
} catch {
|
|
24281
24618
|
return [];
|
|
24282
24619
|
}
|
|
@@ -24289,7 +24626,7 @@ async function discoverClaudeSessions(opts) {
|
|
|
24289
24626
|
const dirPath = import_node_path57.default.join(projectsDir, projectDir);
|
|
24290
24627
|
let entries;
|
|
24291
24628
|
try {
|
|
24292
|
-
entries = await (0,
|
|
24629
|
+
entries = await (0, import_promises42.readdir)(dirPath);
|
|
24293
24630
|
} catch {
|
|
24294
24631
|
continue;
|
|
24295
24632
|
}
|
|
@@ -24300,7 +24637,7 @@ async function discoverClaudeSessions(opts) {
|
|
|
24300
24637
|
const filePath = import_node_path57.default.join(dirPath, entry);
|
|
24301
24638
|
let updatedAt;
|
|
24302
24639
|
try {
|
|
24303
|
-
const fileStat = await (0,
|
|
24640
|
+
const fileStat = await (0, import_promises42.stat)(filePath);
|
|
24304
24641
|
updatedAt = fileStat.mtime;
|
|
24305
24642
|
} catch {
|
|
24306
24643
|
updatedAt = /* @__PURE__ */ new Date(0);
|
|
@@ -24322,7 +24659,7 @@ init_cjs_shims();
|
|
|
24322
24659
|
|
|
24323
24660
|
// src/import/types.ts
|
|
24324
24661
|
init_cjs_shims();
|
|
24325
|
-
var
|
|
24662
|
+
var import_promises43 = require("fs/promises");
|
|
24326
24663
|
function toTranscriptJsonLine(entry) {
|
|
24327
24664
|
const firstUserMessage = entry.messages.find((m) => m.role === "user");
|
|
24328
24665
|
const input = typeof firstUserMessage?.content === "string" ? firstUserMessage.content : "";
|
|
@@ -24348,11 +24685,11 @@ function toTranscriptJsonLine(entry) {
|
|
|
24348
24685
|
};
|
|
24349
24686
|
}
|
|
24350
24687
|
async function readTranscriptJsonl(filePath) {
|
|
24351
|
-
const text = await (0,
|
|
24688
|
+
const text = await (0, import_promises43.readFile)(filePath, "utf8");
|
|
24352
24689
|
return text.split("\n").filter((line) => line.trim().length > 0).map((line) => JSON.parse(line));
|
|
24353
24690
|
}
|
|
24354
24691
|
async function readTranscriptFile(filePath) {
|
|
24355
|
-
return (0,
|
|
24692
|
+
return (0, import_promises43.readFile)(filePath, "utf8");
|
|
24356
24693
|
}
|
|
24357
24694
|
|
|
24358
24695
|
// src/import/transcript-provider.ts
|