@agentv/core 3.13.3 → 3.14.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-ZB3AUPES.js → chunk-HP5PFOVK.js} +48 -1
- package/dist/{chunk-ZB3AUPES.js.map → chunk-HP5PFOVK.js.map} +1 -1
- package/dist/evaluation/validation/index.cjs +1 -0
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +1 -1
- package/dist/index.cjs +877 -434
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +62 -2
- package/dist/index.d.ts +62 -2
- package/dist/index.js +728 -333
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -19,7 +19,7 @@ import {
|
|
|
19
19
|
readTextFile,
|
|
20
20
|
resolveFileReference,
|
|
21
21
|
resolveTargetDefinition
|
|
22
|
-
} from "./chunk-ZB3AUPES.js";
|
|
22
|
+
} from "./chunk-HP5PFOVK.js";
|
|
23
23
|
import {
|
|
24
24
|
AgentvProvider
|
|
25
25
|
} from "./chunk-W5YDZWT4.js";
|
|
@@ -2955,22 +2955,31 @@ async function loadTestById(evalFilePath, repoRoot, evalId) {
|
|
|
2955
2955
|
return match;
|
|
2956
2956
|
}
|
|
2957
2957
|
var loadEvalCaseById = loadTestById;
|
|
2958
|
+
/**
 * Normalizes a command specification into an array of string tokens.
 *
 * - A string is trimmed and split on runs of whitespace.
 * - An array keeps only its string elements, in order.
 * - Anything else, or an input that yields no tokens, produces `undefined`.
 *
 * @param {unknown} source - Command given as a string or an array.
 * @returns {string[] | undefined} The command tokens, or `undefined` when none can be derived.
 */
function parseCommandArray(source) {
  let tokens;
  if (typeof source === "string") {
    tokens = source.trim().split(/\s+/);
    // Splitting an empty (or whitespace-only) string yields [""], which is not a command.
    if (tokens[0] === "") {
      return void 0;
    }
  } else if (Array.isArray(source)) {
    tokens = source.filter((item) => typeof item === "string");
  } else {
    return void 0;
  }
  return tokens.length === 0 ? void 0 : tokens;
}
|
|
2958
2969
|
function parseWorkspaceScriptConfig(raw, evalFileDir) {
|
|
2959
2970
|
if (!isJsonObject(raw)) return void 0;
|
|
2960
2971
|
const obj = raw;
|
|
2961
2972
|
if (obj.script !== void 0 && obj.command === void 0) {
|
|
2962
2973
|
logWarning5("'script' is deprecated. Use 'command' instead.");
|
|
2963
2974
|
}
|
|
2964
|
-
const
|
|
2965
|
-
if (!
|
|
2966
|
-
const commandArr = commandSource.filter((s) => typeof s === "string");
|
|
2967
|
-
if (commandArr.length === 0) return void 0;
|
|
2975
|
+
const command = parseCommandArray(obj.command ?? obj.script);
|
|
2976
|
+
if (!command) return void 0;
|
|
2968
2977
|
const timeoutMs = typeof obj.timeout_ms === "number" ? obj.timeout_ms : void 0;
|
|
2969
2978
|
let cwd = typeof obj.cwd === "string" ? obj.cwd : void 0;
|
|
2970
2979
|
if (cwd && !path7.isAbsolute(cwd)) {
|
|
2971
2980
|
cwd = path7.resolve(evalFileDir, cwd);
|
|
2972
2981
|
}
|
|
2973
|
-
const config = { command
|
|
2982
|
+
const config = { command };
|
|
2974
2983
|
if (timeoutMs !== void 0) {
|
|
2975
2984
|
return { ...config, timeout_ms: timeoutMs, ...cwd !== void 0 && { cwd } };
|
|
2976
2985
|
}
|
|
@@ -6407,10 +6416,268 @@ function summarizeAcpEvent(eventType, data) {
|
|
|
6407
6416
|
}
|
|
6408
6417
|
}
|
|
6409
6418
|
|
|
6419
|
+
// src/evaluation/providers/copilot-log.ts
|
|
6420
|
+
import { readFile as readFile8 } from "node:fs/promises";
|
|
6421
|
+
import { homedir as homedir2 } from "node:os";
|
|
6422
|
+
import path17 from "node:path";
|
|
6423
|
+
|
|
6424
|
+
// src/evaluation/providers/copilot-log-parser.ts
|
|
6425
|
+
/**
 * Parses the text of a Copilot `events.jsonl` transcript into messages,
 * session metadata, token usage and a duration.
 *
 * Each non-blank line is parsed as one JSON event. Lines that are not valid
 * JSON, that do not decode to an object, or that have no `type` are skipped.
 * Handled event types:
 * - `session.start`: fills `meta` (session id, cwd, repository, branch, start time).
 * - `user.message` / `assistant.message`: appended to `messages`.
 * - `skill.invoked`: appended as an assistant message with a single `Skill` tool call.
 * - `tool.execution_start` / `tool.execution_complete`: paired by `toolCallId`;
 *   a completion with a matching start is appended as an assistant tool call.
 * - `session.shutdown`: records the end time, sums token usage over
 *   `modelMetrics`, and sets `meta.model` from `currentModel` if still empty.
 *
 * @param {string} eventsJsonl - Raw transcript text, one JSON event per line.
 * @returns {{messages: object[], meta: object, tokenUsage: ({input: number, output: number} | undefined), durationMs: (number | undefined)}}
 *   `tokenUsage` is `undefined` when no usage was reported; `durationMs` is
 *   `undefined` unless both start and end timestamps are present and parseable.
 */
function parseCopilotEvents(eventsJsonl) {
  const messages = [];
  const meta = { sessionId: "", model: "", cwd: "" };
  let totalInputTokens = 0;
  let totalOutputTokens = 0;
  let hasUsage = false;
  let startTimestamp;
  let endTimestamp;
  const toolCallsInProgress = /* @__PURE__ */ new Map();
  const isRecord = (value) => value !== null && typeof value === "object";
  // Non-numeric counts contribute 0 instead of turning the totals into NaN.
  const toTokenCount = (value) => {
    const count = Number(value ?? 0);
    return Number.isFinite(count) ? count : 0;
  };
  for (const line of eventsJsonl.split("\n")) {
    if (line.trim().length === 0) continue;
    let event;
    try {
      event = JSON.parse(line);
    } catch {
      continue;
    }
    // JSON.parse accepts `null`, numbers and strings; none of them is an event.
    if (!isRecord(event)) continue;
    const eventType = event.type;
    if (!eventType) continue;
    const data = event.data ?? {};
    switch (eventType) {
      case "session.start": {
        meta.sessionId = String(data.sessionId ?? "");
        const ctx = data.context;
        meta.cwd = String(ctx?.cwd ?? "");
        meta.repository = ctx?.repository ? String(ctx.repository) : void 0;
        meta.branch = ctx?.branch ? String(ctx.branch) : void 0;
        const ts = event.timestamp ?? data.startTime;
        meta.startedAt = ts ? String(ts) : void 0;
        startTimestamp = ts ? String(ts) : void 0;
        break;
      }
      case "user.message": {
        messages.push({
          role: "user",
          content: data.content != null ? String(data.content) : ""
        });
        break;
      }
      case "assistant.message": {
        // Ignore a malformed `toolRequests` value and malformed entries rather than throwing.
        const toolRequests = Array.isArray(data.toolRequests) ? data.toolRequests : [];
        const toolCalls = toolRequests.filter(isRecord).map((req) => ({
          tool: String(req.name ?? req.toolName ?? ""),
          input: req.arguments,
          id: req.toolCallId ? String(req.toolCallId) : void 0
        }));
        messages.push({
          role: "assistant",
          content: data.content != null ? String(data.content) : void 0,
          toolCalls: toolCalls.length > 0 ? toolCalls : void 0
        });
        break;
      }
      case "skill.invoked": {
        const skillName = String(data.name ?? "");
        messages.push({
          role: "assistant",
          toolCalls: [
            {
              tool: "Skill",
              input: { skill: skillName }
            }
          ]
        });
        break;
      }
      case "tool.execution_start": {
        const toolCallId = String(data.toolCallId ?? "");
        if (toolCallId) {
          toolCallsInProgress.set(toolCallId, {
            toolName: String(data.toolName ?? ""),
            input: data.arguments,
            toolCallId
          });
        }
        break;
      }
      case "tool.execution_complete": {
        const toolCallId = String(data.toolCallId ?? "");
        const started = toolCallsInProgress.get(toolCallId);
        // Completions without a recorded start are dropped.
        if (started) {
          toolCallsInProgress.delete(toolCallId);
          messages.push({
            role: "assistant",
            toolCalls: [
              {
                tool: started.toolName,
                input: started.input,
                output: data.result,
                id: toolCallId
              }
            ]
          });
        }
        break;
      }
      case "session.shutdown": {
        endTimestamp = event.timestamp ? String(event.timestamp) : void 0;
        const modelMetrics = data.modelMetrics;
        if (modelMetrics) {
          for (const metrics of Object.values(modelMetrics)) {
            // `metrics` may be null in a malformed log; optional chaining skips it.
            const usage = metrics?.usage;
            if (usage) {
              hasUsage = true;
              totalInputTokens += toTokenCount(usage.inputTokens);
              totalOutputTokens += toTokenCount(usage.outputTokens);
            }
          }
        }
        const currentModel = data.currentModel;
        if (currentModel && !meta.model) {
          meta.model = String(currentModel);
        }
        break;
      }
    }
  }
  let durationMs;
  if (startTimestamp && endTimestamp) {
    const elapsed = new Date(endTimestamp).getTime() - new Date(startTimestamp).getTime();
    // Unparseable timestamps produce NaN; report "unknown" instead.
    if (Number.isFinite(elapsed)) {
      durationMs = elapsed;
    }
  }
  return {
    messages,
    meta,
    tokenUsage: hasUsage ? { input: totalInputTokens, output: totalOutputTokens } : void 0,
    durationMs
  };
}
|
|
6552
|
+
|
|
6553
|
+
// src/evaluation/providers/copilot-session-discovery.ts
|
|
6554
|
+
import { readFile as readFile7, readdir, stat } from "node:fs/promises";
|
|
6555
|
+
import { homedir } from "node:os";
|
|
6556
|
+
import path16 from "node:path";
|
|
6557
|
+
import { parse as parseYaml2 } from "yaml";
|
|
6558
|
+
/** Returns the default session-state directory: `<home>/.copilot/session-state`. */
var DEFAULT_SESSION_STATE_DIR = () => path16.join(homedir(), ".copilot", "session-state");

/**
 * Lists Copilot sessions found under a session-state directory, newest first.
 *
 * Every directory entry is probed for a `workspace.yaml`; entries where it
 * cannot be read or parsed are skipped. For the rest, `events.jsonl` supplies
 * the modification time (epoch 0 if it cannot be stat'ed) and its last 4096
 * bytes are scanned for `"session.shutdown"` to decide whether the session is
 * still active (a session whose events file cannot be read counts as active).
 *
 * @param {{sessionStateDir?: string, limit?: number, cwd?: string, repository?: string}} [opts]
 *   `sessionStateDir` defaults to `DEFAULT_SESSION_STATE_DIR()`, `limit` to 10;
 *   `cwd` / `repository`, when truthy, keep only exact matches.
 * @returns {Promise<Array<{sessionId: string, sessionDir: string, cwd: string, repository: (string | undefined), updatedAt: Date, isActive: boolean}>>}
 *   At most `limit` sessions, sorted by `updatedAt` descending; `[]` when the
 *   directory cannot be listed.
 */
async function discoverCopilotSessions(opts) {
  const sessionStateDir = opts?.sessionStateDir ?? DEFAULT_SESSION_STATE_DIR();
  const limit = opts?.limit ?? 10;

  // Modification time of the events file, or the epoch when it cannot be stat'ed.
  const readUpdatedAt = async (eventsPath) => {
    try {
      return (await stat(eventsPath)).mtime;
    } catch {
      return /* @__PURE__ */ new Date(0);
    }
  };

  // A session is inactive once the tail of its events file mentions "session.shutdown".
  const detectActive = async (eventsPath) => {
    let active = true;
    try {
      const handle = await import("node:fs/promises").then((fs3) => fs3.open(eventsPath, "r"));
      try {
        const { size } = await handle.stat();
        const tailLength = Math.min(size, 4096);
        const tail = Buffer.alloc(tailLength);
        await handle.read(tail, 0, tailLength, Math.max(0, size - tailLength));
        active = !tail.toString("utf8").includes('"session.shutdown"');
      } finally {
        await handle.close();
      }
    } catch {
    }
    return active;
  };

  let names;
  try {
    names = await readdir(sessionStateDir);
  } catch {
    return [];
  }

  const discovered = [];
  for (const name of names) {
    const sessionDir = path16.join(sessionStateDir, name);
    const eventsPath = path16.join(sessionDir, "events.jsonl");
    try {
      const rawWorkspace = await readFile7(path16.join(sessionDir, "workspace.yaml"), "utf8");
      const workspace = parseYaml2(rawWorkspace) ?? {};
      const cwd = String(workspace.cwd ?? "");
      const updatedAt = await readUpdatedAt(eventsPath);
      const isActive = await detectActive(eventsPath);
      discovered.push({
        sessionId: name,
        sessionDir,
        cwd,
        repository: workspace.repository ? String(workspace.repository) : void 0,
        updatedAt,
        isActive
      });
    } catch {
    }
  }

  const matches = discovered.filter((session) => {
    if (opts?.cwd && session.cwd !== opts.cwd) return false;
    if (opts?.repository && session.repository !== opts.repository) return false;
    return true;
  });
  matches.sort((a, b) => b.updatedAt.getTime() - a.updatedAt.getTime());
  return matches.slice(0, limit);
}
|
|
6619
|
+
|
|
6620
|
+
// src/evaluation/providers/copilot-log.ts
|
|
6621
|
+
/**
 * Provider that reads an existing Copilot session transcript (`events.jsonl`)
 * from disk and returns its parsed content, instead of running an agent.
 *
 * The session directory is chosen from `config`, in this order:
 * `sessionDir`, then `sessionId` (joined onto `sessionStateDir` or
 * `<home>/.copilot/session-state`), then `discover === "latest"`.
 */
var CopilotLogProvider = class {
  id;
  kind = "copilot-log";
  targetName;
  config;
  /**
   * @param {string} targetName - Target name; also used to build `id` as `copilot-log:<targetName>`.
   * @param {object} config - Provider options (`sessionDir`, `sessionId`, `sessionStateDir`, `discover`, `cwd`).
   */
  constructor(targetName, config) {
    this.targetName = targetName;
    this.id = `copilot-log:${targetName}`;
    this.config = config;
  }
  /**
   * Reads and parses the resolved session's `events.jsonl`.
   *
   * @param {unknown} _request - Unused; the transcript alone determines the result.
   * @returns {Promise<{output: object[], tokenUsage: (object | undefined), durationMs: (number | undefined), startTime: (string | undefined)}>}
   * @throws {Error} When no session can be resolved or the transcript cannot be read;
   *   a read failure carries the underlying error as `cause`.
   */
  async invoke(_request) {
    const sessionDir = await this.resolveSessionDir();
    const eventsPath = path17.join(sessionDir, "events.jsonl");
    let eventsContent;
    try {
      eventsContent = await readFile8(eventsPath, "utf8");
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      // Keep the original error (code, stack) reachable through `cause`.
      throw new Error(
        `Failed to read Copilot session transcript at ${eventsPath}: ${reason}`,
        { cause: err }
      );
    }
    const parsed = parseCopilotEvents(eventsContent);
    return {
      output: parsed.messages,
      tokenUsage: parsed.tokenUsage,
      durationMs: parsed.durationMs,
      startTime: parsed.meta.startedAt
    };
  }
  /**
   * Determines which session directory to read.
   *
   * @returns {Promise<string>} Path of the session directory.
   * @throws {Error} When discovery finds no session, or when none of
   *   `sessionDir`, `sessionId`, `discover="latest"` is configured.
   */
  async resolveSessionDir() {
    if (this.config.sessionDir) {
      return this.config.sessionDir;
    }
    if (this.config.sessionId) {
      const stateDir = this.config.sessionStateDir ?? path17.join(homedir2(), ".copilot", "session-state");
      return path17.join(stateDir, this.config.sessionId);
    }
    if (this.config.discover === "latest") {
      // Sessions come back newest first, so limit 1 yields the most recently updated one.
      const sessions = await discoverCopilotSessions({
        sessionStateDir: this.config.sessionStateDir,
        cwd: this.config.cwd,
        limit: 1
      });
      if (sessions.length === 0) {
        throw new Error(
          `No Copilot CLI sessions found${this.config.cwd ? ` for cwd=${this.config.cwd}` : ""}. Check that sessions exist in ${this.config.sessionStateDir ?? "~/.copilot/session-state/"}`
        );
      }
      return sessions[0].sessionDir;
    }
    throw new Error(
      'CopilotLogProvider requires one of: sessionDir, sessionId, or discover="latest"'
    );
  }
};
|
|
6676
|
+
|
|
6410
6677
|
// src/evaluation/providers/copilot-sdk.ts
|
|
6411
6678
|
import { randomUUID as randomUUID6 } from "node:crypto";
|
|
6412
6679
|
import { mkdir as mkdir5 } from "node:fs/promises";
|
|
6413
|
-
import
|
|
6680
|
+
import path18 from "node:path";
|
|
6414
6681
|
|
|
6415
6682
|
// src/evaluation/providers/copilot-sdk-log-tracker.ts
|
|
6416
6683
|
var GLOBAL_LOGS_KEY4 = Symbol.for("agentv.copilotSdkLogs");
|
|
@@ -6700,10 +6967,10 @@ var CopilotSdkProvider = class {
|
|
|
6700
6967
|
}
|
|
6701
6968
|
resolveCwd(cwdOverride) {
|
|
6702
6969
|
if (cwdOverride) {
|
|
6703
|
-
return
|
|
6970
|
+
return path18.resolve(cwdOverride);
|
|
6704
6971
|
}
|
|
6705
6972
|
if (this.config.cwd) {
|
|
6706
|
-
return
|
|
6973
|
+
return path18.resolve(this.config.cwd);
|
|
6707
6974
|
}
|
|
6708
6975
|
return void 0;
|
|
6709
6976
|
}
|
|
@@ -6712,9 +6979,9 @@ var CopilotSdkProvider = class {
|
|
|
6712
6979
|
return void 0;
|
|
6713
6980
|
}
|
|
6714
6981
|
if (this.config.logDir) {
|
|
6715
|
-
return
|
|
6982
|
+
return path18.resolve(this.config.logDir);
|
|
6716
6983
|
}
|
|
6717
|
-
return
|
|
6984
|
+
return path18.join(process.cwd(), ".agentv", "logs", "copilot-sdk");
|
|
6718
6985
|
}
|
|
6719
6986
|
async createStreamLogger(request) {
|
|
6720
6987
|
const logDir = this.resolveLogDirectory();
|
|
@@ -6728,7 +6995,7 @@ var CopilotSdkProvider = class {
|
|
|
6728
6995
|
console.warn(`Skipping Copilot SDK stream logging (could not create ${logDir}): ${message}`);
|
|
6729
6996
|
return void 0;
|
|
6730
6997
|
}
|
|
6731
|
-
const filePath =
|
|
6998
|
+
const filePath = path18.join(logDir, buildLogFilename4(request, this.targetName, "copilot-sdk"));
|
|
6732
6999
|
try {
|
|
6733
7000
|
const logger = await CopilotStreamLogger.create(
|
|
6734
7001
|
{
|
|
@@ -6825,7 +7092,7 @@ import { randomUUID as randomUUID7 } from "node:crypto";
|
|
|
6825
7092
|
import { createWriteStream as createWriteStream5 } from "node:fs";
|
|
6826
7093
|
import { mkdir as mkdir6, mkdtemp, rm, writeFile } from "node:fs/promises";
|
|
6827
7094
|
import { tmpdir } from "node:os";
|
|
6828
|
-
import
|
|
7095
|
+
import path19 from "node:path";
|
|
6829
7096
|
|
|
6830
7097
|
// src/evaluation/providers/pi-log-tracker.ts
|
|
6831
7098
|
var GLOBAL_LOGS_KEY5 = Symbol.for("agentv.piLogs");
|
|
@@ -6928,13 +7195,14 @@ var PiCliProvider = class {
|
|
|
6928
7195
|
const inputFiles = normalizeInputFiles(request.inputFiles);
|
|
6929
7196
|
const startTime = (/* @__PURE__ */ new Date()).toISOString();
|
|
6930
7197
|
const startMs = Date.now();
|
|
6931
|
-
const
|
|
7198
|
+
const hasExternalCwd = !!(request.cwd || this.config.cwd);
|
|
7199
|
+
const workspaceRoot = hasExternalCwd ? void 0 : await this.createWorkspace();
|
|
7200
|
+
const cwd = this.resolveCwd(workspaceRoot, request.cwd);
|
|
6932
7201
|
const logger = await this.createStreamLogger(request).catch(() => void 0);
|
|
6933
7202
|
try {
|
|
6934
|
-
const promptFile =
|
|
7203
|
+
const promptFile = path19.join(cwd, PROMPT_FILENAME);
|
|
6935
7204
|
await writeFile(promptFile, request.question, "utf8");
|
|
6936
7205
|
const args = this.buildPiArgs(request.question, inputFiles);
|
|
6937
|
-
const cwd = this.resolveCwd(workspaceRoot, request.cwd);
|
|
6938
7206
|
const result = await this.executePi(args, cwd, request.signal, logger);
|
|
6939
7207
|
if (result.timedOut) {
|
|
6940
7208
|
throw new Error(
|
|
@@ -6976,7 +7244,7 @@ var PiCliProvider = class {
|
|
|
6976
7244
|
args,
|
|
6977
7245
|
executable: this.config.executable,
|
|
6978
7246
|
promptFile,
|
|
6979
|
-
workspace: workspaceRoot,
|
|
7247
|
+
workspace: workspaceRoot ?? cwd,
|
|
6980
7248
|
inputFiles,
|
|
6981
7249
|
logFile: logger?.filePath
|
|
6982
7250
|
},
|
|
@@ -6988,17 +7256,22 @@ var PiCliProvider = class {
|
|
|
6988
7256
|
};
|
|
6989
7257
|
} finally {
|
|
6990
7258
|
await logger?.close();
|
|
6991
|
-
|
|
7259
|
+
if (workspaceRoot) {
|
|
7260
|
+
await this.cleanupWorkspace(workspaceRoot);
|
|
7261
|
+
}
|
|
6992
7262
|
}
|
|
6993
7263
|
}
|
|
6994
7264
|
resolveCwd(workspaceRoot, cwdOverride) {
|
|
6995
7265
|
if (cwdOverride) {
|
|
6996
|
-
return
|
|
7266
|
+
return path19.resolve(cwdOverride);
|
|
7267
|
+
}
|
|
7268
|
+
if (this.config.cwd) {
|
|
7269
|
+
return path19.resolve(this.config.cwd);
|
|
6997
7270
|
}
|
|
6998
|
-
if (
|
|
7271
|
+
if (workspaceRoot) {
|
|
6999
7272
|
return workspaceRoot;
|
|
7000
7273
|
}
|
|
7001
|
-
return
|
|
7274
|
+
return process.cwd();
|
|
7002
7275
|
}
|
|
7003
7276
|
buildPiArgs(prompt, inputFiles) {
|
|
7004
7277
|
const args = [];
|
|
@@ -7076,10 +7349,30 @@ ${prompt}` : prompt;
|
|
|
7076
7349
|
env[envKey] = this.config.apiKey;
|
|
7077
7350
|
}
|
|
7078
7351
|
}
|
|
7352
|
+
if (this.config.subprovider) {
|
|
7353
|
+
const provider = this.config.subprovider.toLowerCase();
|
|
7354
|
+
const PROVIDER_OWN_PREFIXES = {
|
|
7355
|
+
openrouter: ["OPENROUTER_"],
|
|
7356
|
+
anthropic: ["ANTHROPIC_"],
|
|
7357
|
+
openai: ["OPENAI_"],
|
|
7358
|
+
azure: ["AZURE_OPENAI_"],
|
|
7359
|
+
google: ["GEMINI_", "GOOGLE_GENERATIVE_AI_"],
|
|
7360
|
+
gemini: ["GEMINI_", "GOOGLE_GENERATIVE_AI_"],
|
|
7361
|
+
groq: ["GROQ_"],
|
|
7362
|
+
xai: ["XAI_"]
|
|
7363
|
+
};
|
|
7364
|
+
const ownPrefixes = PROVIDER_OWN_PREFIXES[provider] ?? [];
|
|
7365
|
+
const allOtherPrefixes = Object.entries(PROVIDER_OWN_PREFIXES).filter(([key]) => key !== provider).flatMap(([, prefixes]) => prefixes);
|
|
7366
|
+
for (const key of Object.keys(env)) {
|
|
7367
|
+
if (allOtherPrefixes.some((prefix) => key.startsWith(prefix)) && !ownPrefixes.some((prefix) => key.startsWith(prefix))) {
|
|
7368
|
+
delete env[key];
|
|
7369
|
+
}
|
|
7370
|
+
}
|
|
7371
|
+
}
|
|
7079
7372
|
return env;
|
|
7080
7373
|
}
|
|
7081
7374
|
async createWorkspace() {
|
|
7082
|
-
return await mkdtemp(
|
|
7375
|
+
return await mkdtemp(path19.join(tmpdir(), WORKSPACE_PREFIX));
|
|
7083
7376
|
}
|
|
7084
7377
|
async cleanupWorkspace(workspaceRoot) {
|
|
7085
7378
|
try {
|
|
@@ -7089,9 +7382,9 @@ ${prompt}` : prompt;
|
|
|
7089
7382
|
}
|
|
7090
7383
|
resolveLogDirectory() {
|
|
7091
7384
|
if (this.config.logDir) {
|
|
7092
|
-
return
|
|
7385
|
+
return path19.resolve(this.config.logDir);
|
|
7093
7386
|
}
|
|
7094
|
-
return
|
|
7387
|
+
return path19.join(process.cwd(), ".agentv", "logs", "pi-cli");
|
|
7095
7388
|
}
|
|
7096
7389
|
async createStreamLogger(request) {
|
|
7097
7390
|
const logDir = this.resolveLogDirectory();
|
|
@@ -7105,7 +7398,7 @@ ${prompt}` : prompt;
|
|
|
7105
7398
|
console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
|
|
7106
7399
|
return void 0;
|
|
7107
7400
|
}
|
|
7108
|
-
const filePath =
|
|
7401
|
+
const filePath = path19.join(logDir, buildLogFilename5(request, this.targetName));
|
|
7109
7402
|
try {
|
|
7110
7403
|
const logger = await PiStreamLogger.create({
|
|
7111
7404
|
filePath,
|
|
@@ -7303,6 +7596,10 @@ function summarizePiEvent(event) {
|
|
|
7303
7596
|
}
|
|
7304
7597
|
return `message_update: ${eventType}`;
|
|
7305
7598
|
}
|
|
7599
|
+
case "tool_execution_start":
|
|
7600
|
+
return `tool_start: ${record.toolName}`;
|
|
7601
|
+
case "tool_execution_end":
|
|
7602
|
+
return `tool_end: ${record.toolName}`;
|
|
7306
7603
|
default:
|
|
7307
7604
|
return type;
|
|
7308
7605
|
}
|
|
@@ -7333,25 +7630,89 @@ function parsePiJsonl(output) {
|
|
|
7333
7630
|
return parsed;
|
|
7334
7631
|
}
|
|
7335
7632
|
function extractMessages(events) {
|
|
7633
|
+
let messages;
|
|
7336
7634
|
for (let i = events.length - 1; i >= 0; i--) {
|
|
7337
7635
|
const event = events[i];
|
|
7338
7636
|
if (!event || typeof event !== "object") continue;
|
|
7339
7637
|
const record = event;
|
|
7340
7638
|
if (record.type !== "agent_end") continue;
|
|
7341
|
-
const
|
|
7342
|
-
if (!Array.isArray(
|
|
7343
|
-
|
|
7639
|
+
const msgs = record.messages;
|
|
7640
|
+
if (!Array.isArray(msgs)) continue;
|
|
7641
|
+
messages = msgs.map(convertPiMessage).filter((m) => m !== void 0);
|
|
7642
|
+
break;
|
|
7643
|
+
}
|
|
7644
|
+
if (!messages) {
|
|
7645
|
+
messages = [];
|
|
7646
|
+
for (const event of events) {
|
|
7647
|
+
if (!event || typeof event !== "object") continue;
|
|
7648
|
+
const record = event;
|
|
7649
|
+
if (record.type === "turn_end") {
|
|
7650
|
+
const converted = convertPiMessage(record.message);
|
|
7651
|
+
if (converted) messages.push(converted);
|
|
7652
|
+
}
|
|
7653
|
+
}
|
|
7344
7654
|
}
|
|
7345
|
-
const
|
|
7655
|
+
const eventToolCalls = extractToolCallsFromEvents(events);
|
|
7656
|
+
if (eventToolCalls.length > 0) {
|
|
7657
|
+
injectEventToolCalls(messages, eventToolCalls);
|
|
7658
|
+
}
|
|
7659
|
+
return messages;
|
|
7660
|
+
}
|
|
7661
|
+
/**
 * Builds tool-call records from `tool_execution_start` / `tool_execution_end` events.
 *
 * A start event (with a string `toolName`) creates a call; an end event with a
 * non-empty string `toolCallId` supplies the `output` of the call with the
 * same id. Calls are returned in the order their first start event appeared;
 * a later start with the same id replaces the earlier one in place. Start
 * events without a string `toolCallId` are kept as anonymous calls
 * (`id: undefined`) and never receive an output.
 *
 * Anonymous calls are tracked separately from identified ones, so a real id
 * such as `"anon-3"` is preserved and cannot be confused with an anonymous call.
 *
 * @param {unknown[]} events - Parsed events; non-object entries are ignored.
 * @returns {Array<{tool: string, input: unknown, id: (string | undefined), output: unknown}>}
 */
function extractToolCallsFromEvents(events) {
  const calls = [];
  const indexById = /* @__PURE__ */ new Map();
  const results = /* @__PURE__ */ new Map();
  for (const event of events) {
    if (!event || typeof event !== "object") continue;
    const r = event;
    const type = r.type;
    const id = typeof r.toolCallId === "string" ? r.toolCallId : void 0;
    if (type === "tool_execution_start" && typeof r.toolName === "string") {
      const call = { tool: r.toolName, input: r.args, id };
      if (id === void 0) {
        calls.push(call);
      } else if (indexById.has(id)) {
        // Same id seen again: keep its position, take the latest details.
        calls[indexById.get(id)] = call;
      } else {
        indexById.set(id, calls.length);
        calls.push(call);
      }
    } else if (type === "tool_execution_end") {
      if (id) results.set(id, r.result);
    }
  }
  return calls.map(({ tool: tool2, input, id }) => ({
    tool: tool2,
    input,
    id,
    output: id === void 0 ? void 0 : results.get(id)
  }));
}
|
|
7687
|
+
/**
 * Adds tool calls observed in the event stream to `messages` when the
 * messages do not already contain them. Mutates `messages` in place.
 *
 * An event tool call counts as already present when its `id` matches an
 * existing call's id, or when an existing call has the same tool name and
 * the same JSON-serialized input. Remaining calls are appended to the
 * `toolCalls` of the last assistant message (which is replaced by a copy);
 * if there is no assistant message, a new one with empty content is pushed.
 *
 * @param {Array<{role: string, toolCalls?: object[]}>} messages - Messages to update.
 * @param {Array<{tool: string, input: unknown, id?: string}>} eventToolCalls - Calls taken from events.
 * @returns {void}
 */
function injectEventToolCalls(messages, eventToolCalls) {
  const signatureOf = (call) => `${call.tool}:${JSON.stringify(call.input)}`;

  const knownIds = /* @__PURE__ */ new Set();
  const knownSignatures = /* @__PURE__ */ new Set();
  for (const message of messages) {
    for (const call of message.toolCalls || []) {
      if (call.id) knownIds.add(call.id);
      knownSignatures.add(signatureOf(call));
    }
  }

  const additions = eventToolCalls.filter((call) => {
    const idAlreadyKnown = Boolean(call.id) && knownIds.has(call.id);
    return !idAlreadyKnown && !knownSignatures.has(signatureOf(call));
  });
  if (additions.length === 0) return;

  const lastAssistantIdx = messages.map((message) => message.role).lastIndexOf("assistant");
  if (lastAssistantIdx < 0) {
    messages.push({ role: "assistant", content: "", toolCalls: additions });
    return;
  }
  const previous = messages[lastAssistantIdx];
  messages[lastAssistantIdx] = {
    ...previous,
    toolCalls: [...previous.toolCalls ?? [], ...additions]
  };
}
|
|
7356
7717
|
function extractTokenUsage(events) {
|
|
7357
7718
|
for (let i = events.length - 1; i >= 0; i--) {
|
|
@@ -7446,15 +7807,13 @@ function extractToolCalls3(content) {
|
|
|
7446
7807
|
input: p.input,
|
|
7447
7808
|
id: typeof p.id === "string" ? p.id : void 0
|
|
7448
7809
|
});
|
|
7449
|
-
}
|
|
7450
|
-
if (p.type === "toolCall" && typeof p.name === "string") {
|
|
7810
|
+
} else if ((p.type === "toolCall" || p.type === "tool_call") && typeof p.name === "string") {
|
|
7451
7811
|
toolCalls.push({
|
|
7452
7812
|
tool: p.name,
|
|
7453
|
-
input: p.arguments,
|
|
7813
|
+
input: p.arguments ?? p.input,
|
|
7454
7814
|
id: typeof p.id === "string" ? p.id : void 0
|
|
7455
7815
|
});
|
|
7456
|
-
}
|
|
7457
|
-
if (p.type === "tool_result" && typeof p.tool_use_id === "string") {
|
|
7816
|
+
} else if (p.type === "tool_result" && typeof p.tool_use_id === "string") {
|
|
7458
7817
|
const existing = toolCalls.find((tc) => tc.id === p.tool_use_id);
|
|
7459
7818
|
if (existing) {
|
|
7460
7819
|
const idx = toolCalls.indexOf(existing);
|
|
@@ -7544,15 +7903,17 @@ async function defaultPiRunner(options) {
|
|
|
7544
7903
|
// src/evaluation/providers/pi-coding-agent.ts
|
|
7545
7904
|
import { execSync } from "node:child_process";
|
|
7546
7905
|
import { randomUUID as randomUUID8 } from "node:crypto";
|
|
7547
|
-
import { createWriteStream as createWriteStream6 } from "node:fs";
|
|
7906
|
+
import { accessSync, createWriteStream as createWriteStream6 } from "node:fs";
|
|
7548
7907
|
import { mkdir as mkdir7 } from "node:fs/promises";
|
|
7549
|
-
import
|
|
7908
|
+
import path20 from "node:path";
|
|
7550
7909
|
import { createInterface } from "node:readline";
|
|
7910
|
+
import { fileURLToPath as fileURLToPath3 } from "node:url";
|
|
7551
7911
|
var piCodingAgentModule = null;
|
|
7552
7912
|
var piAiModule = null;
|
|
7913
|
+
var loadingPromise = null;
|
|
7553
7914
|
async function promptInstall() {
|
|
7554
7915
|
if (!process.stdout.isTTY) return false;
|
|
7555
|
-
const rl = createInterface({ input: process.stdin, output: process.
|
|
7916
|
+
const rl = createInterface({ input: process.stdin, output: process.stdout });
|
|
7556
7917
|
try {
|
|
7557
7918
|
return await new Promise((resolve) => {
|
|
7558
7919
|
rl.question(
|
|
@@ -7564,43 +7925,74 @@ async function promptInstall() {
|
|
|
7564
7925
|
rl.close();
|
|
7565
7926
|
}
|
|
7566
7927
|
}
|
|
7567
|
-
|
|
7568
|
-
|
|
7928
|
+
/**
 * Finds the nearest directory, at or above this module's directory, that
 * contains a `package.json`.
 *
 * Checks at most 10 directory levels and stops early at the filesystem root.
 *
 * @returns {string} The first directory containing an accessible
 *   `package.json`, or this module's own directory when none is found.
 */
function findAgentvRoot() {
  const moduleDir = path20.dirname(fileURLToPath3(import.meta.url));
  // accessSync throws when the file is missing or inaccessible.
  const hasManifest = (candidate) => {
    try {
      accessSync(path20.join(candidate, "package.json"));
      return true;
    } catch {
      return false;
    }
  };
  let candidate = moduleDir;
  let remaining = 10;
  while (remaining-- > 0) {
    if (hasManifest(candidate)) {
      return candidate;
    }
    const parent = path20.dirname(candidate);
    // dirname of the root is the root itself: nothing left to climb.
    if (parent === candidate) break;
    candidate = parent;
  }
  return moduleDir;
}
|
|
7944
|
+
async function doLoadSdkModules() {
|
|
7945
|
+
try {
|
|
7946
|
+
[piCodingAgentModule, piAiModule] = await Promise.all([
|
|
7947
|
+
import("@mariozechner/pi-coding-agent"),
|
|
7948
|
+
import("@mariozechner/pi-ai")
|
|
7949
|
+
]);
|
|
7950
|
+
} catch {
|
|
7951
|
+
if (await promptInstall()) {
|
|
7952
|
+
const installDir = findAgentvRoot();
|
|
7953
|
+
console.error(`Installing @mariozechner/pi-coding-agent into ${installDir}...`);
|
|
7954
|
+
execSync("bun add @mariozechner/pi-coding-agent", {
|
|
7955
|
+
cwd: installDir,
|
|
7956
|
+
stdio: "inherit"
|
|
7957
|
+
});
|
|
7570
7958
|
[piCodingAgentModule, piAiModule] = await Promise.all([
|
|
7571
7959
|
import("@mariozechner/pi-coding-agent"),
|
|
7572
7960
|
import("@mariozechner/pi-ai")
|
|
7573
7961
|
]);
|
|
7574
|
-
}
|
|
7575
|
-
|
|
7576
|
-
|
|
7577
|
-
|
|
7578
|
-
[piCodingAgentModule, piAiModule] = await Promise.all([
|
|
7579
|
-
import("@mariozechner/pi-coding-agent"),
|
|
7580
|
-
import("@mariozechner/pi-ai")
|
|
7581
|
-
]);
|
|
7582
|
-
} else {
|
|
7583
|
-
throw new Error(
|
|
7584
|
-
"pi-coding-agent SDK is not installed. Install it with:\n bun add @mariozechner/pi-coding-agent"
|
|
7585
|
-
);
|
|
7586
|
-
}
|
|
7962
|
+
} else {
|
|
7963
|
+
throw new Error(
|
|
7964
|
+
"pi-coding-agent SDK is not installed. Install it with:\n bun add @mariozechner/pi-coding-agent"
|
|
7965
|
+
);
|
|
7587
7966
|
}
|
|
7588
7967
|
}
|
|
7968
|
+
}
|
|
7969
|
+
async function loadSdkModules() {
|
|
7970
|
+
if (!piCodingAgentModule || !piAiModule) {
|
|
7971
|
+
if (!loadingPromise) {
|
|
7972
|
+
loadingPromise = doLoadSdkModules().catch((err) => {
|
|
7973
|
+
loadingPromise = null;
|
|
7974
|
+
throw err;
|
|
7975
|
+
});
|
|
7976
|
+
}
|
|
7977
|
+
await loadingPromise;
|
|
7978
|
+
}
|
|
7979
|
+
const piSdk = piCodingAgentModule;
|
|
7980
|
+
const piAi = piAiModule;
|
|
7589
7981
|
const toolMap = {
|
|
7590
|
-
read:
|
|
7591
|
-
bash:
|
|
7592
|
-
edit:
|
|
7593
|
-
write:
|
|
7594
|
-
grep:
|
|
7595
|
-
find:
|
|
7596
|
-
ls:
|
|
7982
|
+
read: piSdk.readTool,
|
|
7983
|
+
bash: piSdk.bashTool,
|
|
7984
|
+
edit: piSdk.editTool,
|
|
7985
|
+
write: piSdk.writeTool,
|
|
7986
|
+
grep: piSdk.grepTool,
|
|
7987
|
+
find: piSdk.findTool,
|
|
7988
|
+
ls: piSdk.lsTool
|
|
7597
7989
|
};
|
|
7598
7990
|
return {
|
|
7599
|
-
createAgentSession:
|
|
7600
|
-
codingTools:
|
|
7991
|
+
createAgentSession: piSdk.createAgentSession,
|
|
7992
|
+
codingTools: piSdk.codingTools,
|
|
7601
7993
|
toolMap,
|
|
7602
|
-
SessionManager:
|
|
7603
|
-
getModel:
|
|
7994
|
+
SessionManager: piSdk.SessionManager,
|
|
7995
|
+
getModel: piAi.getModel
|
|
7604
7996
|
};
|
|
7605
7997
|
}
|
|
7606
7998
|
var PiCodingAgentProvider = class {
|
|
@@ -7789,10 +8181,10 @@ ${fileList}`;
|
|
|
7789
8181
|
}
|
|
7790
8182
|
resolveCwd(cwdOverride) {
|
|
7791
8183
|
if (cwdOverride) {
|
|
7792
|
-
return
|
|
8184
|
+
return path20.resolve(cwdOverride);
|
|
7793
8185
|
}
|
|
7794
8186
|
if (this.config.cwd) {
|
|
7795
|
-
return
|
|
8187
|
+
return path20.resolve(this.config.cwd);
|
|
7796
8188
|
}
|
|
7797
8189
|
return process.cwd();
|
|
7798
8190
|
}
|
|
@@ -7811,9 +8203,9 @@ ${fileList}`;
|
|
|
7811
8203
|
}
|
|
7812
8204
|
resolveLogDirectory() {
|
|
7813
8205
|
if (this.config.logDir) {
|
|
7814
|
-
return
|
|
8206
|
+
return path20.resolve(this.config.logDir);
|
|
7815
8207
|
}
|
|
7816
|
-
return
|
|
8208
|
+
return path20.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
|
|
7817
8209
|
}
|
|
7818
8210
|
async createStreamLogger(request) {
|
|
7819
8211
|
const logDir = this.resolveLogDirectory();
|
|
@@ -7827,7 +8219,7 @@ ${fileList}`;
|
|
|
7827
8219
|
console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
|
|
7828
8220
|
return void 0;
|
|
7829
8221
|
}
|
|
7830
|
-
const filePath =
|
|
8222
|
+
const filePath = path20.join(logDir, buildLogFilename6(request, this.targetName));
|
|
7831
8223
|
try {
|
|
7832
8224
|
const logger = await PiStreamLogger2.create({
|
|
7833
8225
|
filePath,
|
|
@@ -8040,18 +8432,18 @@ var ProviderRegistry = class {
|
|
|
8040
8432
|
|
|
8041
8433
|
// src/evaluation/providers/vscode-provider.ts
|
|
8042
8434
|
import { exec as exec2 } from "node:child_process";
|
|
8043
|
-
import { constants as constants3, access as access3, stat as
|
|
8044
|
-
import
|
|
8435
|
+
import { constants as constants3, access as access3, stat as stat5 } from "node:fs/promises";
|
|
8436
|
+
import path32 from "node:path";
|
|
8045
8437
|
import { promisify as promisify3 } from "node:util";
|
|
8046
8438
|
|
|
8047
8439
|
// src/evaluation/providers/vscode/dispatch/agentDispatch.ts
|
|
8048
|
-
import { stat as
|
|
8049
|
-
import
|
|
8440
|
+
import { stat as stat4, writeFile as writeFile4 } from "node:fs/promises";
|
|
8441
|
+
import path30 from "node:path";
|
|
8050
8442
|
|
|
8051
8443
|
// src/evaluation/providers/vscode/utils/fs.ts
|
|
8052
8444
|
import { constants as constants2 } from "node:fs";
|
|
8053
|
-
import { access as access2, mkdir as mkdir8, readdir, rm as rm2, stat } from "node:fs/promises";
|
|
8054
|
-
import
|
|
8445
|
+
import { access as access2, mkdir as mkdir8, readdir as readdir2, rm as rm2, stat as stat2 } from "node:fs/promises";
|
|
8446
|
+
import path21 from "node:path";
|
|
8055
8447
|
async function pathExists(target) {
|
|
8056
8448
|
try {
|
|
8057
8449
|
await access2(target, constants2.F_OK);
|
|
@@ -8064,10 +8456,10 @@ async function ensureDir(target) {
|
|
|
8064
8456
|
await mkdir8(target, { recursive: true });
|
|
8065
8457
|
}
|
|
8066
8458
|
async function readDirEntries(target) {
|
|
8067
|
-
const entries = await
|
|
8459
|
+
const entries = await readdir2(target, { withFileTypes: true });
|
|
8068
8460
|
return entries.map((entry) => ({
|
|
8069
8461
|
name: entry.name,
|
|
8070
|
-
absolutePath:
|
|
8462
|
+
absolutePath: path21.join(target, entry.name),
|
|
8071
8463
|
isDirectory: entry.isDirectory()
|
|
8072
8464
|
}));
|
|
8073
8465
|
}
|
|
@@ -8082,9 +8474,9 @@ async function removeIfExists(target) {
|
|
|
8082
8474
|
}
|
|
8083
8475
|
|
|
8084
8476
|
// src/evaluation/providers/vscode/utils/path.ts
|
|
8085
|
-
import
|
|
8477
|
+
import path22 from "node:path";
|
|
8086
8478
|
function pathToFileUri2(filePath) {
|
|
8087
|
-
const absolutePath =
|
|
8479
|
+
const absolutePath = path22.isAbsolute(filePath) ? filePath : path22.resolve(filePath);
|
|
8088
8480
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
8089
8481
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
8090
8482
|
return `file:///${normalizedPath}`;
|
|
@@ -8093,7 +8485,7 @@ function pathToFileUri2(filePath) {
|
|
|
8093
8485
|
}
|
|
8094
8486
|
|
|
8095
8487
|
// src/evaluation/providers/vscode/dispatch/promptBuilder.ts
|
|
8096
|
-
import
|
|
8488
|
+
import path23 from "node:path";
|
|
8097
8489
|
|
|
8098
8490
|
// src/evaluation/providers/vscode/utils/template.ts
|
|
8099
8491
|
function renderTemplate2(content, variables) {
|
|
@@ -8185,8 +8577,8 @@ function createBatchRequestPrompt(userQuery, responseFileTmp, responseFileFinal,
|
|
|
8185
8577
|
});
|
|
8186
8578
|
}
|
|
8187
8579
|
function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateContent) {
|
|
8188
|
-
const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${
|
|
8189
|
-
const responseList = responseFiles.map((file) => `"${
|
|
8580
|
+
const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${path23.basename(file)}`).join("\n");
|
|
8581
|
+
const responseList = responseFiles.map((file) => `"${path23.basename(file)}"`).join(", ");
|
|
8190
8582
|
return renderTemplate2(templateContent, {
|
|
8191
8583
|
requestFiles: requestLines,
|
|
8192
8584
|
responseList
|
|
@@ -8194,8 +8586,8 @@ function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateCont
|
|
|
8194
8586
|
}
|
|
8195
8587
|
|
|
8196
8588
|
// src/evaluation/providers/vscode/dispatch/responseWaiter.ts
|
|
8197
|
-
import { readFile as
|
|
8198
|
-
import
|
|
8589
|
+
import { readFile as readFile9 } from "node:fs/promises";
|
|
8590
|
+
import path24 from "node:path";
|
|
8199
8591
|
|
|
8200
8592
|
// src/evaluation/providers/vscode/utils/time.ts
|
|
8201
8593
|
function sleep2(ms) {
|
|
@@ -8233,7 +8625,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
|
|
|
8233
8625
|
const maxAttempts = 10;
|
|
8234
8626
|
while (attempts < maxAttempts) {
|
|
8235
8627
|
try {
|
|
8236
|
-
const content = await
|
|
8628
|
+
const content = await readFile9(responseFileFinal, { encoding: "utf8" });
|
|
8237
8629
|
if (!silent) {
|
|
8238
8630
|
process.stdout.write(`${content}
|
|
8239
8631
|
`);
|
|
@@ -8254,7 +8646,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
|
|
|
8254
8646
|
}
|
|
8255
8647
|
async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, silent = false, timeoutMs = DEFAULT_TIMEOUT_MS) {
|
|
8256
8648
|
if (!silent) {
|
|
8257
|
-
const fileList = responseFilesFinal.map((file) =>
|
|
8649
|
+
const fileList = responseFilesFinal.map((file) => path24.basename(file)).join(", ");
|
|
8258
8650
|
console.error(`waiting for ${responseFilesFinal.length} batch response(s): ${fileList}`);
|
|
8259
8651
|
}
|
|
8260
8652
|
const deadline = Date.now() + timeoutMs;
|
|
@@ -8263,7 +8655,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
8263
8655
|
while (pending.size > 0) {
|
|
8264
8656
|
if (Date.now() >= deadline) {
|
|
8265
8657
|
if (!silent) {
|
|
8266
|
-
const remaining = [...pending].map((f) =>
|
|
8658
|
+
const remaining = [...pending].map((f) => path24.basename(f)).join(", ");
|
|
8267
8659
|
console.error(
|
|
8268
8660
|
`error: timed out after ${Math.round(timeoutMs / 1e3)}s waiting for batch responses. Still pending: ${remaining}`
|
|
8269
8661
|
);
|
|
@@ -8290,7 +8682,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
8290
8682
|
const maxAttempts = 10;
|
|
8291
8683
|
while (attempts < maxAttempts) {
|
|
8292
8684
|
try {
|
|
8293
|
-
const content = await
|
|
8685
|
+
const content = await readFile9(file, { encoding: "utf8" });
|
|
8294
8686
|
if (!silent) {
|
|
8295
8687
|
process.stdout.write(`${content}
|
|
8296
8688
|
`);
|
|
@@ -8314,15 +8706,15 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
8314
8706
|
// src/evaluation/providers/vscode/dispatch/vscodeProcess.ts
|
|
8315
8707
|
import { exec, spawn as spawn4 } from "node:child_process";
|
|
8316
8708
|
import { mkdir as mkdir9, writeFile as writeFile2 } from "node:fs/promises";
|
|
8317
|
-
import
|
|
8709
|
+
import path27 from "node:path";
|
|
8318
8710
|
import { promisify as promisify2 } from "node:util";
|
|
8319
8711
|
|
|
8320
8712
|
// src/evaluation/providers/vscode/dispatch/constants.ts
|
|
8321
|
-
import
|
|
8713
|
+
import path26 from "node:path";
|
|
8322
8714
|
|
|
8323
8715
|
// src/paths.ts
|
|
8324
8716
|
import os2 from "node:os";
|
|
8325
|
-
import
|
|
8717
|
+
import path25 from "node:path";
|
|
8326
8718
|
var logged = false;
|
|
8327
8719
|
function getAgentvHome() {
|
|
8328
8720
|
const envHome = process.env.AGENTV_HOME;
|
|
@@ -8333,19 +8725,19 @@ function getAgentvHome() {
|
|
|
8333
8725
|
}
|
|
8334
8726
|
return envHome;
|
|
8335
8727
|
}
|
|
8336
|
-
return
|
|
8728
|
+
return path25.join(os2.homedir(), ".agentv");
|
|
8337
8729
|
}
|
|
8338
8730
|
function getWorkspacesRoot() {
|
|
8339
|
-
return
|
|
8731
|
+
return path25.join(getAgentvHome(), "workspaces");
|
|
8340
8732
|
}
|
|
8341
8733
|
function getSubagentsRoot() {
|
|
8342
|
-
return
|
|
8734
|
+
return path25.join(getAgentvHome(), "subagents");
|
|
8343
8735
|
}
|
|
8344
8736
|
function getTraceStateRoot() {
|
|
8345
|
-
return
|
|
8737
|
+
return path25.join(getAgentvHome(), "trace-state");
|
|
8346
8738
|
}
|
|
8347
8739
|
function getWorkspacePoolRoot() {
|
|
8348
|
-
return
|
|
8740
|
+
return path25.join(getAgentvHome(), "workspace-pool");
|
|
8349
8741
|
}
|
|
8350
8742
|
|
|
8351
8743
|
// src/evaluation/providers/vscode/dispatch/constants.ts
|
|
@@ -8353,7 +8745,7 @@ var DEFAULT_LOCK_NAME = "subagent.lock";
|
|
|
8353
8745
|
var DEFAULT_ALIVE_FILENAME = ".alive";
|
|
8354
8746
|
function getDefaultSubagentRoot(vscodeCmd = "code") {
|
|
8355
8747
|
const folder = vscodeCmd === "code-insiders" ? "vscode-insiders-agents" : "vscode-agents";
|
|
8356
|
-
return
|
|
8748
|
+
return path26.join(getSubagentsRoot(), folder);
|
|
8357
8749
|
}
|
|
8358
8750
|
var DEFAULT_SUBAGENT_ROOT = getDefaultSubagentRoot();
|
|
8359
8751
|
|
|
@@ -8420,11 +8812,11 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
8420
8812
|
await raceSpawnError(child);
|
|
8421
8813
|
return true;
|
|
8422
8814
|
}
|
|
8423
|
-
const aliveFile =
|
|
8815
|
+
const aliveFile = path27.join(subagentDir, DEFAULT_ALIVE_FILENAME);
|
|
8424
8816
|
await removeIfExists(aliveFile);
|
|
8425
|
-
const githubAgentsDir =
|
|
8817
|
+
const githubAgentsDir = path27.join(subagentDir, ".github", "agents");
|
|
8426
8818
|
await mkdir9(githubAgentsDir, { recursive: true });
|
|
8427
|
-
const wakeupDst =
|
|
8819
|
+
const wakeupDst = path27.join(githubAgentsDir, "wakeup.md");
|
|
8428
8820
|
await writeFile2(wakeupDst, DEFAULT_WAKEUP_CONTENT, "utf8");
|
|
8429
8821
|
const workspaceChild = spawnVsCode(vscodeCmd, [workspacePath], {
|
|
8430
8822
|
label: "open-workspace"
|
|
@@ -8437,7 +8829,7 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
8437
8829
|
"chat",
|
|
8438
8830
|
"-m",
|
|
8439
8831
|
wakeupChatId,
|
|
8440
|
-
`create a file named .alive in the ${
|
|
8832
|
+
`create a file named .alive in the ${path27.basename(subagentDir)} folder`
|
|
8441
8833
|
];
|
|
8442
8834
|
const wakeupChild = spawnVsCode(vscodeCmd, chatArgs, { label: "send-wakeup-chat" });
|
|
8443
8835
|
await raceSpawnError(wakeupChild);
|
|
@@ -8452,10 +8844,10 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
8452
8844
|
return true;
|
|
8453
8845
|
}
|
|
8454
8846
|
async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, requestInstructions, timestamp, vscodeCmd) {
|
|
8455
|
-
const workspacePath =
|
|
8456
|
-
const messagesDir =
|
|
8847
|
+
const workspacePath = path27.join(subagentDir, `${path27.basename(subagentDir)}.code-workspace`);
|
|
8848
|
+
const messagesDir = path27.join(subagentDir, "messages");
|
|
8457
8849
|
await mkdir9(messagesDir, { recursive: true });
|
|
8458
|
-
const reqFile =
|
|
8850
|
+
const reqFile = path27.join(messagesDir, `${timestamp}_req.md`);
|
|
8459
8851
|
await writeFile2(reqFile, requestInstructions, { encoding: "utf8" });
|
|
8460
8852
|
const reqUri = pathToFileUri2(reqFile);
|
|
8461
8853
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
@@ -8463,16 +8855,16 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
|
|
|
8463
8855
|
chatArgs.push("-a", attachment);
|
|
8464
8856
|
}
|
|
8465
8857
|
chatArgs.push("-a", reqFile);
|
|
8466
|
-
chatArgs.push(`Follow instructions in [${
|
|
8858
|
+
chatArgs.push(`Follow instructions in [${path27.basename(reqFile)}](${reqUri})`);
|
|
8467
8859
|
const workspaceReady = await ensureWorkspaceFocused(
|
|
8468
8860
|
workspacePath,
|
|
8469
|
-
|
|
8861
|
+
path27.basename(subagentDir),
|
|
8470
8862
|
subagentDir,
|
|
8471
8863
|
vscodeCmd
|
|
8472
8864
|
);
|
|
8473
8865
|
if (!workspaceReady) {
|
|
8474
8866
|
throw new Error(
|
|
8475
|
-
`VS Code workspace '${
|
|
8867
|
+
`VS Code workspace '${path27.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
|
|
8476
8868
|
);
|
|
8477
8869
|
}
|
|
8478
8870
|
await sleep2(500);
|
|
@@ -8480,8 +8872,8 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
|
|
|
8480
8872
|
await raceSpawnError(child);
|
|
8481
8873
|
}
|
|
8482
8874
|
async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, chatInstruction, vscodeCmd) {
|
|
8483
|
-
const workspacePath =
|
|
8484
|
-
const messagesDir =
|
|
8875
|
+
const workspacePath = path27.join(subagentDir, `${path27.basename(subagentDir)}.code-workspace`);
|
|
8876
|
+
const messagesDir = path27.join(subagentDir, "messages");
|
|
8485
8877
|
await mkdir9(messagesDir, { recursive: true });
|
|
8486
8878
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
8487
8879
|
for (const attachment of attachmentPaths) {
|
|
@@ -8490,13 +8882,13 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
|
|
|
8490
8882
|
chatArgs.push(chatInstruction);
|
|
8491
8883
|
const workspaceReady = await ensureWorkspaceFocused(
|
|
8492
8884
|
workspacePath,
|
|
8493
|
-
|
|
8885
|
+
path27.basename(subagentDir),
|
|
8494
8886
|
subagentDir,
|
|
8495
8887
|
vscodeCmd
|
|
8496
8888
|
);
|
|
8497
8889
|
if (!workspaceReady) {
|
|
8498
8890
|
throw new Error(
|
|
8499
|
-
`VS Code workspace '${
|
|
8891
|
+
`VS Code workspace '${path27.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
|
|
8500
8892
|
);
|
|
8501
8893
|
}
|
|
8502
8894
|
await sleep2(500);
|
|
@@ -8505,11 +8897,11 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
|
|
|
8505
8897
|
}
|
|
8506
8898
|
|
|
8507
8899
|
// src/evaluation/providers/vscode/dispatch/workspaceManager.ts
|
|
8508
|
-
import { copyFile, mkdir as mkdir10, readFile as
|
|
8509
|
-
import
|
|
8900
|
+
import { copyFile, mkdir as mkdir10, readFile as readFile10, readdir as readdir3, stat as stat3, writeFile as writeFile3 } from "node:fs/promises";
|
|
8901
|
+
import path29 from "node:path";
|
|
8510
8902
|
|
|
8511
8903
|
// src/evaluation/providers/vscode/utils/workspace.ts
|
|
8512
|
-
import
|
|
8904
|
+
import path28 from "node:path";
|
|
8513
8905
|
import JSON5 from "json5";
|
|
8514
8906
|
function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
8515
8907
|
let workspace;
|
|
@@ -8526,10 +8918,10 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
8526
8918
|
}
|
|
8527
8919
|
const transformedFolders = workspace.folders.map((folder) => {
|
|
8528
8920
|
const folderPath = folder.path;
|
|
8529
|
-
if (
|
|
8921
|
+
if (path28.isAbsolute(folderPath)) {
|
|
8530
8922
|
return folder;
|
|
8531
8923
|
}
|
|
8532
|
-
const absolutePath =
|
|
8924
|
+
const absolutePath = path28.resolve(templateDir, folderPath);
|
|
8533
8925
|
return {
|
|
8534
8926
|
...folder,
|
|
8535
8927
|
path: absolutePath
|
|
@@ -8551,19 +8943,19 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
8551
8943
|
if (locationMap && typeof locationMap === "object") {
|
|
8552
8944
|
const transformedMap = {};
|
|
8553
8945
|
for (const [locationPath, value] of Object.entries(locationMap)) {
|
|
8554
|
-
const isAbsolute =
|
|
8946
|
+
const isAbsolute = path28.isAbsolute(locationPath);
|
|
8555
8947
|
if (isAbsolute) {
|
|
8556
8948
|
transformedMap[locationPath] = value;
|
|
8557
8949
|
} else {
|
|
8558
8950
|
const firstGlobIndex = locationPath.search(/[*]/);
|
|
8559
8951
|
if (firstGlobIndex === -1) {
|
|
8560
|
-
const resolvedPath =
|
|
8952
|
+
const resolvedPath = path28.resolve(templateDir, locationPath).replace(/\\/g, "/");
|
|
8561
8953
|
transformedMap[resolvedPath] = value;
|
|
8562
8954
|
} else {
|
|
8563
8955
|
const basePathEnd = locationPath.lastIndexOf("/", firstGlobIndex);
|
|
8564
8956
|
const basePath = basePathEnd !== -1 ? locationPath.substring(0, basePathEnd) : ".";
|
|
8565
8957
|
const patternPath = locationPath.substring(basePathEnd !== -1 ? basePathEnd : 0);
|
|
8566
|
-
const resolvedPath = (
|
|
8958
|
+
const resolvedPath = (path28.resolve(templateDir, basePath) + patternPath).replace(
|
|
8567
8959
|
/\\/g,
|
|
8568
8960
|
"/"
|
|
8569
8961
|
);
|
|
@@ -8604,7 +8996,7 @@ async function findUnlockedSubagent(subagentRoot) {
|
|
|
8604
8996
|
number: Number.parseInt(entry.name.split("-")[1] ?? "", 10)
|
|
8605
8997
|
})).filter((entry) => Number.isInteger(entry.number)).sort((a, b) => a.number - b.number);
|
|
8606
8998
|
for (const subagent of subagents) {
|
|
8607
|
-
const lockFile =
|
|
8999
|
+
const lockFile = path29.join(subagent.absolutePath, DEFAULT_LOCK_NAME);
|
|
8608
9000
|
if (!await pathExists(lockFile)) {
|
|
8609
9001
|
return subagent.absolutePath;
|
|
8610
9002
|
}
|
|
@@ -8614,26 +9006,26 @@ async function findUnlockedSubagent(subagentRoot) {
|
|
|
8614
9006
|
async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
8615
9007
|
let workspaceContent;
|
|
8616
9008
|
if (workspaceTemplate) {
|
|
8617
|
-
const workspaceSrc =
|
|
9009
|
+
const workspaceSrc = path29.resolve(workspaceTemplate);
|
|
8618
9010
|
if (!await pathExists(workspaceSrc)) {
|
|
8619
9011
|
throw new Error(`workspace template not found: ${workspaceSrc}`);
|
|
8620
9012
|
}
|
|
8621
|
-
const stats = await
|
|
9013
|
+
const stats = await stat3(workspaceSrc);
|
|
8622
9014
|
if (!stats.isFile()) {
|
|
8623
9015
|
throw new Error(`workspace template must be a file, not a directory: ${workspaceSrc}`);
|
|
8624
9016
|
}
|
|
8625
|
-
const templateText = await
|
|
9017
|
+
const templateText = await readFile10(workspaceSrc, "utf8");
|
|
8626
9018
|
workspaceContent = JSON.parse(templateText);
|
|
8627
9019
|
} else {
|
|
8628
9020
|
workspaceContent = DEFAULT_WORKSPACE_TEMPLATE;
|
|
8629
9021
|
}
|
|
8630
|
-
const workspaceName = `${
|
|
8631
|
-
const workspaceDst =
|
|
8632
|
-
const templateDir = workspaceTemplate ?
|
|
9022
|
+
const workspaceName = `${path29.basename(subagentDir)}.code-workspace`;
|
|
9023
|
+
const workspaceDst = path29.join(subagentDir, workspaceName);
|
|
9024
|
+
const templateDir = workspaceTemplate ? path29.dirname(path29.resolve(workspaceTemplate)) : subagentDir;
|
|
8633
9025
|
const workspaceJson = JSON.stringify(workspaceContent, null, 2);
|
|
8634
9026
|
let transformedContent = transformWorkspacePaths(workspaceJson, templateDir);
|
|
8635
9027
|
if (cwd) {
|
|
8636
|
-
const absCwd =
|
|
9028
|
+
const absCwd = path29.resolve(cwd);
|
|
8637
9029
|
const parsed = JSON.parse(transformedContent);
|
|
8638
9030
|
const alreadyPresent = parsed.folders.some((f) => f.path === absCwd);
|
|
8639
9031
|
if (!alreadyPresent) {
|
|
@@ -8642,35 +9034,35 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
|
8642
9034
|
}
|
|
8643
9035
|
}
|
|
8644
9036
|
await writeFile3(workspaceDst, transformedContent, "utf8");
|
|
8645
|
-
const messagesDir =
|
|
9037
|
+
const messagesDir = path29.join(subagentDir, "messages");
|
|
8646
9038
|
await mkdir10(messagesDir, { recursive: true });
|
|
8647
9039
|
return { workspace: workspaceDst, messagesDir };
|
|
8648
9040
|
}
|
|
8649
9041
|
async function createSubagentLock(subagentDir) {
|
|
8650
|
-
const messagesDir =
|
|
9042
|
+
const messagesDir = path29.join(subagentDir, "messages");
|
|
8651
9043
|
if (await pathExists(messagesDir)) {
|
|
8652
|
-
const files = await
|
|
9044
|
+
const files = await readdir3(messagesDir);
|
|
8653
9045
|
await Promise.all(
|
|
8654
9046
|
files.map(async (file) => {
|
|
8655
|
-
const target =
|
|
9047
|
+
const target = path29.join(messagesDir, file);
|
|
8656
9048
|
await removeIfExists(target);
|
|
8657
9049
|
})
|
|
8658
9050
|
);
|
|
8659
9051
|
}
|
|
8660
|
-
const githubAgentsDir =
|
|
9052
|
+
const githubAgentsDir = path29.join(subagentDir, ".github", "agents");
|
|
8661
9053
|
if (await pathExists(githubAgentsDir)) {
|
|
8662
|
-
const agentFiles = await
|
|
9054
|
+
const agentFiles = await readdir3(githubAgentsDir);
|
|
8663
9055
|
const preservedFiles = /* @__PURE__ */ new Set(["wakeup.md", "subagent.md"]);
|
|
8664
9056
|
await Promise.all(
|
|
8665
|
-
agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(
|
|
9057
|
+
agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(path29.join(githubAgentsDir, file)))
|
|
8666
9058
|
);
|
|
8667
9059
|
}
|
|
8668
|
-
const lockFile =
|
|
9060
|
+
const lockFile = path29.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
8669
9061
|
await writeFile3(lockFile, "", { encoding: "utf8" });
|
|
8670
9062
|
return lockFile;
|
|
8671
9063
|
}
|
|
8672
9064
|
async function removeSubagentLock(subagentDir) {
|
|
8673
|
-
const lockFile =
|
|
9065
|
+
const lockFile = path29.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
8674
9066
|
await removeIfExists(lockFile);
|
|
8675
9067
|
}
|
|
8676
9068
|
async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspaceTemplate, dryRun, cwd) {
|
|
@@ -8690,9 +9082,9 @@ async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspa
|
|
|
8690
9082
|
return 1;
|
|
8691
9083
|
}
|
|
8692
9084
|
if (promptFile) {
|
|
8693
|
-
const githubAgentsDir =
|
|
9085
|
+
const githubAgentsDir = path29.join(subagentDir, ".github", "agents");
|
|
8694
9086
|
await mkdir10(githubAgentsDir, { recursive: true });
|
|
8695
|
-
const agentFile =
|
|
9087
|
+
const agentFile = path29.join(githubAgentsDir, `${chatId}.md`);
|
|
8696
9088
|
try {
|
|
8697
9089
|
await copyFile(promptFile, agentFile);
|
|
8698
9090
|
} catch (error) {
|
|
@@ -8711,11 +9103,11 @@ async function resolvePromptFile(promptFile) {
|
|
|
8711
9103
|
if (!promptFile) {
|
|
8712
9104
|
return void 0;
|
|
8713
9105
|
}
|
|
8714
|
-
const resolvedPrompt =
|
|
9106
|
+
const resolvedPrompt = path30.resolve(promptFile);
|
|
8715
9107
|
if (!await pathExists(resolvedPrompt)) {
|
|
8716
9108
|
throw new Error(`Prompt file not found: ${resolvedPrompt}`);
|
|
8717
9109
|
}
|
|
8718
|
-
const promptStats = await
|
|
9110
|
+
const promptStats = await stat4(resolvedPrompt);
|
|
8719
9111
|
if (!promptStats.isFile()) {
|
|
8720
9112
|
throw new Error(`Prompt file must be a file, not a directory: ${resolvedPrompt}`);
|
|
8721
9113
|
}
|
|
@@ -8727,7 +9119,7 @@ async function resolveAttachments(extraAttachments) {
|
|
|
8727
9119
|
}
|
|
8728
9120
|
const resolved = [];
|
|
8729
9121
|
for (const attachment of extraAttachments) {
|
|
8730
|
-
const resolvedPath =
|
|
9122
|
+
const resolvedPath = path30.resolve(attachment);
|
|
8731
9123
|
if (!await pathExists(resolvedPath)) {
|
|
8732
9124
|
throw new Error(`Attachment not found: ${resolvedPath}`);
|
|
8733
9125
|
}
|
|
@@ -8769,7 +9161,7 @@ async function dispatchAgentSession(options) {
|
|
|
8769
9161
|
error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
|
|
8770
9162
|
};
|
|
8771
9163
|
}
|
|
8772
|
-
const subagentName =
|
|
9164
|
+
const subagentName = path30.basename(subagentDir);
|
|
8773
9165
|
const chatId = Math.random().toString(16).slice(2, 10);
|
|
8774
9166
|
const preparationResult = await prepareSubagentDirectory(
|
|
8775
9167
|
subagentDir,
|
|
@@ -8797,9 +9189,9 @@ async function dispatchAgentSession(options) {
|
|
|
8797
9189
|
};
|
|
8798
9190
|
}
|
|
8799
9191
|
const timestamp = generateTimestamp();
|
|
8800
|
-
const messagesDir =
|
|
8801
|
-
const responseFileTmp =
|
|
8802
|
-
const responseFileFinal =
|
|
9192
|
+
const messagesDir = path30.join(subagentDir, "messages");
|
|
9193
|
+
const responseFileTmp = path30.join(messagesDir, `${timestamp}_res.tmp.md`);
|
|
9194
|
+
const responseFileFinal = path30.join(messagesDir, `${timestamp}_res.md`);
|
|
8803
9195
|
const requestInstructions = createRequestPrompt(
|
|
8804
9196
|
userQuery,
|
|
8805
9197
|
responseFileTmp,
|
|
@@ -8904,7 +9296,7 @@ async function dispatchBatchAgent(options) {
|
|
|
8904
9296
|
error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
|
|
8905
9297
|
};
|
|
8906
9298
|
}
|
|
8907
|
-
subagentName =
|
|
9299
|
+
subagentName = path30.basename(subagentDir);
|
|
8908
9300
|
const chatId = Math.random().toString(16).slice(2, 10);
|
|
8909
9301
|
const preparationResult = await prepareSubagentDirectory(
|
|
8910
9302
|
subagentDir,
|
|
@@ -8935,17 +9327,17 @@ async function dispatchBatchAgent(options) {
|
|
|
8935
9327
|
};
|
|
8936
9328
|
}
|
|
8937
9329
|
const timestamp = generateTimestamp();
|
|
8938
|
-
const messagesDir =
|
|
9330
|
+
const messagesDir = path30.join(subagentDir, "messages");
|
|
8939
9331
|
requestFiles = userQueries.map(
|
|
8940
|
-
(_, index) =>
|
|
9332
|
+
(_, index) => path30.join(messagesDir, `${timestamp}_${index}_req.md`)
|
|
8941
9333
|
);
|
|
8942
9334
|
const responseTmpFiles = userQueries.map(
|
|
8943
|
-
(_, index) =>
|
|
9335
|
+
(_, index) => path30.join(messagesDir, `${timestamp}_${index}_res.tmp.md`)
|
|
8944
9336
|
);
|
|
8945
9337
|
responseFilesFinal = userQueries.map(
|
|
8946
|
-
(_, index) =>
|
|
9338
|
+
(_, index) => path30.join(messagesDir, `${timestamp}_${index}_res.md`)
|
|
8947
9339
|
);
|
|
8948
|
-
const orchestratorFile =
|
|
9340
|
+
const orchestratorFile = path30.join(messagesDir, `${timestamp}_orchestrator.md`);
|
|
8949
9341
|
if (!dryRun) {
|
|
8950
9342
|
await Promise.all(
|
|
8951
9343
|
userQueries.map((query, index) => {
|
|
@@ -9031,7 +9423,7 @@ async function dispatchBatchAgent(options) {
|
|
|
9031
9423
|
|
|
9032
9424
|
// src/evaluation/providers/vscode/dispatch/provision.ts
|
|
9033
9425
|
import { writeFile as writeFile5 } from "node:fs/promises";
|
|
9034
|
-
import
|
|
9426
|
+
import path31 from "node:path";
|
|
9035
9427
|
var DEFAULT_WORKSPACE_TEMPLATE2 = {
|
|
9036
9428
|
folders: [
|
|
9037
9429
|
{
|
|
@@ -9062,7 +9454,7 @@ async function provisionSubagents(options) {
|
|
|
9062
9454
|
if (!Number.isInteger(subagents) || subagents < 1) {
|
|
9063
9455
|
throw new Error("subagents must be a positive integer");
|
|
9064
9456
|
}
|
|
9065
|
-
const targetPath =
|
|
9457
|
+
const targetPath = path31.resolve(targetRoot);
|
|
9066
9458
|
if (!dryRun) {
|
|
9067
9459
|
await ensureDir(targetPath);
|
|
9068
9460
|
}
|
|
@@ -9082,7 +9474,7 @@ async function provisionSubagents(options) {
|
|
|
9082
9474
|
continue;
|
|
9083
9475
|
}
|
|
9084
9476
|
highestNumber = Math.max(highestNumber, parsed);
|
|
9085
|
-
const lockFile =
|
|
9477
|
+
const lockFile = path31.join(entry.absolutePath, lockName);
|
|
9086
9478
|
const locked = await pathExists(lockFile);
|
|
9087
9479
|
if (locked) {
|
|
9088
9480
|
lockedSubagents.add(entry.absolutePath);
|
|
@@ -9099,10 +9491,10 @@ async function provisionSubagents(options) {
|
|
|
9099
9491
|
break;
|
|
9100
9492
|
}
|
|
9101
9493
|
const subagentDir = subagent.absolutePath;
|
|
9102
|
-
const githubAgentsDir =
|
|
9103
|
-
const lockFile =
|
|
9104
|
-
const workspaceDst =
|
|
9105
|
-
const wakeupDst =
|
|
9494
|
+
const githubAgentsDir = path31.join(subagentDir, ".github", "agents");
|
|
9495
|
+
const lockFile = path31.join(subagentDir, lockName);
|
|
9496
|
+
const workspaceDst = path31.join(subagentDir, `${path31.basename(subagentDir)}.code-workspace`);
|
|
9497
|
+
const wakeupDst = path31.join(githubAgentsDir, "wakeup.md");
|
|
9106
9498
|
const isLocked = await pathExists(lockFile);
|
|
9107
9499
|
if (isLocked && !force) {
|
|
9108
9500
|
continue;
|
|
@@ -9140,10 +9532,10 @@ async function provisionSubagents(options) {
|
|
|
9140
9532
|
let nextIndex = highestNumber;
|
|
9141
9533
|
while (subagentsProvisioned < subagents) {
|
|
9142
9534
|
nextIndex += 1;
|
|
9143
|
-
const subagentDir =
|
|
9144
|
-
const githubAgentsDir =
|
|
9145
|
-
const workspaceDst =
|
|
9146
|
-
const wakeupDst =
|
|
9535
|
+
const subagentDir = path31.join(targetPath, `subagent-${nextIndex}`);
|
|
9536
|
+
const githubAgentsDir = path31.join(subagentDir, ".github", "agents");
|
|
9537
|
+
const workspaceDst = path31.join(subagentDir, `${path31.basename(subagentDir)}.code-workspace`);
|
|
9538
|
+
const wakeupDst = path31.join(githubAgentsDir, "wakeup.md");
|
|
9147
9539
|
if (!dryRun) {
|
|
9148
9540
|
await ensureDir(subagentDir);
|
|
9149
9541
|
await ensureDir(githubAgentsDir);
|
|
@@ -9333,7 +9725,7 @@ var VSCodeProvider = class {
|
|
|
9333
9725
|
async function locateVSCodeExecutable(candidate) {
|
|
9334
9726
|
const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
|
|
9335
9727
|
if (includesPathSeparator) {
|
|
9336
|
-
const resolved =
|
|
9728
|
+
const resolved = path32.isAbsolute(candidate) ? candidate : path32.resolve(candidate);
|
|
9337
9729
|
try {
|
|
9338
9730
|
await access3(resolved, constants3.F_OK);
|
|
9339
9731
|
return resolved;
|
|
@@ -9362,7 +9754,7 @@ async function resolveWorkspaceTemplateFile(template) {
|
|
|
9362
9754
|
return void 0;
|
|
9363
9755
|
}
|
|
9364
9756
|
try {
|
|
9365
|
-
const stats = await
|
|
9757
|
+
const stats = await stat5(path32.resolve(template));
|
|
9366
9758
|
return stats.isFile() ? template : void 0;
|
|
9367
9759
|
} catch {
|
|
9368
9760
|
return template;
|
|
@@ -9386,7 +9778,7 @@ function buildMandatoryPrereadBlock2(attachmentFiles) {
|
|
|
9386
9778
|
return "";
|
|
9387
9779
|
}
|
|
9388
9780
|
const buildList = (files) => files.map((absolutePath) => {
|
|
9389
|
-
const fileName =
|
|
9781
|
+
const fileName = path32.basename(absolutePath);
|
|
9390
9782
|
const fileUri = pathToFileUri3(absolutePath);
|
|
9391
9783
|
return `* [${fileName}](${fileUri})`;
|
|
9392
9784
|
});
|
|
@@ -9407,7 +9799,7 @@ function collectAttachmentFiles(attachments) {
|
|
|
9407
9799
|
}
|
|
9408
9800
|
const unique = /* @__PURE__ */ new Map();
|
|
9409
9801
|
for (const attachment of attachments) {
|
|
9410
|
-
const absolutePath =
|
|
9802
|
+
const absolutePath = path32.resolve(attachment);
|
|
9411
9803
|
if (!unique.has(absolutePath)) {
|
|
9412
9804
|
unique.set(absolutePath, absolutePath);
|
|
9413
9805
|
}
|
|
@@ -9415,7 +9807,7 @@ function collectAttachmentFiles(attachments) {
|
|
|
9415
9807
|
return Array.from(unique.values());
|
|
9416
9808
|
}
|
|
9417
9809
|
function pathToFileUri3(filePath) {
|
|
9418
|
-
const absolutePath =
|
|
9810
|
+
const absolutePath = path32.isAbsolute(filePath) ? filePath : path32.resolve(filePath);
|
|
9419
9811
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
9420
9812
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
9421
9813
|
return `file:///${normalizedPath}`;
|
|
@@ -9428,7 +9820,7 @@ function normalizeAttachments(attachments) {
|
|
|
9428
9820
|
}
|
|
9429
9821
|
const deduped = /* @__PURE__ */ new Set();
|
|
9430
9822
|
for (const attachment of attachments) {
|
|
9431
|
-
deduped.add(
|
|
9823
|
+
deduped.add(path32.resolve(attachment));
|
|
9432
9824
|
}
|
|
9433
9825
|
return Array.from(deduped);
|
|
9434
9826
|
}
|
|
@@ -9437,7 +9829,7 @@ function mergeAttachments(all) {
|
|
|
9437
9829
|
for (const list of all) {
|
|
9438
9830
|
if (!list) continue;
|
|
9439
9831
|
for (const inputFile of list) {
|
|
9440
|
-
deduped.add(
|
|
9832
|
+
deduped.add(path32.resolve(inputFile));
|
|
9441
9833
|
}
|
|
9442
9834
|
}
|
|
9443
9835
|
return deduped.size > 0 ? Array.from(deduped) : void 0;
|
|
@@ -9485,8 +9877,8 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
|
|
|
9485
9877
|
|
|
9486
9878
|
// src/evaluation/providers/targets-file.ts
|
|
9487
9879
|
import { constants as constants4 } from "node:fs";
|
|
9488
|
-
import { access as access4, readFile as
|
|
9489
|
-
import
|
|
9880
|
+
import { access as access4, readFile as readFile11 } from "node:fs/promises";
|
|
9881
|
+
import path33 from "node:path";
|
|
9490
9882
|
import { parse as parse4 } from "yaml";
|
|
9491
9883
|
function isRecord(value) {
|
|
9492
9884
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
@@ -9523,11 +9915,11 @@ async function fileExists3(filePath) {
|
|
|
9523
9915
|
}
|
|
9524
9916
|
}
|
|
9525
9917
|
async function readTargetDefinitions(filePath) {
|
|
9526
|
-
const absolutePath =
|
|
9918
|
+
const absolutePath = path33.resolve(filePath);
|
|
9527
9919
|
if (!await fileExists3(absolutePath)) {
|
|
9528
9920
|
throw new Error(`targets.yaml not found at ${absolutePath}`);
|
|
9529
9921
|
}
|
|
9530
|
-
const raw = await
|
|
9922
|
+
const raw = await readFile11(absolutePath, "utf8");
|
|
9531
9923
|
const parsed = parse4(raw);
|
|
9532
9924
|
if (!isRecord(parsed)) {
|
|
9533
9925
|
throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
|
|
@@ -9543,16 +9935,16 @@ function listTargetNames(definitions) {
|
|
|
9543
9935
|
}
|
|
9544
9936
|
|
|
9545
9937
|
// src/evaluation/providers/provider-discovery.ts
|
|
9546
|
-
import
|
|
9938
|
+
import path34 from "node:path";
|
|
9547
9939
|
import fg from "fast-glob";
|
|
9548
9940
|
async function discoverProviders(registry, baseDir) {
|
|
9549
9941
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
9550
9942
|
const candidateDirs = [];
|
|
9551
|
-
let dir =
|
|
9552
|
-
const root =
|
|
9943
|
+
let dir = path34.resolve(baseDir);
|
|
9944
|
+
const root = path34.parse(dir).root;
|
|
9553
9945
|
while (dir !== root) {
|
|
9554
|
-
candidateDirs.push(
|
|
9555
|
-
dir =
|
|
9946
|
+
candidateDirs.push(path34.join(dir, ".agentv", "providers"));
|
|
9947
|
+
dir = path34.dirname(dir);
|
|
9556
9948
|
}
|
|
9557
9949
|
let files = [];
|
|
9558
9950
|
for (const providersDir of candidateDirs) {
|
|
@@ -9568,7 +9960,7 @@ async function discoverProviders(registry, baseDir) {
|
|
|
9568
9960
|
}
|
|
9569
9961
|
const discoveredKinds = [];
|
|
9570
9962
|
for (const filePath of files) {
|
|
9571
|
-
const basename =
|
|
9963
|
+
const basename = path34.basename(filePath);
|
|
9572
9964
|
const kindName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
9573
9965
|
if (registry.has(kindName)) {
|
|
9574
9966
|
continue;
|
|
@@ -9586,7 +9978,7 @@ async function discoverProviders(registry, baseDir) {
|
|
|
9586
9978
|
// src/evaluation/providers/index.ts
|
|
9587
9979
|
function createBuiltinProviderRegistry() {
|
|
9588
9980
|
const registry = new ProviderRegistry();
|
|
9589
|
-
registry.register("openai", (t) => new OpenAIProvider(t.name, t.config)).register("openrouter", (t) => new OpenRouterProvider(t.name, t.config)).register("azure", (t) => new AzureProvider(t.name, t.config)).register("anthropic", (t) => new AnthropicProvider(t.name, t.config)).register("gemini", (t) => new GeminiProvider(t.name, t.config)).register("cli", (t) => new CliProvider(t.name, t.config)).register("codex", (t) => new CodexProvider(t.name, t.config)).register("copilot-sdk", (t) => new CopilotSdkProvider(t.name, t.config)).register("copilot-cli", (t) => new CopilotCliProvider(t.name, t.config)).register("pi-coding-agent", (t) => new PiCodingAgentProvider(t.name, t.config)).register("pi-cli", (t) => new PiCliProvider(t.name, t.config)).register("claude-cli", (t) => new ClaudeCliProvider(t.name, t.config)).register("claude", (t) => new ClaudeCliProvider(t.name, t.config)).register("claude-sdk", (t) => new ClaudeSdkProvider(t.name, t.config)).register("mock", (t) => new MockProvider(t.name, t.config)).register("agentv", (t) => new AgentvProvider(t.name, t.config)).register("vscode", (t) => new VSCodeProvider(t.name, t.config, "vscode")).register(
|
|
9981
|
+
registry.register("openai", (t) => new OpenAIProvider(t.name, t.config)).register("openrouter", (t) => new OpenRouterProvider(t.name, t.config)).register("azure", (t) => new AzureProvider(t.name, t.config)).register("anthropic", (t) => new AnthropicProvider(t.name, t.config)).register("gemini", (t) => new GeminiProvider(t.name, t.config)).register("cli", (t) => new CliProvider(t.name, t.config)).register("codex", (t) => new CodexProvider(t.name, t.config)).register("copilot-sdk", (t) => new CopilotSdkProvider(t.name, t.config)).register("copilot-cli", (t) => new CopilotCliProvider(t.name, t.config)).register("copilot-log", (t) => new CopilotLogProvider(t.name, t.config)).register("pi-coding-agent", (t) => new PiCodingAgentProvider(t.name, t.config)).register("pi-cli", (t) => new PiCliProvider(t.name, t.config)).register("claude-cli", (t) => new ClaudeCliProvider(t.name, t.config)).register("claude", (t) => new ClaudeCliProvider(t.name, t.config)).register("claude-sdk", (t) => new ClaudeSdkProvider(t.name, t.config)).register("mock", (t) => new MockProvider(t.name, t.config)).register("agentv", (t) => new AgentvProvider(t.name, t.config)).register("vscode", (t) => new VSCodeProvider(t.name, t.config, "vscode")).register(
|
|
9590
9982
|
"vscode-insiders",
|
|
9591
9983
|
(t) => new VSCodeProvider(t.name, t.config, "vscode-insiders")
|
|
9592
9984
|
);
|
|
@@ -9793,15 +10185,15 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
|
|
|
9793
10185
|
});
|
|
9794
10186
|
}
|
|
9795
10187
|
async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
9796
|
-
const { mkdir: mkdir16, readFile:
|
|
10188
|
+
const { mkdir: mkdir16, readFile: readFile14, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
|
|
9797
10189
|
const { tmpdir: tmpdir3 } = await import("node:os");
|
|
9798
|
-
const
|
|
10190
|
+
const path47 = await import("node:path");
|
|
9799
10191
|
const { randomUUID: randomUUID10 } = await import("node:crypto");
|
|
9800
|
-
const dir =
|
|
10192
|
+
const dir = path47.join(tmpdir3(), `agentv-exec-${randomUUID10()}`);
|
|
9801
10193
|
await mkdir16(dir, { recursive: true });
|
|
9802
|
-
const stdinPath =
|
|
9803
|
-
const stdoutPath =
|
|
9804
|
-
const stderrPath =
|
|
10194
|
+
const stdinPath = path47.join(dir, "stdin.txt");
|
|
10195
|
+
const stdoutPath = path47.join(dir, "stdout.txt");
|
|
10196
|
+
const stderrPath = path47.join(dir, "stderr.txt");
|
|
9805
10197
|
await writeFile9(stdinPath, stdinPayload, "utf8");
|
|
9806
10198
|
const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
|
|
9807
10199
|
const { spawn: spawn5 } = await import("node:child_process");
|
|
@@ -9831,8 +10223,8 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
|
9831
10223
|
resolve(code ?? 0);
|
|
9832
10224
|
});
|
|
9833
10225
|
});
|
|
9834
|
-
const stdout = (await
|
|
9835
|
-
const stderr = (await
|
|
10226
|
+
const stdout = (await readFile14(stdoutPath, "utf8")).replace(/\r\n/g, "\n");
|
|
10227
|
+
const stderr = (await readFile14(stderrPath, "utf8")).replace(/\r\n/g, "\n");
|
|
9836
10228
|
return { stdout, stderr, exitCode };
|
|
9837
10229
|
} finally {
|
|
9838
10230
|
await rm6(dir, { recursive: true, force: true });
|
|
@@ -10280,7 +10672,7 @@ import { generateText as generateText3 } from "ai";
|
|
|
10280
10672
|
|
|
10281
10673
|
// src/evaluation/evaluators/llm-grader.ts
|
|
10282
10674
|
import fs2 from "node:fs/promises";
|
|
10283
|
-
import
|
|
10675
|
+
import path35 from "node:path";
|
|
10284
10676
|
import { generateText as generateText2, stepCountIs, tool } from "ai";
|
|
10285
10677
|
import { z as z3 } from "zod";
|
|
10286
10678
|
var DEFAULT_MAX_STEPS = 10;
|
|
@@ -11135,8 +11527,8 @@ function calculateScoreRangeResult(result, rubrics) {
|
|
|
11135
11527
|
};
|
|
11136
11528
|
}
|
|
11137
11529
|
function resolveSandboxed(basePath, relativePath) {
|
|
11138
|
-
const resolved =
|
|
11139
|
-
if (!resolved.startsWith(basePath +
|
|
11530
|
+
const resolved = path35.resolve(basePath, relativePath);
|
|
11531
|
+
if (!resolved.startsWith(basePath + path35.sep) && resolved !== basePath) {
|
|
11140
11532
|
throw new Error(`Path '${relativePath}' is outside the workspace`);
|
|
11141
11533
|
}
|
|
11142
11534
|
return resolved;
|
|
@@ -11169,11 +11561,11 @@ function createFilesystemTools(workspacePath) {
|
|
|
11169
11561
|
execute: async (input) => {
|
|
11170
11562
|
try {
|
|
11171
11563
|
const resolved = resolveSandboxed(workspacePath, input.path);
|
|
11172
|
-
const
|
|
11173
|
-
if (
|
|
11564
|
+
const stat9 = await fs2.stat(resolved);
|
|
11565
|
+
if (stat9.isDirectory()) {
|
|
11174
11566
|
return { error: `'${input.path}' is a directory, not a file` };
|
|
11175
11567
|
}
|
|
11176
|
-
const buffer = Buffer.alloc(Math.min(
|
|
11568
|
+
const buffer = Buffer.alloc(Math.min(stat9.size, MAX_FILE_SIZE));
|
|
11177
11569
|
const fd = await fs2.open(resolved, "r");
|
|
11178
11570
|
try {
|
|
11179
11571
|
await fd.read(buffer, 0, buffer.length, 0);
|
|
@@ -11181,8 +11573,8 @@ function createFilesystemTools(workspacePath) {
|
|
|
11181
11573
|
await fd.close();
|
|
11182
11574
|
}
|
|
11183
11575
|
const content = buffer.toString("utf-8");
|
|
11184
|
-
const truncated =
|
|
11185
|
-
return { content, truncated, size:
|
|
11576
|
+
const truncated = stat9.size > MAX_FILE_SIZE;
|
|
11577
|
+
return { content, truncated, size: stat9.size };
|
|
11186
11578
|
} catch (error) {
|
|
11187
11579
|
return { error: error instanceof Error ? error.message : String(error) };
|
|
11188
11580
|
}
|
|
@@ -11226,15 +11618,15 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
|
|
|
11226
11618
|
for (const entry of entries) {
|
|
11227
11619
|
if (matches.length >= MAX_SEARCH_MATCHES) return;
|
|
11228
11620
|
if (SEARCH_SKIP_DIRS.has(entry.name)) continue;
|
|
11229
|
-
const fullPath =
|
|
11621
|
+
const fullPath = path35.join(dirPath, entry.name);
|
|
11230
11622
|
if (entry.isDirectory()) {
|
|
11231
11623
|
await searchDirectory(fullPath, workspacePath, regex, matches);
|
|
11232
11624
|
} else if (entry.isFile()) {
|
|
11233
|
-
const ext =
|
|
11625
|
+
const ext = path35.extname(entry.name).toLowerCase();
|
|
11234
11626
|
if (BINARY_EXTENSIONS.has(ext)) continue;
|
|
11235
11627
|
try {
|
|
11236
|
-
const
|
|
11237
|
-
if (
|
|
11628
|
+
const stat9 = await fs2.stat(fullPath);
|
|
11629
|
+
if (stat9.size > MAX_FILE_SIZE) continue;
|
|
11238
11630
|
const content = await fs2.readFile(fullPath, "utf-8");
|
|
11239
11631
|
const lines = content.split("\n");
|
|
11240
11632
|
for (let i = 0; i < lines.length; i++) {
|
|
@@ -11242,7 +11634,7 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
|
|
|
11242
11634
|
regex.lastIndex = 0;
|
|
11243
11635
|
if (regex.test(lines[i])) {
|
|
11244
11636
|
matches.push({
|
|
11245
|
-
file:
|
|
11637
|
+
file: path35.relative(workspacePath, fullPath),
|
|
11246
11638
|
line: i + 1,
|
|
11247
11639
|
text: lines[i].substring(0, 200)
|
|
11248
11640
|
});
|
|
@@ -11875,115 +12267,115 @@ var FieldAccuracyEvaluator = class {
|
|
|
11875
12267
|
* Evaluate a single field against the expected value.
|
|
11876
12268
|
*/
|
|
11877
12269
|
evaluateField(fieldConfig, candidateData, expectedData) {
|
|
11878
|
-
const { path:
|
|
11879
|
-
const candidateValue = resolvePath(candidateData,
|
|
11880
|
-
const expectedValue = resolvePath(expectedData,
|
|
12270
|
+
const { path: path47, match, required = true, weight = 1 } = fieldConfig;
|
|
12271
|
+
const candidateValue = resolvePath(candidateData, path47);
|
|
12272
|
+
const expectedValue = resolvePath(expectedData, path47);
|
|
11881
12273
|
if (expectedValue === void 0) {
|
|
11882
12274
|
return {
|
|
11883
|
-
path:
|
|
12275
|
+
path: path47,
|
|
11884
12276
|
score: 1,
|
|
11885
12277
|
// No expected value means no comparison needed
|
|
11886
12278
|
weight,
|
|
11887
12279
|
hit: true,
|
|
11888
|
-
message: `${
|
|
12280
|
+
message: `${path47}: no expected value`
|
|
11889
12281
|
};
|
|
11890
12282
|
}
|
|
11891
12283
|
if (candidateValue === void 0) {
|
|
11892
12284
|
if (required) {
|
|
11893
12285
|
return {
|
|
11894
|
-
path:
|
|
12286
|
+
path: path47,
|
|
11895
12287
|
score: 0,
|
|
11896
12288
|
weight,
|
|
11897
12289
|
hit: false,
|
|
11898
|
-
message: `${
|
|
12290
|
+
message: `${path47} (required, missing)`
|
|
11899
12291
|
};
|
|
11900
12292
|
}
|
|
11901
12293
|
return {
|
|
11902
|
-
path:
|
|
12294
|
+
path: path47,
|
|
11903
12295
|
score: 1,
|
|
11904
12296
|
// Don't penalize missing optional fields
|
|
11905
12297
|
weight: 0,
|
|
11906
12298
|
// Zero weight means it won't affect the score
|
|
11907
12299
|
hit: true,
|
|
11908
|
-
message: `${
|
|
12300
|
+
message: `${path47}: optional field missing`
|
|
11909
12301
|
};
|
|
11910
12302
|
}
|
|
11911
12303
|
switch (match) {
|
|
11912
12304
|
case "exact":
|
|
11913
|
-
return this.compareExact(
|
|
12305
|
+
return this.compareExact(path47, candidateValue, expectedValue, weight);
|
|
11914
12306
|
case "numeric_tolerance":
|
|
11915
12307
|
return this.compareNumericTolerance(
|
|
11916
|
-
|
|
12308
|
+
path47,
|
|
11917
12309
|
candidateValue,
|
|
11918
12310
|
expectedValue,
|
|
11919
12311
|
fieldConfig,
|
|
11920
12312
|
weight
|
|
11921
12313
|
);
|
|
11922
12314
|
case "date":
|
|
11923
|
-
return this.compareDate(
|
|
12315
|
+
return this.compareDate(path47, candidateValue, expectedValue, fieldConfig, weight);
|
|
11924
12316
|
default:
|
|
11925
12317
|
return {
|
|
11926
|
-
path:
|
|
12318
|
+
path: path47,
|
|
11927
12319
|
score: 0,
|
|
11928
12320
|
weight,
|
|
11929
12321
|
hit: false,
|
|
11930
|
-
message: `${
|
|
12322
|
+
message: `${path47}: unknown match type "${match}"`
|
|
11931
12323
|
};
|
|
11932
12324
|
}
|
|
11933
12325
|
}
|
|
11934
12326
|
/**
|
|
11935
12327
|
* Exact equality comparison.
|
|
11936
12328
|
*/
|
|
11937
|
-
compareExact(
|
|
12329
|
+
compareExact(path47, candidateValue, expectedValue, weight) {
|
|
11938
12330
|
if (deepEqual(candidateValue, expectedValue)) {
|
|
11939
12331
|
return {
|
|
11940
|
-
path:
|
|
12332
|
+
path: path47,
|
|
11941
12333
|
score: 1,
|
|
11942
12334
|
weight,
|
|
11943
12335
|
hit: true,
|
|
11944
|
-
message:
|
|
12336
|
+
message: path47
|
|
11945
12337
|
};
|
|
11946
12338
|
}
|
|
11947
12339
|
if (typeof candidateValue !== typeof expectedValue) {
|
|
11948
12340
|
return {
|
|
11949
|
-
path:
|
|
12341
|
+
path: path47,
|
|
11950
12342
|
score: 0,
|
|
11951
12343
|
weight,
|
|
11952
12344
|
hit: false,
|
|
11953
|
-
message: `${
|
|
12345
|
+
message: `${path47} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
|
|
11954
12346
|
};
|
|
11955
12347
|
}
|
|
11956
12348
|
return {
|
|
11957
|
-
path:
|
|
12349
|
+
path: path47,
|
|
11958
12350
|
score: 0,
|
|
11959
12351
|
weight,
|
|
11960
12352
|
hit: false,
|
|
11961
|
-
message: `${
|
|
12353
|
+
message: `${path47} (value mismatch)`
|
|
11962
12354
|
};
|
|
11963
12355
|
}
|
|
11964
12356
|
/**
|
|
11965
12357
|
* Numeric comparison with absolute or relative tolerance.
|
|
11966
12358
|
*/
|
|
11967
|
-
compareNumericTolerance(
|
|
12359
|
+
compareNumericTolerance(path47, candidateValue, expectedValue, fieldConfig, weight) {
|
|
11968
12360
|
const { tolerance = 0, relative = false } = fieldConfig;
|
|
11969
12361
|
const candidateNum = toNumber(candidateValue);
|
|
11970
12362
|
const expectedNum = toNumber(expectedValue);
|
|
11971
12363
|
if (candidateNum === null || expectedNum === null) {
|
|
11972
12364
|
return {
|
|
11973
|
-
path:
|
|
12365
|
+
path: path47,
|
|
11974
12366
|
score: 0,
|
|
11975
12367
|
weight,
|
|
11976
12368
|
hit: false,
|
|
11977
|
-
message: `${
|
|
12369
|
+
message: `${path47} (non-numeric value)`
|
|
11978
12370
|
};
|
|
11979
12371
|
}
|
|
11980
12372
|
if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
|
|
11981
12373
|
return {
|
|
11982
|
-
path:
|
|
12374
|
+
path: path47,
|
|
11983
12375
|
score: 0,
|
|
11984
12376
|
weight,
|
|
11985
12377
|
hit: false,
|
|
11986
|
-
message: `${
|
|
12378
|
+
message: `${path47} (invalid numeric value)`
|
|
11987
12379
|
};
|
|
11988
12380
|
}
|
|
11989
12381
|
const diff = Math.abs(candidateNum - expectedNum);
|
|
@@ -11996,61 +12388,61 @@ var FieldAccuracyEvaluator = class {
|
|
|
11996
12388
|
}
|
|
11997
12389
|
if (withinTolerance) {
|
|
11998
12390
|
return {
|
|
11999
|
-
path:
|
|
12391
|
+
path: path47,
|
|
12000
12392
|
score: 1,
|
|
12001
12393
|
weight,
|
|
12002
12394
|
hit: true,
|
|
12003
|
-
message: `${
|
|
12395
|
+
message: `${path47} (within tolerance: diff=${diff.toFixed(2)})`
|
|
12004
12396
|
};
|
|
12005
12397
|
}
|
|
12006
12398
|
return {
|
|
12007
|
-
path:
|
|
12399
|
+
path: path47,
|
|
12008
12400
|
score: 0,
|
|
12009
12401
|
weight,
|
|
12010
12402
|
hit: false,
|
|
12011
|
-
message: `${
|
|
12403
|
+
message: `${path47} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
|
|
12012
12404
|
};
|
|
12013
12405
|
}
|
|
12014
12406
|
/**
|
|
12015
12407
|
* Date comparison with format normalization.
|
|
12016
12408
|
*/
|
|
12017
|
-
compareDate(
|
|
12409
|
+
compareDate(path47, candidateValue, expectedValue, fieldConfig, weight) {
|
|
12018
12410
|
const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
|
|
12019
12411
|
const candidateDate = parseDate(String(candidateValue), formats);
|
|
12020
12412
|
const expectedDate = parseDate(String(expectedValue), formats);
|
|
12021
12413
|
if (candidateDate === null) {
|
|
12022
12414
|
return {
|
|
12023
|
-
path:
|
|
12415
|
+
path: path47,
|
|
12024
12416
|
score: 0,
|
|
12025
12417
|
weight,
|
|
12026
12418
|
hit: false,
|
|
12027
|
-
message: `${
|
|
12419
|
+
message: `${path47} (unparseable candidate date)`
|
|
12028
12420
|
};
|
|
12029
12421
|
}
|
|
12030
12422
|
if (expectedDate === null) {
|
|
12031
12423
|
return {
|
|
12032
|
-
path:
|
|
12424
|
+
path: path47,
|
|
12033
12425
|
score: 0,
|
|
12034
12426
|
weight,
|
|
12035
12427
|
hit: false,
|
|
12036
|
-
message: `${
|
|
12428
|
+
message: `${path47} (unparseable expected date)`
|
|
12037
12429
|
};
|
|
12038
12430
|
}
|
|
12039
12431
|
if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
|
|
12040
12432
|
return {
|
|
12041
|
-
path:
|
|
12433
|
+
path: path47,
|
|
12042
12434
|
score: 1,
|
|
12043
12435
|
weight,
|
|
12044
12436
|
hit: true,
|
|
12045
|
-
message:
|
|
12437
|
+
message: path47
|
|
12046
12438
|
};
|
|
12047
12439
|
}
|
|
12048
12440
|
return {
|
|
12049
|
-
path:
|
|
12441
|
+
path: path47,
|
|
12050
12442
|
score: 0,
|
|
12051
12443
|
weight,
|
|
12052
12444
|
hit: false,
|
|
12053
|
-
message: `${
|
|
12445
|
+
message: `${path47} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
|
|
12054
12446
|
};
|
|
12055
12447
|
}
|
|
12056
12448
|
/**
|
|
@@ -12083,11 +12475,11 @@ var FieldAccuracyEvaluator = class {
|
|
|
12083
12475
|
};
|
|
12084
12476
|
}
|
|
12085
12477
|
};
|
|
12086
|
-
function resolvePath(obj,
|
|
12087
|
-
if (!
|
|
12478
|
+
function resolvePath(obj, path47) {
|
|
12479
|
+
if (!path47 || !obj) {
|
|
12088
12480
|
return void 0;
|
|
12089
12481
|
}
|
|
12090
|
-
const parts =
|
|
12482
|
+
const parts = path47.split(/\.|\[|\]/).filter((p) => p.length > 0);
|
|
12091
12483
|
let current = obj;
|
|
12092
12484
|
for (const part of parts) {
|
|
12093
12485
|
if (current === null || current === void 0) {
|
|
@@ -12255,6 +12647,7 @@ var PROVIDER_TOOL_SEMANTICS = {
|
|
|
12255
12647
|
"pi-coding-agent": PI_CODING_AGENT_MATCHER,
|
|
12256
12648
|
"pi-cli": PI_CODING_AGENT_MATCHER,
|
|
12257
12649
|
"copilot-cli": COPILOT_MATCHER,
|
|
12650
|
+
"copilot-log": COPILOT_MATCHER,
|
|
12258
12651
|
"copilot-sdk": COPILOT_MATCHER,
|
|
12259
12652
|
vscode: COPILOT_MATCHER,
|
|
12260
12653
|
"vscode-insiders": COPILOT_MATCHER
|
|
@@ -12281,8 +12674,9 @@ var SkillTriggerEvaluator = class {
|
|
|
12281
12674
|
let triggered = false;
|
|
12282
12675
|
let evidence = "";
|
|
12283
12676
|
for (const toolCall of allToolCalls) {
|
|
12677
|
+
const toolName = toolCall.tool ?? "";
|
|
12284
12678
|
const input = toolCall.input ?? {};
|
|
12285
|
-
if (matcher.skillTools.includes(
|
|
12679
|
+
if (matcher.skillTools.includes(toolName)) {
|
|
12286
12680
|
const skillArg = String(input[matcher.skillInputField] ?? "");
|
|
12287
12681
|
if (skillArg.includes(skillName)) {
|
|
12288
12682
|
triggered = true;
|
|
@@ -12290,12 +12684,12 @@ var SkillTriggerEvaluator = class {
|
|
|
12290
12684
|
break;
|
|
12291
12685
|
}
|
|
12292
12686
|
} else if (matcher.skillToolPrefixes?.some(
|
|
12293
|
-
(prefix) =>
|
|
12687
|
+
(prefix) => toolName.startsWith(prefix) && toolName.includes(skillName)
|
|
12294
12688
|
)) {
|
|
12295
12689
|
triggered = true;
|
|
12296
|
-
evidence = `Skill tool invoked via tool name "${
|
|
12690
|
+
evidence = `Skill tool invoked via tool name "${toolName}"`;
|
|
12297
12691
|
break;
|
|
12298
|
-
} else if (matcher.readTools.includes(
|
|
12692
|
+
} else if (matcher.readTools.includes(toolName)) {
|
|
12299
12693
|
const filePath = this.readPathFromInput(input, matcher);
|
|
12300
12694
|
if (filePath.includes(skillName)) {
|
|
12301
12695
|
triggered = true;
|
|
@@ -12303,10 +12697,10 @@ var SkillTriggerEvaluator = class {
|
|
|
12303
12697
|
break;
|
|
12304
12698
|
}
|
|
12305
12699
|
} else if (matcher.readToolPrefixes?.some(
|
|
12306
|
-
(prefix) =>
|
|
12700
|
+
(prefix) => toolName.startsWith(prefix) && toolName.includes(skillName)
|
|
12307
12701
|
)) {
|
|
12308
12702
|
triggered = true;
|
|
12309
|
-
evidence = `Read tool loaded skill file via tool name "${
|
|
12703
|
+
evidence = `Read tool loaded skill file via tool name "${toolName}"`;
|
|
12310
12704
|
break;
|
|
12311
12705
|
}
|
|
12312
12706
|
}
|
|
@@ -12568,8 +12962,8 @@ var TokenUsageEvaluator = class {
|
|
|
12568
12962
|
};
|
|
12569
12963
|
|
|
12570
12964
|
// src/evaluation/evaluators/tool-trajectory.ts
|
|
12571
|
-
function getNestedValue(obj,
|
|
12572
|
-
const parts =
|
|
12965
|
+
function getNestedValue(obj, path47) {
|
|
12966
|
+
const parts = path47.split(".");
|
|
12573
12967
|
let current = obj;
|
|
12574
12968
|
for (const part of parts) {
|
|
12575
12969
|
if (current === null || current === void 0 || typeof current !== "object") {
|
|
@@ -13190,8 +13584,8 @@ function runEqualsAssertion(output, value) {
|
|
|
13190
13584
|
|
|
13191
13585
|
// src/evaluation/orchestrator.ts
|
|
13192
13586
|
import { createHash as createHash2, randomUUID as randomUUID9 } from "node:crypto";
|
|
13193
|
-
import { copyFile as copyFile2, mkdir as mkdir14, readdir as
|
|
13194
|
-
import
|
|
13587
|
+
import { copyFile as copyFile2, mkdir as mkdir14, readdir as readdir7, stat as stat8 } from "node:fs/promises";
|
|
13588
|
+
import path44 from "node:path";
|
|
13195
13589
|
import micromatch3 from "micromatch";
|
|
13196
13590
|
|
|
13197
13591
|
// ../../node_modules/.bun/yocto-queue@1.2.2/node_modules/yocto-queue/index.js
|
|
@@ -13405,7 +13799,7 @@ var InlineAssertEvaluator = class {
|
|
|
13405
13799
|
};
|
|
13406
13800
|
|
|
13407
13801
|
// src/evaluation/evaluators/prompt-resolution.ts
|
|
13408
|
-
import
|
|
13802
|
+
import path36 from "node:path";
|
|
13409
13803
|
async function resolveCustomPrompt(promptConfig, context, timeoutMs) {
|
|
13410
13804
|
if (promptConfig.resolvedPromptScript && promptConfig.resolvedPromptScript.length > 0) {
|
|
13411
13805
|
if (!context) {
|
|
@@ -13451,7 +13845,7 @@ async function executePromptTemplate(script, context, config, timeoutMs) {
|
|
|
13451
13845
|
};
|
|
13452
13846
|
const inputJson = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
|
|
13453
13847
|
const scriptPath = script[script.length - 1];
|
|
13454
|
-
const cwd =
|
|
13848
|
+
const cwd = path36.dirname(scriptPath);
|
|
13455
13849
|
try {
|
|
13456
13850
|
const stdout = await executeScript(script, inputJson, timeoutMs, cwd);
|
|
13457
13851
|
const prompt = stdout.trim();
|
|
@@ -13723,16 +14117,16 @@ function createBuiltinRegistry() {
|
|
|
13723
14117
|
}
|
|
13724
14118
|
|
|
13725
14119
|
// src/evaluation/registry/assertion-discovery.ts
|
|
13726
|
-
import
|
|
14120
|
+
import path37 from "node:path";
|
|
13727
14121
|
import fg2 from "fast-glob";
|
|
13728
14122
|
async function discoverAssertions(registry, baseDir) {
|
|
13729
14123
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
13730
14124
|
const candidateDirs = [];
|
|
13731
|
-
let dir =
|
|
13732
|
-
const root =
|
|
14125
|
+
let dir = path37.resolve(baseDir);
|
|
14126
|
+
const root = path37.parse(dir).root;
|
|
13733
14127
|
while (dir !== root) {
|
|
13734
|
-
candidateDirs.push(
|
|
13735
|
-
dir =
|
|
14128
|
+
candidateDirs.push(path37.join(dir, ".agentv", "assertions"));
|
|
14129
|
+
dir = path37.dirname(dir);
|
|
13736
14130
|
}
|
|
13737
14131
|
let files = [];
|
|
13738
14132
|
for (const assertionsDir of candidateDirs) {
|
|
@@ -13748,7 +14142,7 @@ async function discoverAssertions(registry, baseDir) {
|
|
|
13748
14142
|
}
|
|
13749
14143
|
const discoveredTypes = [];
|
|
13750
14144
|
for (const filePath of files) {
|
|
13751
|
-
const basename =
|
|
14145
|
+
const basename = path37.basename(filePath);
|
|
13752
14146
|
const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
13753
14147
|
if (registry.has(typeName)) {
|
|
13754
14148
|
continue;
|
|
@@ -13766,17 +14160,17 @@ async function discoverAssertions(registry, baseDir) {
|
|
|
13766
14160
|
}
|
|
13767
14161
|
|
|
13768
14162
|
// src/evaluation/registry/grader-discovery.ts
|
|
13769
|
-
import
|
|
14163
|
+
import path38 from "node:path";
|
|
13770
14164
|
import fg3 from "fast-glob";
|
|
13771
14165
|
async function discoverGraders(registry, baseDir) {
|
|
13772
14166
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
13773
14167
|
const candidateDirs = [];
|
|
13774
|
-
let dir =
|
|
13775
|
-
const root =
|
|
14168
|
+
let dir = path38.resolve(baseDir);
|
|
14169
|
+
const root = path38.parse(dir).root;
|
|
13776
14170
|
while (dir !== root) {
|
|
13777
|
-
candidateDirs.push(
|
|
13778
|
-
candidateDirs.push(
|
|
13779
|
-
dir =
|
|
14171
|
+
candidateDirs.push(path38.join(dir, ".agentv", "graders"));
|
|
14172
|
+
candidateDirs.push(path38.join(dir, ".agentv", "judges"));
|
|
14173
|
+
dir = path38.dirname(dir);
|
|
13780
14174
|
}
|
|
13781
14175
|
let files = [];
|
|
13782
14176
|
for (const gradersDir of candidateDirs) {
|
|
@@ -13792,7 +14186,7 @@ async function discoverGraders(registry, baseDir) {
|
|
|
13792
14186
|
}
|
|
13793
14187
|
const discoveredTypes = [];
|
|
13794
14188
|
for (const filePath of files) {
|
|
13795
|
-
const basename =
|
|
14189
|
+
const basename = path38.basename(filePath);
|
|
13796
14190
|
const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
13797
14191
|
if (registry.has(typeName)) {
|
|
13798
14192
|
continue;
|
|
@@ -13952,7 +14346,7 @@ function getTCritical(df) {
|
|
|
13952
14346
|
// src/evaluation/workspace/file-changes.ts
|
|
13953
14347
|
import { exec as execCallback } from "node:child_process";
|
|
13954
14348
|
import { readdirSync as readdirSync2, statSync } from "node:fs";
|
|
13955
|
-
import
|
|
14349
|
+
import path39 from "node:path";
|
|
13956
14350
|
import { promisify as promisify4 } from "node:util";
|
|
13957
14351
|
var execAsync4 = promisify4(execCallback);
|
|
13958
14352
|
function gitExecOpts(workspacePath) {
|
|
@@ -13986,10 +14380,10 @@ async function stageNestedRepoChanges(workspacePath) {
|
|
|
13986
14380
|
}
|
|
13987
14381
|
for (const entry of entries) {
|
|
13988
14382
|
if (entry === ".git" || entry === "node_modules") continue;
|
|
13989
|
-
const childPath =
|
|
14383
|
+
const childPath = path39.join(workspacePath, entry);
|
|
13990
14384
|
try {
|
|
13991
14385
|
if (!statSync(childPath).isDirectory()) continue;
|
|
13992
|
-
if (!statSync(
|
|
14386
|
+
if (!statSync(path39.join(childPath, ".git")).isDirectory()) continue;
|
|
13993
14387
|
} catch {
|
|
13994
14388
|
continue;
|
|
13995
14389
|
}
|
|
@@ -13999,8 +14393,8 @@ async function stageNestedRepoChanges(workspacePath) {
|
|
|
13999
14393
|
}
|
|
14000
14394
|
|
|
14001
14395
|
// src/evaluation/workspace/manager.ts
|
|
14002
|
-
import { cp, mkdir as mkdir12, readdir as
|
|
14003
|
-
import
|
|
14396
|
+
import { cp, mkdir as mkdir12, readdir as readdir4, rm as rm4, stat as stat6 } from "node:fs/promises";
|
|
14397
|
+
import path40 from "node:path";
|
|
14004
14398
|
var TemplateNotFoundError = class extends Error {
|
|
14005
14399
|
constructor(templatePath) {
|
|
14006
14400
|
super(`Workspace template not found: ${templatePath}`);
|
|
@@ -14022,7 +14416,7 @@ var WorkspaceCreationError = class extends Error {
|
|
|
14022
14416
|
};
|
|
14023
14417
|
async function isDirectory(filePath) {
|
|
14024
14418
|
try {
|
|
14025
|
-
const stats = await
|
|
14419
|
+
const stats = await stat6(filePath);
|
|
14026
14420
|
return stats.isDirectory();
|
|
14027
14421
|
} catch {
|
|
14028
14422
|
return false;
|
|
@@ -14030,14 +14424,14 @@ async function isDirectory(filePath) {
|
|
|
14030
14424
|
}
|
|
14031
14425
|
function getWorkspacePath(evalRunId, caseId, workspaceRoot) {
|
|
14032
14426
|
const root = workspaceRoot ?? getWorkspacesRoot();
|
|
14033
|
-
return
|
|
14427
|
+
return path40.join(root, evalRunId, caseId);
|
|
14034
14428
|
}
|
|
14035
14429
|
async function copyDirectoryRecursive(src, dest) {
|
|
14036
14430
|
await mkdir12(dest, { recursive: true });
|
|
14037
|
-
const entries = await
|
|
14431
|
+
const entries = await readdir4(src, { withFileTypes: true });
|
|
14038
14432
|
for (const entry of entries) {
|
|
14039
|
-
const srcPath =
|
|
14040
|
-
const destPath =
|
|
14433
|
+
const srcPath = path40.join(src, entry.name);
|
|
14434
|
+
const destPath = path40.join(dest, entry.name);
|
|
14041
14435
|
if (entry.name === ".git") {
|
|
14042
14436
|
continue;
|
|
14043
14437
|
}
|
|
@@ -14049,7 +14443,7 @@ async function copyDirectoryRecursive(src, dest) {
|
|
|
14049
14443
|
}
|
|
14050
14444
|
}
|
|
14051
14445
|
async function createTempWorkspace(templatePath, evalRunId, caseId, workspaceRoot) {
|
|
14052
|
-
const resolvedTemplatePath =
|
|
14446
|
+
const resolvedTemplatePath = path40.resolve(templatePath);
|
|
14053
14447
|
if (!await fileExists(resolvedTemplatePath)) {
|
|
14054
14448
|
throw new TemplateNotFoundError(resolvedTemplatePath);
|
|
14055
14449
|
}
|
|
@@ -14098,7 +14492,7 @@ async function cleanupWorkspace(workspacePath) {
|
|
|
14098
14492
|
}
|
|
14099
14493
|
async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
|
|
14100
14494
|
const root = workspaceRoot ?? getWorkspacesRoot();
|
|
14101
|
-
const evalDir =
|
|
14495
|
+
const evalDir = path40.join(root, evalRunId);
|
|
14102
14496
|
if (await fileExists(evalDir)) {
|
|
14103
14497
|
await rm4(evalDir, { recursive: true, force: true });
|
|
14104
14498
|
}
|
|
@@ -14108,8 +14502,8 @@ async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
|
|
|
14108
14502
|
import { execFile } from "node:child_process";
|
|
14109
14503
|
import { createHash } from "node:crypto";
|
|
14110
14504
|
import { existsSync as existsSync2 } from "node:fs";
|
|
14111
|
-
import { cp as cp2, mkdir as mkdir13, readFile as
|
|
14112
|
-
import
|
|
14505
|
+
import { cp as cp2, mkdir as mkdir13, readFile as readFile12, readdir as readdir5, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
|
|
14506
|
+
import path41 from "node:path";
|
|
14113
14507
|
import { promisify as promisify5 } from "node:util";
|
|
14114
14508
|
var execFileAsync = promisify5(execFile);
|
|
14115
14509
|
function gitEnv() {
|
|
@@ -14161,10 +14555,10 @@ function computeWorkspaceFingerprint(repos) {
|
|
|
14161
14555
|
}
|
|
14162
14556
|
async function copyDirectoryRecursive2(src, dest, skipDirs) {
|
|
14163
14557
|
await mkdir13(dest, { recursive: true });
|
|
14164
|
-
const entries = await
|
|
14558
|
+
const entries = await readdir5(src, { withFileTypes: true });
|
|
14165
14559
|
for (const entry of entries) {
|
|
14166
|
-
const srcPath =
|
|
14167
|
-
const destPath =
|
|
14560
|
+
const srcPath = path41.join(src, entry.name);
|
|
14561
|
+
const destPath = path41.join(dest, entry.name);
|
|
14168
14562
|
if (entry.name === ".git") {
|
|
14169
14563
|
continue;
|
|
14170
14564
|
}
|
|
@@ -14197,7 +14591,7 @@ var WorkspacePoolManager = class {
|
|
|
14197
14591
|
async acquireWorkspace(options) {
|
|
14198
14592
|
const { templatePath, repos, maxSlots, repoManager, poolReset } = options;
|
|
14199
14593
|
const fingerprint = computeWorkspaceFingerprint(repos);
|
|
14200
|
-
const poolDir =
|
|
14594
|
+
const poolDir = path41.join(this.poolRoot, fingerprint);
|
|
14201
14595
|
await mkdir13(poolDir, { recursive: true });
|
|
14202
14596
|
const drifted = await this.checkDrift(poolDir, fingerprint);
|
|
14203
14597
|
if (drifted) {
|
|
@@ -14207,7 +14601,7 @@ var WorkspacePoolManager = class {
|
|
|
14207
14601
|
await this.removeAllSlots(poolDir);
|
|
14208
14602
|
}
|
|
14209
14603
|
for (let i = 0; i < maxSlots; i++) {
|
|
14210
|
-
const slotPath =
|
|
14604
|
+
const slotPath = path41.join(poolDir, `slot-${i}`);
|
|
14211
14605
|
const lockPath = `${slotPath}.lock`;
|
|
14212
14606
|
const locked = await this.tryLock(lockPath);
|
|
14213
14607
|
if (!locked) {
|
|
@@ -14269,7 +14663,7 @@ var WorkspacePoolManager = class {
|
|
|
14269
14663
|
throw err;
|
|
14270
14664
|
}
|
|
14271
14665
|
try {
|
|
14272
|
-
const pidStr = await
|
|
14666
|
+
const pidStr = await readFile12(lockPath, "utf-8");
|
|
14273
14667
|
const pid = Number.parseInt(pidStr.trim(), 10);
|
|
14274
14668
|
if (!Number.isNaN(pid)) {
|
|
14275
14669
|
try {
|
|
@@ -14294,9 +14688,9 @@ var WorkspacePoolManager = class {
|
|
|
14294
14688
|
* Returns false (no drift) if metadata.json doesn't exist (first use).
|
|
14295
14689
|
*/
|
|
14296
14690
|
async checkDrift(poolDir, fingerprint) {
|
|
14297
|
-
const metadataPath =
|
|
14691
|
+
const metadataPath = path41.join(poolDir, "metadata.json");
|
|
14298
14692
|
try {
|
|
14299
|
-
const raw = await
|
|
14693
|
+
const raw = await readFile12(metadataPath, "utf-8");
|
|
14300
14694
|
const metadata = JSON.parse(raw);
|
|
14301
14695
|
return metadata.fingerprint !== fingerprint;
|
|
14302
14696
|
} catch {
|
|
@@ -14311,17 +14705,17 @@ var WorkspacePoolManager = class {
|
|
|
14311
14705
|
repos,
|
|
14312
14706
|
createdAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
14313
14707
|
};
|
|
14314
|
-
await writeFile7(
|
|
14708
|
+
await writeFile7(path41.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
|
|
14315
14709
|
}
|
|
14316
14710
|
/** Remove all slot directories and their lock files from a pool directory. */
|
|
14317
14711
|
async removeAllSlots(poolDir) {
|
|
14318
|
-
const entries = await
|
|
14712
|
+
const entries = await readdir5(poolDir);
|
|
14319
14713
|
for (const entry of entries) {
|
|
14320
14714
|
if (entry.startsWith("slot-") && !entry.endsWith(".lock")) {
|
|
14321
|
-
const lockPath =
|
|
14715
|
+
const lockPath = path41.join(poolDir, `${entry}.lock`);
|
|
14322
14716
|
if (existsSync2(lockPath)) {
|
|
14323
14717
|
try {
|
|
14324
|
-
const pidStr = await
|
|
14718
|
+
const pidStr = await readFile12(lockPath, "utf-8");
|
|
14325
14719
|
const pid = Number.parseInt(pidStr.trim(), 10);
|
|
14326
14720
|
if (!Number.isNaN(pid)) {
|
|
14327
14721
|
try {
|
|
@@ -14334,12 +14728,12 @@ var WorkspacePoolManager = class {
|
|
|
14334
14728
|
} catch {
|
|
14335
14729
|
}
|
|
14336
14730
|
}
|
|
14337
|
-
await rm5(
|
|
14731
|
+
await rm5(path41.join(poolDir, entry), { recursive: true, force: true });
|
|
14338
14732
|
await rm5(lockPath, { force: true }).catch(() => {
|
|
14339
14733
|
});
|
|
14340
14734
|
}
|
|
14341
14735
|
}
|
|
14342
|
-
await rm5(
|
|
14736
|
+
await rm5(path41.join(poolDir, "metadata.json"), { force: true }).catch(() => {
|
|
14343
14737
|
});
|
|
14344
14738
|
}
|
|
14345
14739
|
/**
|
|
@@ -14349,7 +14743,7 @@ var WorkspacePoolManager = class {
|
|
|
14349
14743
|
*/
|
|
14350
14744
|
async resetSlot(slotPath, templatePath, repos, poolReset = "fast") {
|
|
14351
14745
|
for (const repo of repos) {
|
|
14352
|
-
const repoDir =
|
|
14746
|
+
const repoDir = path41.join(slotPath, repo.path);
|
|
14353
14747
|
if (!existsSync2(repoDir)) {
|
|
14354
14748
|
continue;
|
|
14355
14749
|
}
|
|
@@ -14376,7 +14770,7 @@ var WorkspacePoolManager = class {
|
|
|
14376
14770
|
// src/evaluation/workspace/repo-manager.ts
|
|
14377
14771
|
import { execFile as execFile2 } from "node:child_process";
|
|
14378
14772
|
import { existsSync as existsSync3 } from "node:fs";
|
|
14379
|
-
import
|
|
14773
|
+
import path42 from "node:path";
|
|
14380
14774
|
import { promisify as promisify6 } from "node:util";
|
|
14381
14775
|
var execFileAsync2 = promisify6(execFile2);
|
|
14382
14776
|
var DEFAULT_TIMEOUT_MS2 = 3e5;
|
|
@@ -14476,7 +14870,7 @@ ${lines.join("\n")}`;
|
|
|
14476
14870
|
* Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
|
|
14477
14871
|
*/
|
|
14478
14872
|
async materialize(repo, workspacePath) {
|
|
14479
|
-
const targetDir =
|
|
14873
|
+
const targetDir = path42.join(workspacePath, repo.path);
|
|
14480
14874
|
const sourceUrl = getSourceUrl(repo.source);
|
|
14481
14875
|
const startedAt = Date.now();
|
|
14482
14876
|
if (this.verbose) {
|
|
@@ -14567,7 +14961,7 @@ ${lines.join("\n")}`;
|
|
|
14567
14961
|
async reset(repos, workspacePath, reset) {
|
|
14568
14962
|
const cleanFlag = reset === "strict" ? "-fdx" : "-fd";
|
|
14569
14963
|
for (const repo of repos) {
|
|
14570
|
-
const targetDir =
|
|
14964
|
+
const targetDir = path42.join(workspacePath, repo.path);
|
|
14571
14965
|
await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
|
|
14572
14966
|
await this.runGit(["clean", cleanFlag], { cwd: targetDir });
|
|
14573
14967
|
}
|
|
@@ -14575,36 +14969,36 @@ ${lines.join("\n")}`;
|
|
|
14575
14969
|
};
|
|
14576
14970
|
|
|
14577
14971
|
// src/evaluation/workspace/resolve.ts
|
|
14578
|
-
import { readdir as
|
|
14579
|
-
import
|
|
14972
|
+
import { readdir as readdir6, stat as stat7 } from "node:fs/promises";
|
|
14973
|
+
import path43 from "node:path";
|
|
14580
14974
|
async function resolveWorkspaceTemplate(templatePath) {
|
|
14581
14975
|
if (!templatePath) {
|
|
14582
14976
|
return void 0;
|
|
14583
14977
|
}
|
|
14584
|
-
const resolved =
|
|
14585
|
-
const stats = await
|
|
14978
|
+
const resolved = path43.resolve(templatePath);
|
|
14979
|
+
const stats = await stat7(resolved);
|
|
14586
14980
|
if (stats.isFile()) {
|
|
14587
14981
|
return {
|
|
14588
|
-
dir:
|
|
14982
|
+
dir: path43.dirname(resolved),
|
|
14589
14983
|
workspaceFile: resolved
|
|
14590
14984
|
};
|
|
14591
14985
|
}
|
|
14592
14986
|
if (!stats.isDirectory()) {
|
|
14593
14987
|
throw new Error(`workspace template is neither a file nor a directory: ${resolved}`);
|
|
14594
14988
|
}
|
|
14595
|
-
const entries = await
|
|
14989
|
+
const entries = await readdir6(resolved);
|
|
14596
14990
|
const workspaceFiles = entries.filter((e) => e.endsWith(".code-workspace"));
|
|
14597
14991
|
if (workspaceFiles.length === 1) {
|
|
14598
14992
|
return {
|
|
14599
14993
|
dir: resolved,
|
|
14600
|
-
workspaceFile:
|
|
14994
|
+
workspaceFile: path43.join(resolved, workspaceFiles[0])
|
|
14601
14995
|
};
|
|
14602
14996
|
}
|
|
14603
14997
|
if (workspaceFiles.length > 1) {
|
|
14604
14998
|
const conventionFile = workspaceFiles.find((f) => f === "template.code-workspace");
|
|
14605
14999
|
return {
|
|
14606
15000
|
dir: resolved,
|
|
14607
|
-
workspaceFile: conventionFile ?
|
|
15001
|
+
workspaceFile: conventionFile ? path43.join(resolved, conventionFile) : void 0
|
|
14608
15002
|
};
|
|
14609
15003
|
}
|
|
14610
15004
|
return { dir: resolved };
|
|
@@ -14820,7 +15214,7 @@ async function runEvaluation(options) {
|
|
|
14820
15214
|
];
|
|
14821
15215
|
const evaluatorRegistry = buildEvaluatorRegistry(evaluators, resolveGraderProvider);
|
|
14822
15216
|
const typeRegistry = createBuiltinRegistry();
|
|
14823
|
-
const discoveryBaseDir = evalFilePath ?
|
|
15217
|
+
const discoveryBaseDir = evalFilePath ? path44.dirname(path44.resolve(evalFilePath)) : process.cwd();
|
|
14824
15218
|
const evalDir = discoveryBaseDir;
|
|
14825
15219
|
await discoverAssertions(typeRegistry, discoveryBaseDir);
|
|
14826
15220
|
await discoverGraders(typeRegistry, discoveryBaseDir);
|
|
@@ -14960,11 +15354,11 @@ async function runEvaluation(options) {
|
|
|
14960
15354
|
let staticMaterialised = false;
|
|
14961
15355
|
if (useStaticWorkspace && configuredStaticPath) {
|
|
14962
15356
|
const isYamlConfiguredPath = !cliWorkspacePath && !!yamlWorkspacePath;
|
|
14963
|
-
const dirExists = await
|
|
15357
|
+
const dirExists = await stat8(configuredStaticPath).then(
|
|
14964
15358
|
(s) => s.isDirectory(),
|
|
14965
15359
|
() => false
|
|
14966
15360
|
);
|
|
14967
|
-
const isEmpty = dirExists ? (await
|
|
15361
|
+
const isEmpty = dirExists ? (await readdir7(configuredStaticPath)).length === 0 : false;
|
|
14968
15362
|
if (isYamlConfiguredPath && (!dirExists || isEmpty)) {
|
|
14969
15363
|
if (!dirExists) {
|
|
14970
15364
|
await mkdir14(configuredStaticPath, { recursive: true });
|
|
@@ -15017,9 +15411,9 @@ async function runEvaluation(options) {
|
|
|
15017
15411
|
}
|
|
15018
15412
|
try {
|
|
15019
15413
|
if (suiteWorkspaceFile && sharedWorkspacePath) {
|
|
15020
|
-
const copiedWorkspaceFile =
|
|
15414
|
+
const copiedWorkspaceFile = path44.join(sharedWorkspacePath, path44.basename(suiteWorkspaceFile));
|
|
15021
15415
|
try {
|
|
15022
|
-
await
|
|
15416
|
+
await stat8(copiedWorkspaceFile);
|
|
15023
15417
|
suiteWorkspaceFile = copiedWorkspaceFile;
|
|
15024
15418
|
} catch {
|
|
15025
15419
|
}
|
|
@@ -15599,9 +15993,9 @@ async function runEvalCase(options) {
|
|
|
15599
15993
|
);
|
|
15600
15994
|
}
|
|
15601
15995
|
if (caseWorkspaceFile && workspacePath) {
|
|
15602
|
-
const copiedFile =
|
|
15996
|
+
const copiedFile = path44.join(workspacePath, path44.basename(caseWorkspaceFile));
|
|
15603
15997
|
try {
|
|
15604
|
-
await
|
|
15998
|
+
await stat8(copiedFile);
|
|
15605
15999
|
caseWorkspaceFile = copiedFile;
|
|
15606
16000
|
} catch {
|
|
15607
16001
|
}
|
|
@@ -15661,10 +16055,10 @@ async function runEvalCase(options) {
|
|
|
15661
16055
|
const files = evalCase.metadata.agent_skills_files;
|
|
15662
16056
|
if (baseDir && files.length > 0) {
|
|
15663
16057
|
for (const relPath of files) {
|
|
15664
|
-
const srcPath =
|
|
15665
|
-
const destPath =
|
|
16058
|
+
const srcPath = path44.resolve(baseDir, relPath);
|
|
16059
|
+
const destPath = path44.resolve(workspacePath, relPath);
|
|
15666
16060
|
try {
|
|
15667
|
-
await mkdir14(
|
|
16061
|
+
await mkdir14(path44.dirname(destPath), { recursive: true });
|
|
15668
16062
|
await copyFile2(srcPath, destPath);
|
|
15669
16063
|
} catch (error) {
|
|
15670
16064
|
const message = error instanceof Error ? error.message : String(error);
|
|
@@ -16310,7 +16704,7 @@ async function runEvaluatorList(options) {
|
|
|
16310
16704
|
fileChanges,
|
|
16311
16705
|
workspacePath
|
|
16312
16706
|
};
|
|
16313
|
-
const evalFileDir = evalCase.file_paths[0] ?
|
|
16707
|
+
const evalFileDir = evalCase.file_paths[0] ? path44.dirname(evalCase.file_paths[0]) : process.cwd();
|
|
16314
16708
|
const dispatchContext = {
|
|
16315
16709
|
graderProvider,
|
|
16316
16710
|
targetResolver,
|
|
@@ -16644,7 +17038,7 @@ function computeWeightedMean(entries) {
|
|
|
16644
17038
|
|
|
16645
17039
|
// src/evaluation/evaluate.ts
|
|
16646
17040
|
import { existsSync as existsSync4 } from "node:fs";
|
|
16647
|
-
import
|
|
17041
|
+
import path45 from "node:path";
|
|
16648
17042
|
|
|
16649
17043
|
// src/evaluation/providers/function-provider.ts
|
|
16650
17044
|
function createFunctionProvider(taskFn) {
|
|
@@ -16681,7 +17075,7 @@ async function evaluate(config) {
|
|
|
16681
17075
|
}
|
|
16682
17076
|
const gitRoot = await findGitRoot(process.cwd());
|
|
16683
17077
|
const repoRoot = gitRoot ?? process.cwd();
|
|
16684
|
-
const testFilePath = config.specFile ?
|
|
17078
|
+
const testFilePath = config.specFile ? path45.resolve(config.specFile) : path45.join(process.cwd(), "__programmatic__.yaml");
|
|
16685
17079
|
await loadEnvHierarchy(repoRoot, testFilePath);
|
|
16686
17080
|
let resolvedTarget;
|
|
16687
17081
|
let taskProvider;
|
|
@@ -16802,10 +17196,10 @@ function computeSummary(results, durationMs) {
|
|
|
16802
17196
|
var TARGET_FILE_CANDIDATES = [".agentv/targets.yaml", ".agentv/targets.yml"];
|
|
16803
17197
|
async function discoverDefaultTarget(repoRoot) {
|
|
16804
17198
|
const cwd = process.cwd();
|
|
16805
|
-
const chain = buildDirectoryChain(
|
|
17199
|
+
const chain = buildDirectoryChain(path45.join(cwd, "_placeholder"), repoRoot);
|
|
16806
17200
|
for (const dir of chain) {
|
|
16807
17201
|
for (const candidate of TARGET_FILE_CANDIDATES) {
|
|
16808
|
-
const targetsPath =
|
|
17202
|
+
const targetsPath = path45.join(dir, candidate);
|
|
16809
17203
|
if (!existsSync4(targetsPath)) continue;
|
|
16810
17204
|
try {
|
|
16811
17205
|
const definitions = await readTargetDefinitions(targetsPath);
|
|
@@ -16822,7 +17216,7 @@ async function loadEnvHierarchy(repoRoot, startPath) {
|
|
|
16822
17216
|
const chain = buildDirectoryChain(startPath, repoRoot);
|
|
16823
17217
|
const envFiles = [];
|
|
16824
17218
|
for (const dir of chain) {
|
|
16825
|
-
const envPath =
|
|
17219
|
+
const envPath = path45.join(dir, ".env");
|
|
16826
17220
|
if (existsSync4(envPath)) envFiles.push(envPath);
|
|
16827
17221
|
}
|
|
16828
17222
|
for (let i = 0; i < envFiles.length; i++) {
|
|
@@ -17001,8 +17395,8 @@ function buildPrompt(criteria, question, referenceAnswer) {
|
|
|
17001
17395
|
}
|
|
17002
17396
|
|
|
17003
17397
|
// src/evaluation/cache/response-cache.ts
|
|
17004
|
-
import { mkdir as mkdir15, readFile as
|
|
17005
|
-
import
|
|
17398
|
+
import { mkdir as mkdir15, readFile as readFile13, writeFile as writeFile8 } from "node:fs/promises";
|
|
17399
|
+
import path46 from "node:path";
|
|
17006
17400
|
var DEFAULT_CACHE_PATH = ".agentv/cache";
|
|
17007
17401
|
var ResponseCache = class {
|
|
17008
17402
|
cachePath;
|
|
@@ -17012,7 +17406,7 @@ var ResponseCache = class {
|
|
|
17012
17406
|
async get(key) {
|
|
17013
17407
|
const filePath = this.keyToPath(key);
|
|
17014
17408
|
try {
|
|
17015
|
-
const data = await
|
|
17409
|
+
const data = await readFile13(filePath, "utf8");
|
|
17016
17410
|
return JSON.parse(data);
|
|
17017
17411
|
} catch {
|
|
17018
17412
|
return void 0;
|
|
@@ -17020,13 +17414,13 @@ var ResponseCache = class {
|
|
|
17020
17414
|
}
|
|
17021
17415
|
async set(key, value) {
|
|
17022
17416
|
const filePath = this.keyToPath(key);
|
|
17023
|
-
const dir =
|
|
17417
|
+
const dir = path46.dirname(filePath);
|
|
17024
17418
|
await mkdir15(dir, { recursive: true });
|
|
17025
17419
|
await writeFile8(filePath, JSON.stringify(value, null, 2), "utf8");
|
|
17026
17420
|
}
|
|
17027
17421
|
keyToPath(key) {
|
|
17028
17422
|
const prefix = key.slice(0, 2);
|
|
17029
|
-
return
|
|
17423
|
+
return path46.join(this.cachePath, prefix, `${key}.json`);
|
|
17030
17424
|
}
|
|
17031
17425
|
};
|
|
17032
17426
|
function shouldEnableCache(params) {
|
|
@@ -17647,6 +18041,7 @@ export {
|
|
|
17647
18041
|
defineConfig,
|
|
17648
18042
|
detectFormat,
|
|
17649
18043
|
discoverAssertions,
|
|
18044
|
+
discoverCopilotSessions,
|
|
17650
18045
|
discoverGraders,
|
|
17651
18046
|
discoverGraders as discoverJudges,
|
|
17652
18047
|
discoverProviders,
|