@agentv/core 2.5.4 → 2.5.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-RP3M7COZ.js → chunk-LGQ5OPJD.js} +50 -1
- package/dist/chunk-LGQ5OPJD.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +25 -0
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +25 -1
- package/dist/evaluation/validation/index.js.map +1 -1
- package/dist/index.cjs +928 -309
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +31 -2
- package/dist/index.d.ts +31 -2
- package/dist/index.js +800 -231
- package/dist/index.js.map +1 -1
- package/package.json +5 -2
- package/dist/chunk-RP3M7COZ.js.map +0 -1
package/dist/index.js
CHANGED
|
@@ -10,7 +10,7 @@ import {
|
|
|
10
10
|
readTextFile,
|
|
11
11
|
resolveFileReference,
|
|
12
12
|
resolveTargetDefinition
|
|
13
|
-
} from "./chunk-
|
|
13
|
+
} from "./chunk-LGQ5OPJD.js";
|
|
14
14
|
|
|
15
15
|
// src/evaluation/types.ts
|
|
16
16
|
var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
|
|
@@ -987,12 +987,13 @@ function parseRubricItems(rawRubrics, evaluatorName, evalId) {
|
|
|
987
987
|
let scoreRanges;
|
|
988
988
|
const rawScoreRanges = rawRubric.score_ranges;
|
|
989
989
|
if (rawScoreRanges !== void 0) {
|
|
990
|
-
|
|
990
|
+
const normalized = normalizeScoreRangesShorthand(rawScoreRanges);
|
|
991
|
+
if (!Array.isArray(normalized)) {
|
|
991
992
|
throw new Error(
|
|
992
|
-
`Invalid score_ranges for rubric '${id}' in evaluator '${evaluatorName}' in '${evalId}': must be an array`
|
|
993
|
+
`Invalid score_ranges for rubric '${id}' in evaluator '${evaluatorName}' in '${evalId}': must be an array or shorthand map`
|
|
993
994
|
);
|
|
994
995
|
}
|
|
995
|
-
scoreRanges = parseScoreRanges(
|
|
996
|
+
scoreRanges = parseScoreRanges(normalized, id, evaluatorName, evalId);
|
|
996
997
|
items.push({
|
|
997
998
|
id,
|
|
998
999
|
weight,
|
|
@@ -1020,6 +1021,37 @@ function parseRubricItems(rawRubrics, evaluatorName, evalId) {
|
|
|
1020
1021
|
}
|
|
1021
1022
|
return items.length > 0 ? items : void 0;
|
|
1022
1023
|
}
|
|
1024
|
+
/**
 * Expands the `score_ranges` shorthand map into the array form.
 *
 * The shorthand is an object whose keys are the lower bounds of consecutive
 * score bands (integers 0..10) and whose values are non-empty outcome strings,
 * e.g. `{ "0": "bad", "5": "ok" }` -> `[{ score_range: [0, 4], ... }, { score_range: [5, 10], ... }]`.
 *
 * Anything that is not recognisably shorthand (arrays, non-objects, empty
 * objects, objects with non-canonical keys or non-string values) is returned
 * unchanged so the caller can validate or reject it.
 *
 * @param {unknown} raw - Raw `score_ranges` value from the eval definition.
 * @returns {unknown} The expanded array, or `raw` itself when it is not shorthand.
 * @throws {Error} When the shorthand map does not contain a band starting at 0.
 */
function normalizeScoreRangesShorthand(raw) {
  if (Array.isArray(raw)) return raw;
  if (!isJsonObject2(raw)) return raw;
  const keys = Object.keys(raw);
  if (keys.length === 0) return raw;
  const numericKeys = [];
  for (const key of keys) {
    const num = Number(key);
    if (!Number.isInteger(num) || num < 0 || num > 10) {
      return raw;
    }
    // Number() also accepts "", "05", "1e0", " 3" and similar spellings. Those
    // would later be looked up as raw[String(num)], which is a different key,
    // yielding an undefined outcome or duplicate bands. Only canonical decimal
    // keys are treated as shorthand.
    if (String(num) !== key) {
      return raw;
    }
    if (typeof raw[key] !== "string" || raw[key].length === 0) {
      return raw;
    }
    numericKeys.push(num);
  }
  numericKeys.sort((a, b) => a - b);
  if (numericKeys[0] !== 0) {
    throw new Error(`score_ranges shorthand map must start at 0 (got ${numericKeys[0]})`);
  }
  const result = [];
  for (let i = 0; i < numericKeys.length; i++) {
    const min = numericKeys[i];
    // Each band ends just below the next lower bound; the last band runs to 10.
    const max = i < numericKeys.length - 1 ? numericKeys[i + 1] - 1 : 10;
    result.push({
      score_range: [min, max],
      expected_outcome: raw[String(min)]
    });
  }
  return result;
}
|
|
1023
1055
|
function parseScoreRanges(rawRanges, rubricId, evaluatorName, evalId) {
|
|
1024
1056
|
const ranges = [];
|
|
1025
1057
|
for (const [index, rawRange] of rawRanges.entries()) {
|
|
@@ -1102,7 +1134,8 @@ function parseInlineRubrics(rawRubrics) {
|
|
|
1102
1134
|
}
|
|
1103
1135
|
const expectedOutcome = asString(rubric.expected_outcome) ?? asString(rubric.description) ?? "";
|
|
1104
1136
|
const rawScoreRanges = rubric.score_ranges;
|
|
1105
|
-
const
|
|
1137
|
+
const normalizedScoreRanges = rawScoreRanges !== void 0 ? normalizeScoreRangesShorthand(rawScoreRanges) : void 0;
|
|
1138
|
+
const scoreRanges = Array.isArray(normalizedScoreRanges) && normalizedScoreRanges.length > 0 ? normalizedScoreRanges.filter((r) => isJsonObject2(r)).map((range) => ({
|
|
1106
1139
|
score_range: Array.isArray(range.score_range) ? range.score_range : [0, 10],
|
|
1107
1140
|
expected_outcome: asString(range.expected_outcome) ?? asString(range.description) ?? ""
|
|
1108
1141
|
})).filter((r) => r.expected_outcome.length > 0) : void 0;
|
|
@@ -3189,8 +3222,8 @@ async function runClaudeCodeWithTempFiles(options, stdoutFile, stderrFile, exitF
|
|
|
3189
3222
|
};
|
|
3190
3223
|
const fileExists4 = async (filePath) => {
|
|
3191
3224
|
try {
|
|
3192
|
-
const { access:
|
|
3193
|
-
await
|
|
3225
|
+
const { access: access6 } = await import("node:fs/promises");
|
|
3226
|
+
await access6(filePath);
|
|
3194
3227
|
return true;
|
|
3195
3228
|
} catch {
|
|
3196
3229
|
return false;
|
|
@@ -4515,6 +4548,538 @@ function shouldShellExecute(executable) {
|
|
|
4515
4548
|
return lower.endsWith(".cmd") || lower.endsWith(".bat") || lower.endsWith(".ps1");
|
|
4516
4549
|
}
|
|
4517
4550
|
|
|
4551
|
+
// src/evaluation/providers/copilot-cli.ts
|
|
4552
|
+
import { exec as execCallback2, spawn as spawn3 } from "node:child_process";
|
|
4553
|
+
import { randomUUID as randomUUID3 } from "node:crypto";
|
|
4554
|
+
import { constants as constants3, createWriteStream as createWriteStream3 } from "node:fs";
|
|
4555
|
+
import { access as access3, copyFile, mkdir as mkdir3, mkdtemp as mkdtemp3, rm as rm3, writeFile as writeFile3 } from "node:fs/promises";
|
|
4556
|
+
import { tmpdir as tmpdir3 } from "node:os";
|
|
4557
|
+
import path12 from "node:path";
|
|
4558
|
+
import { promisify as promisify3 } from "node:util";
|
|
4559
|
+
|
|
4560
|
+
// src/evaluation/providers/copilot-cli-log-tracker.ts
|
|
4561
|
+
// Registry-wide symbols (Symbol.for) used as property keys on globalThis for
// the Copilot CLI log list and its subscriber set.
var GLOBAL_LOGS_KEY3 = Symbol.for("agentv.copilotCliLogs");
var GLOBAL_SUBSCRIBERS_KEY3 = Symbol.for("agentv.copilotCliLogSubscribers");

/**
 * Returns the array of Copilot CLI log entries stored on globalThis,
 * creating and storing an empty array on first use.
 */
function getCopilotCliLogStore() {
  const registry = globalThis;
  if (!registry[GLOBAL_LOGS_KEY3]) {
    registry[GLOBAL_LOGS_KEY3] = [];
  }
  return registry[GLOBAL_LOGS_KEY3];
}
|
|
4573
|
+
/**
 * Returns the Set of Copilot CLI log listeners stored on globalThis,
 * creating and storing an empty Set on first use.
 */
function getSubscriberStore3() {
  const registry = globalThis;
  if (!registry[GLOBAL_SUBSCRIBERS_KEY3]) {
    registry[GLOBAL_SUBSCRIBERS_KEY3] = /* @__PURE__ */ new Set();
  }
  return registry[GLOBAL_SUBSCRIBERS_KEY3];
}
|
|
4583
|
+
/**
 * Delivers a log entry to every registered listener.
 * Listeners are snapshotted first, and a listener that throws is reported via
 * console.warn without stopping delivery to the remaining listeners.
 */
function notifySubscribers3(entry) {
  const snapshot = [...getSubscriberStore3()];
  snapshot.forEach((notify) => {
    try {
      notify(entry);
    } catch (failure) {
      const reason = failure instanceof Error ? failure.message : String(failure);
      console.warn(`Copilot CLI log subscriber failed: ${reason}`);
    }
  });
}
|
|
4594
|
+
/**
 * Appends a log entry to the shared store, then notifies subscribers.
 */
function recordCopilotCliLogEntry(entry) {
  const store = getCopilotCliLogStore();
  store.push(entry);
  notifySubscribers3(entry);
}
|
|
4598
|
+
/**
 * Removes and returns every entry currently in the shared log store.
 * Returns a fresh empty array when the store has nothing in it.
 */
function consumeCopilotCliLogEntries() {
  // splice(0) empties the store in place and hands back the removed entries.
  return getCopilotCliLogStore().splice(0);
}
|
|
4605
|
+
/**
 * Registers a listener for newly recorded Copilot CLI log entries.
 * @returns {() => void} Function that removes the listener again.
 */
function subscribeToCopilotCliLogEntries(listener) {
  const listeners = getSubscriberStore3();
  listeners.add(listener);
  const unsubscribe = () => {
    listeners.delete(listener);
  };
  return unsubscribe;
}
|
|
4612
|
+
|
|
4613
|
+
// src/evaluation/providers/copilot-cli.ts
|
|
4614
|
+
// Promise-returning wrapper around child_process.exec.
var execAsync3 = promisify3(execCallback2);
// Name prefix for the per-invocation temporary workspace directory.
var WORKSPACE_PREFIX3 = "agentv-copilot-";
// File name the assembled prompt is written to inside the workspace.
var PROMPT_FILENAME3 = "prompt.md";
// System prompt used when the target config does not supply one.
var DEFAULT_SYSTEM_PROMPT4 = [
  "**IMPORTANT**: Follow these instructions for your response:",
  "- Do NOT create any additional output files in the workspace.",
  "- All intended file outputs/changes MUST be written in your response.",
  "- For each intended file, include the relative path and unified git diff following the convention `diff --git ...`.",
  "This is required for evaluation scoring."
].join("\n");
|
|
4622
|
+
/**
 * Copies each input file into the flat workspace directory and reports where
 * it ended up.
 *
 * Files that share a base name get a numeric suffix (`foo.txt`, `foo_1.txt`, ...).
 * Every name handed out is tracked, so a generated name can never overwrite
 * another input that already has that literal name (e.g. a real `foo_1.txt`).
 *
 * @param {string} workspaceRoot - Existing directory to copy into.
 * @param {Iterable<string>} inputFiles - Paths of the files to copy.
 * @returns {Promise<Array<{originalPath: string, workspaceRelativePath: string}>>}
 *   One mapping per input, in input order.
 */
async function copyInputFilesToWorkspace(workspaceRoot, inputFiles) {
  // Next suffix to try for each base name.
  const usedNames = /* @__PURE__ */ new Map();
  // Every workspace-relative name already assigned.
  const takenNames = /* @__PURE__ */ new Set();
  const mappings = [];
  for (const originalPath of inputFiles) {
    const ext = path12.extname(originalPath);
    const stem = path12.basename(originalPath, ext);
    const baseKey = `${stem}${ext}`;
    let count = usedNames.get(baseKey) ?? 0;
    let relativeName = count === 0 ? baseKey : `${stem}_${count}${ext}`;
    // Skip past names that another input has already claimed.
    while (takenNames.has(relativeName)) {
      count += 1;
      relativeName = `${stem}_${count}${ext}`;
    }
    usedNames.set(baseKey, count + 1);
    takenNames.add(relativeName);
    const dest = path12.join(workspaceRoot, relativeName);
    await copyFile(originalPath, dest);
    mappings.push({ originalPath, workspaceRelativePath: relativeName });
  }
  return mappings;
}
|
|
4643
|
+
/**
 * Builds the prompt section that tells the agent which workspace files to read
 * before answering. Returns "" when there are no files of either kind.
 *
 * @param {Array<{workspaceRelativePath: string}>} guidelineMappings
 * @param {Array<{workspaceRelativePath: string}>} inputMappings
 * @returns {string} Newline-joined instructions.
 */
function buildCopilotFilePrereadBlock(guidelineMappings, inputMappings) {
  const hasGuidelines = guidelineMappings.length > 0;
  const hasInputs = inputMappings.length > 0;
  if (!hasGuidelines && !hasInputs) {
    return "";
  }
  // One "* path" bullet per file, one bullet per line.
  const toBullets = (mappings) => {
    const bullets = [];
    for (const mapping of mappings) {
      bullets.push(`* ${mapping.workspaceRelativePath}`);
    }
    return bullets.join("\n");
  };
  const parts = [];
  if (hasGuidelines) {
    parts.push(`Read all guideline files:\n${toBullets(guidelineMappings)}.`);
  }
  if (hasInputs) {
    parts.push(`Read all input files:\n${toBullets(inputMappings)}.`);
  }
  parts.push("If any file is missing, fail with ERROR: missing-file <filename> and stop.");
  parts.push("Then apply system_instructions on the user query below.");
  return parts.join("\n");
}
|
|
4663
|
+
/**
 * Provider that answers an evaluation request by running the Copilot CLI.
 *
 * Each `invoke` creates a temporary workspace, copies the request's input files
 * into it, writes the assembled prompt to a file there, runs the executable with
 * the prompt passed via `-p`, and returns the cleaned stdout as the assistant
 * message. The workspace is removed afterwards.
 */
var CopilotCliProvider = class {
  id;
  kind = "copilot-cli";
  targetName;
  supportsBatch = false;
  config;
  runCopilot;
  // Cached promise of the one-time executable lookup.
  environmentCheck;
  resolvedExecutable;
  /**
   * @param targetName - Name of the configured target; used in `id` and log names.
   * @param config - Target settings read here: executable, model, args, cwd,
   *   timeoutMs, systemPrompt, logDir, logFormat.
   * @param runner - Function that actually spawns the process (injectable for tests).
   */
  constructor(targetName, config, runner = defaultCopilotCliRunner) {
    this.id = `copilot-cli:${targetName}`;
    this.targetName = targetName;
    this.config = config;
    this.runCopilot = runner;
  }
  /**
   * Runs one request through the Copilot CLI.
   *
   * @returns An object with `raw` execution details and a single assistant
   *   entry in `outputMessages`.
   * @throws {Error} When the request is already aborted, the run times out, or
   *   the process exits with a non-zero code.
   */
  async invoke(request) {
    if (request.signal?.aborted) {
      throw new Error("Copilot CLI request was aborted before execution");
    }
    await this.ensureEnvironmentReady();
    const inputFiles = normalizeInputFiles(request.inputFiles);
    const workspaceRoot = await this.createWorkspace();
    // Logging is best-effort: a logger that cannot be created is simply absent.
    const logger = await this.createStreamLogger(request).catch(() => void 0);
    try {
      const copiedFiles = inputFiles ? await copyInputFilesToWorkspace(workspaceRoot, inputFiles) : [];
      const guidelineFileSet = new Set(
        collectGuidelineFiles(inputFiles, request.guideline_patterns)
      );
      const guidelineMappings = copiedFiles.filter((m) => guidelineFileSet.has(m.originalPath));
      const nonGuidelineMappings = copiedFiles.filter((m) => !guidelineFileSet.has(m.originalPath));
      const prereadBlock = buildCopilotFilePrereadBlock(guidelineMappings, nonGuidelineMappings);
      const systemPrompt = this.config.systemPrompt ?? DEFAULT_SYSTEM_PROMPT4;
      const promptParts = [systemPrompt];
      if (prereadBlock.length > 0) {
        promptParts.push("", prereadBlock);
      }
      promptParts.push("", "[[ ## user_query ## ]]", request.question.trim());
      const promptContent = promptParts.join("\n");
      const promptFile = path12.join(workspaceRoot, PROMPT_FILENAME3);
      await writeFile3(promptFile, promptContent, "utf8");
      const args = this.buildCopilotArgs(promptContent);
      const cwd = this.resolveCwd(workspaceRoot);
      const result = await this.executeCopilot(args, cwd, promptContent, request.signal, logger);
      if (result.timedOut) {
        throw new Error(
          `Copilot CLI timed out${formatTimeoutSuffix4(this.config.timeoutMs ?? void 0)}`
        );
      }
      if (result.exitCode !== 0) {
        const detail = pickDetail3(result.stderr, result.stdout);
        const prefix = `Copilot CLI exited with code ${result.exitCode}`;
        throw new Error(detail ? `${prefix}: ${detail}` : prefix);
      }
      const assistantText = extractCopilotResponse(result.stdout);
      return {
        raw: {
          stdout: result.stdout,
          stderr: result.stderr,
          exitCode: result.exitCode,
          args,
          executable: this.resolvedExecutable ?? this.config.executable,
          promptFile,
          workspace: workspaceRoot,
          inputFiles,
          copiedFiles,
          logFile: logger?.filePath
        },
        outputMessages: [{ role: "assistant", content: assistantText }]
      };
    } finally {
      // Remove the workspace even when closing the log stream rejects;
      // otherwise a failed close would leak the temporary directory.
      try {
        await logger?.close();
      } finally {
        await this.cleanupWorkspace(workspaceRoot);
      }
    }
  }
  /** Runs the executable lookup once and reuses its promise on later calls. */
  async ensureEnvironmentReady() {
    if (!this.environmentCheck) {
      this.environmentCheck = this.validateEnvironment();
    }
    await this.environmentCheck;
  }
  /** Resolves the configured executable and remembers the resolved path. */
  async validateEnvironment() {
    this.resolvedExecutable = await locateExecutable2(this.config.executable);
  }
  /** Working directory for the process: the configured `cwd` if set, else the workspace. */
  resolveCwd(workspaceRoot) {
    if (!this.config.cwd) {
      return workspaceRoot;
    }
    return path12.resolve(this.config.cwd);
  }
  /**
   * Builds the argument vector: fixed flags, optional `--model`, any
   * user-configured extra args, and finally `-p <prompt>`.
   */
  buildCopilotArgs(prompt) {
    const args = [];
    args.push("-s");
    args.push("--allow-all-tools");
    args.push("--no-color");
    if (this.config.model) {
      args.push("--model", this.config.model);
    }
    if (this.config.args && this.config.args.length > 0) {
      args.push(...this.config.args);
    }
    args.push("-p", prompt);
    return args;
  }
  /**
   * Invokes the runner, wiring stream chunks to the logger when one exists.
   * A spawn failure with code ENOENT is rethrown with a configuration hint;
   * any other error is rethrown unchanged.
   */
  async executeCopilot(args, cwd, promptContent, signal, logger) {
    try {
      return await this.runCopilot({
        executable: this.resolvedExecutable ?? this.config.executable,
        args,
        cwd,
        prompt: promptContent,
        timeoutMs: this.config.timeoutMs,
        env: process.env,
        signal,
        onStdoutChunk: logger ? (chunk) => logger.handleStdoutChunk(chunk) : void 0,
        onStderrChunk: logger ? (chunk) => logger.handleStderrChunk(chunk) : void 0
      });
    } catch (error) {
      // Optional chaining: a runner may reject with a non-object value.
      if (error?.code === "ENOENT") {
        throw new Error(
          `Copilot executable '${this.config.executable}' was not found. Update the target settings.executable or add it to PATH.`
        );
      }
      throw error;
    }
  }
  /** Creates a fresh temporary directory under the OS temp dir. */
  async createWorkspace() {
    return await mkdtemp3(path12.join(tmpdir3(), WORKSPACE_PREFIX3));
  }
  /** Recursively removes the workspace; failures are deliberately ignored. */
  async cleanupWorkspace(workspaceRoot) {
    try {
      await rm3(workspaceRoot, { recursive: true, force: true });
    } catch {
      // Best-effort cleanup: a leftover temp directory must not fail the request.
    }
  }
  /**
   * Directory for stream logs, or undefined when logging is disabled via the
   * environment. Uses `config.logDir` if set, else `.agentv/logs/copilot-cli`
   * under the current working directory.
   */
  resolveLogDirectory() {
    const disabled = isCopilotLogStreamingDisabled();
    if (disabled) {
      return void 0;
    }
    if (this.config.logDir) {
      return path12.resolve(this.config.logDir);
    }
    return path12.join(process.cwd(), ".agentv", "logs", "copilot-cli");
  }
  /**
   * Creates the per-request stream logger and records its log entry.
   * Returns undefined (after a console warning) when the log directory or the
   * logger cannot be created.
   */
  async createStreamLogger(request) {
    const logDir = this.resolveLogDirectory();
    if (!logDir) {
      return void 0;
    }
    try {
      await mkdir3(logDir, { recursive: true });
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      console.warn(`Skipping Copilot CLI stream logging (could not create ${logDir}): ${message}`);
      return void 0;
    }
    const filePath = path12.join(logDir, buildLogFilename3(request, this.targetName));
    try {
      const logger = await CopilotCliStreamLogger.create({
        filePath,
        targetName: this.targetName,
        evalCaseId: request.evalCaseId,
        attempt: request.attempt,
        format: this.config.logFormat ?? "summary"
      });
      recordCopilotCliLogEntry({
        filePath,
        targetName: this.targetName,
        evalCaseId: request.evalCaseId,
        attempt: request.attempt
      });
      return logger;
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      console.warn(`Skipping Copilot CLI stream logging for ${filePath}: ${message}`);
      return void 0;
    }
  }
};
|
|
4843
|
+
/**
 * Writes Copilot CLI stdout/stderr to a log file, one formatted line per
 * complete input line, each tagged with elapsed time and source stream.
 * Partial lines are buffered until their newline arrives or the logger closes.
 */
var CopilotCliStreamLogger = class _CopilotCliStreamLogger {
  filePath;
  stream;
  startedAt = Date.now();
  stdoutBuffer = "";
  stderrBuffer = "";
  format;
  constructor(filePath, format) {
    this.filePath = filePath;
    this.format = format;
    // Opened in append mode.
    this.stream = createWriteStream3(filePath, { flags: "a" });
  }
  /** Creates a logger and writes the "#"-prefixed header lines. */
  static async create(options) {
    const instance = new _CopilotCliStreamLogger(options.filePath, options.format);
    const headerLines = ["# Copilot CLI stream log", `# target: ${options.targetName}`];
    if (options.evalCaseId) {
      headerLines.push(`# eval: ${options.evalCaseId}`);
    }
    if (options.attempt !== void 0) {
      // Attempts are shown 1-based.
      headerLines.push(`# attempt: ${options.attempt + 1}`);
    }
    headerLines.push(`# started: ${(/* @__PURE__ */ new Date()).toISOString()}`);
    instance.writeLines(headerLines);
    return instance;
  }
  /** Buffers a stdout chunk and writes any lines it completes. */
  handleStdoutChunk(chunk) {
    this.stdoutBuffer += chunk;
    this.flushBuffer("stdout");
  }
  /** Buffers a stderr chunk and writes any lines it completes. */
  handleStderrChunk(chunk) {
    this.stderrBuffer += chunk;
    this.flushBuffer("stderr");
  }
  /** Writes whatever is still buffered, then ends the stream and waits for it. */
  async close() {
    this.flushBuffer("stdout");
    this.flushBuffer("stderr");
    this.flushRemainder();
    await new Promise((resolve, reject) => {
      this.stream.once("error", reject);
      this.stream.end(() => resolve());
    });
  }
  /** Writes each given line followed by a newline. */
  writeLines(lines) {
    for (const text of lines) {
      this.stream.write(`${text}\n`);
    }
  }
  /** Writes every complete line in the chosen buffer; keeps the trailing partial line. */
  flushBuffer(source) {
    const isStdout = source === "stdout";
    const pending = isStdout ? this.stdoutBuffer : this.stderrBuffer;
    const segments = pending.split(/\r?\n/);
    // The last segment has no terminating newline yet, so it stays buffered.
    const tail = segments.pop() ?? "";
    if (isStdout) {
      this.stdoutBuffer = tail;
    } else {
      this.stderrBuffer = tail;
    }
    for (const segment of segments) {
      const entry = this.formatLine(segment, source);
      if (!entry) {
        continue;
      }
      this.stream.write(entry);
      this.stream.write("\n");
    }
  }
  /** Formats one line as "[+elapsed] [source] text"; blank lines yield undefined. */
  formatLine(rawLine, source) {
    const text = rawLine.trim();
    if (text.length === 0) {
      return void 0;
    }
    const marker = source === "stderr" ? "stderr: " : "";
    return `[+${formatElapsed3(this.startedAt)}] [${source}] ${marker}${text}`;
  }
  /** Writes the unterminated remainder of both buffers (stdout first), then clears them. */
  flushRemainder() {
    const leftovers = [
      ["stdout", this.stdoutBuffer],
      ["stderr", this.stderrBuffer]
    ];
    for (const [source, buffered] of leftovers) {
      const text = buffered.trim();
      if (text.length === 0) {
        continue;
      }
      const entry = this.formatLine(text, source);
      if (entry) {
        this.stream.write(entry);
        this.stream.write("\n");
      }
    }
    this.stdoutBuffer = "";
    this.stderrBuffer = "";
  }
};
|
|
4937
|
+
/**
 * Reports whether AGENTV_COPILOT_STREAM_LOGS turns stream logging off.
 * Only "false", "0" and "off" (any case, surrounding whitespace ignored)
 * disable it; an unset or empty variable leaves logging on.
 */
function isCopilotLogStreamingDisabled() {
  const flag = process.env.AGENTV_COPILOT_STREAM_LOGS;
  if (!flag) {
    return false;
  }
  return ["false", "0", "off"].includes(flag.trim().toLowerCase());
}
|
|
4945
|
+
/**
 * Builds a log file name of the form
 * `<timestamp>_<target>_<evalId>[_attempt-N]_<8 uuid chars>.log`.
 * The eval id falls back to "copilot" and the attempt is shown 1-based.
 */
function buildLogFilename3(request, targetName) {
  // ':' and '.' in the ISO timestamp are replaced with '-'.
  const stamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
  const safeEvalId = sanitizeForFilename3(request.evalCaseId ?? "copilot");
  let attemptPart = "";
  if (request.attempt !== void 0) {
    attemptPart = `_attempt-${request.attempt + 1}`;
  }
  const safeTarget = sanitizeForFilename3(targetName);
  const uniqueTag = randomUUID3().slice(0, 8);
  return `${stamp}_${safeTarget}_${safeEvalId}${attemptPart}_${uniqueTag}.log`;
}
|
|
4952
|
+
/**
 * Replaces each run of characters outside [A-Za-z0-9._-] with a single "_".
 * An empty result becomes "copilot".
 */
function sanitizeForFilename3(value) {
  return value.replace(/[^A-Za-z0-9._-]+/g, "_") || "copilot";
}
|
|
4956
|
+
/**
 * Formats the whole seconds elapsed since `startedAt` (epoch milliseconds) as
 * "MM:SS", or "HH:MM:SS" once at least one hour has passed.
 */
function formatElapsed3(startedAt) {
  const twoDigits = (n) => n.toString().padStart(2, "0");
  const totalSeconds = Math.floor((Date.now() - startedAt) / 1e3);
  const hours = Math.floor(totalSeconds / 3600);
  const minutes = Math.floor(totalSeconds % 3600 / 60);
  const seconds = totalSeconds % 60;
  const clock = `${twoDigits(minutes)}:${twoDigits(seconds)}`;
  return hours > 0 ? `${twoDigits(hours)}:${clock}` : clock;
}
|
|
4966
|
+
// CSI sequence: ESC [ , parameter bytes (0x30-0x3F, which includes the "?"
// used by private-mode sequences such as ESC[?25l), intermediate bytes
// (0x20-0x2F), then one final byte (0x40-0x7E).
var ANSI_ESCAPE_RE = /\x1B\[[0-?]*[ -\/]*[@-~]/g;
// OSC sequence: ESC ] payload, terminated by BEL or by ST (ESC \). The payload
// may not contain ESC, so an unterminated OSC cannot swallow text up to some
// unrelated later BEL.
var ANSI_OSC_RE = /\x1B\][^\x07\x1B]*(?:\x07|\x1B\\)/g;

/**
 * Removes ANSI CSI sequences (colors, cursor control, private modes) and OSC
 * sequences (titles, hyperlinks) from terminal output.
 *
 * @param {string} text - Raw terminal output.
 * @returns {string} The text with those escape sequences removed.
 */
function stripAnsiEscapes(text) {
  return text.replace(ANSI_ESCAPE_RE, "").replace(ANSI_OSC_RE, "");
}
|
|
4971
|
+
/**
 * Returns the CLI's stdout with ANSI escapes stripped and whitespace trimmed.
 * @throws {Error} When nothing is left after cleaning.
 */
function extractCopilotResponse(stdout) {
  const text = stripAnsiEscapes(stdout).trim();
  if (text.length > 0) {
    return text;
  }
  throw new Error("Copilot CLI produced no output");
}
|
|
4978
|
+
/**
 * Picks the text to attach to a failure message: trimmed stderr when it has
 * content, otherwise trimmed stdout, otherwise undefined.
 */
function pickDetail3(stderr, stdout) {
  // stderr is checked first; stdout is only looked at when stderr is blank.
  for (const candidate of [stderr, stdout]) {
    const text = candidate.trim();
    if (text.length > 0) {
      return text;
    }
  }
  return void 0;
}
|
|
4986
|
+
/**
 * Returns " after Ns" (milliseconds rounded up to whole seconds) for a positive
 * timeout, or "" when the timeout is missing, zero or negative.
 */
function formatTimeoutSuffix4(timeoutMs) {
  const hasTimeout = Boolean(timeoutMs) && !(timeoutMs <= 0);
  return hasTimeout ? ` after ${Math.ceil(timeoutMs / 1e3)}s` : "";
}
|
|
4993
|
+
/**
 * Resolves the Copilot executable to a path that exists on disk.
 *
 * A candidate containing a path separator is resolved against the current
 * working directory and checked directly. A bare command name is looked up
 * with `where` (Windows) or `which` (elsewhere).
 *
 * The lookup tool is run with `execFile` and an argument vector, not through a
 * shell, so a configured name containing spaces or shell metacharacters is
 * passed as a single literal argument and can never run extra commands.
 *
 * @param {string} candidate - Configured executable path or command name.
 * @returns {Promise<string>} Path of the executable.
 * @throws {Error} When a bare name cannot be found on PATH; when an explicit
 *   path does not exist, the `access` rejection propagates.
 */
async function locateExecutable2(candidate) {
  const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
  if (includesPathSeparator) {
    const resolved = path12.isAbsolute(candidate) ? candidate : path12.resolve(candidate);
    await access3(resolved, constants3.F_OK);
    return resolved;
  }
  const locator = process.platform === "win32" ? "where" : "which";
  try {
    const { execFile } = await import("node:child_process");
    const { stdout } = await promisify3(execFile)(locator, [candidate]);
    const lines = stdout.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.length > 0);
    if (lines.length > 0 && lines[0]) {
      await access3(lines[0], constants3.F_OK);
      return lines[0];
    }
  } catch {
    // Any lookup failure (tool missing, non-zero exit, stale path) is reported
    // as "not found" by the throw below.
  }
  throw new Error(`Copilot executable '${candidate}' was not found on PATH`);
}
|
|
5012
|
+
/**
 * Reports whether the executable should be spawned through a shell: only on
 * Windows, and only for names ending in .cmd, .bat or .ps1 (any case).
 */
function shouldShellExecute2(executable) {
  if (process.platform === "win32") {
    const name = executable.toLowerCase();
    return [".cmd", ".bat", ".ps1"].some((suffix) => name.endsWith(suffix));
  }
  return false;
}
|
|
5019
|
+
/**
 * Spawns the Copilot CLI and collects its output.
 *
 * stdin is closed immediately. stdout and stderr are decoded as UTF-8,
 * accumulated, and also forwarded chunk-by-chunk to the optional callbacks.
 * The child is sent SIGTERM when `options.signal` aborts or `options.timeoutMs`
 * elapses.
 *
 * @returns {Promise<{stdout: string, stderr: string, exitCode: number, timedOut: boolean}>}
 *   Resolves when the child closes; `exitCode` is -1 when no numeric code was
 *   reported. Rejects when the child emits "error".
 */
async function defaultCopilotCliRunner(options) {
  return await new Promise((resolve, reject) => {
    const child = spawn3(options.executable, options.args, {
      cwd: options.cwd,
      env: options.env,
      stdio: ["pipe", "pipe", "pipe"],
      shell: shouldShellExecute2(options.executable)
    });
    const captured = { stdout: "", stderr: "" };
    let timedOut = false;
    let timer;
    const onAbort = () => {
      child.kill("SIGTERM");
    };
    const abortSignal = options.signal;
    if (abortSignal) {
      if (abortSignal.aborted) {
        onAbort();
      } else {
        abortSignal.addEventListener("abort", onAbort, { once: true });
      }
    }
    if (options.timeoutMs && options.timeoutMs > 0) {
      timer = setTimeout(() => {
        timedOut = true;
        child.kill("SIGTERM");
      }, options.timeoutMs);
      // Do not let the pending timer keep the process alive.
      timer.unref?.();
    }
    // Accumulate a stream's text under `key` and forward each chunk.
    const collect = (stream, key, forward) => {
      stream.setEncoding("utf8");
      stream.on("data", (chunk) => {
        captured[key] += chunk;
        forward(chunk);
      });
    };
    collect(child.stdout, "stdout", (chunk) => options.onStdoutChunk?.(chunk));
    collect(child.stderr, "stderr", (chunk) => options.onStderrChunk?.(chunk));
    child.stdin.end();
    // Clear the timer and abort listener once the child has settled.
    const release = () => {
      if (timer) {
        clearTimeout(timer);
      }
      if (options.signal) {
        options.signal.removeEventListener("abort", onAbort);
      }
    };
    child.on("error", (failure) => {
      release();
      reject(failure);
    });
    child.on("close", (code) => {
      release();
      resolve({
        stdout: captured.stdout,
        stderr: captured.stderr,
        exitCode: typeof code === "number" ? code : -1,
        timedOut
      });
    });
  });
}
|
|
5082
|
+
|
|
4518
5083
|
// src/evaluation/providers/mock.ts
|
|
4519
5084
|
var DEFAULT_MOCK_RESPONSE = '{"answer":"Mock provider response. Configure targets.yaml to supply a custom value."}';
|
|
4520
5085
|
var MockProvider = class {
|
|
@@ -4718,38 +5283,38 @@ function extractToolCalls2(content) {
|
|
|
4718
5283
|
}
|
|
4719
5284
|
|
|
4720
5285
|
// src/evaluation/providers/pi-coding-agent.ts
|
|
4721
|
-
import { spawn as
|
|
4722
|
-
import { randomUUID as
|
|
4723
|
-
import { createWriteStream as
|
|
4724
|
-
import { mkdir as
|
|
4725
|
-
import { tmpdir as
|
|
4726
|
-
import
|
|
5286
|
+
import { spawn as spawn4 } from "node:child_process";
|
|
5287
|
+
import { randomUUID as randomUUID4 } from "node:crypto";
|
|
5288
|
+
import { createWriteStream as createWriteStream4 } from "node:fs";
|
|
5289
|
+
import { mkdir as mkdir4, mkdtemp as mkdtemp4, rm as rm4, writeFile as writeFile4 } from "node:fs/promises";
|
|
5290
|
+
import { tmpdir as tmpdir4 } from "node:os";
|
|
5291
|
+
import path13 from "node:path";
|
|
4727
5292
|
|
|
4728
5293
|
// src/evaluation/providers/pi-log-tracker.ts
|
|
4729
|
-
var
|
|
4730
|
-
var
|
|
5294
|
+
var GLOBAL_LOGS_KEY4 = Symbol.for("agentv.piLogs");
|
|
5295
|
+
var GLOBAL_SUBSCRIBERS_KEY4 = Symbol.for("agentv.piLogSubscribers");
|
|
4731
5296
|
function getPiLogStore() {
|
|
4732
5297
|
const globalObject = globalThis;
|
|
4733
|
-
const existing = globalObject[
|
|
5298
|
+
const existing = globalObject[GLOBAL_LOGS_KEY4];
|
|
4734
5299
|
if (existing) {
|
|
4735
5300
|
return existing;
|
|
4736
5301
|
}
|
|
4737
5302
|
const created = [];
|
|
4738
|
-
globalObject[
|
|
5303
|
+
globalObject[GLOBAL_LOGS_KEY4] = created;
|
|
4739
5304
|
return created;
|
|
4740
5305
|
}
|
|
4741
|
-
function
|
|
5306
|
+
function getSubscriberStore4() {
|
|
4742
5307
|
const globalObject = globalThis;
|
|
4743
|
-
const existing = globalObject[
|
|
5308
|
+
const existing = globalObject[GLOBAL_SUBSCRIBERS_KEY4];
|
|
4744
5309
|
if (existing) {
|
|
4745
5310
|
return existing;
|
|
4746
5311
|
}
|
|
4747
5312
|
const created = /* @__PURE__ */ new Set();
|
|
4748
|
-
globalObject[
|
|
5313
|
+
globalObject[GLOBAL_SUBSCRIBERS_KEY4] = created;
|
|
4749
5314
|
return created;
|
|
4750
5315
|
}
|
|
4751
|
-
function
|
|
4752
|
-
const subscribers = Array.from(
|
|
5316
|
+
function notifySubscribers4(entry) {
|
|
5317
|
+
const subscribers = Array.from(getSubscriberStore4());
|
|
4753
5318
|
for (const listener of subscribers) {
|
|
4754
5319
|
try {
|
|
4755
5320
|
listener(entry);
|
|
@@ -4761,7 +5326,7 @@ function notifySubscribers3(entry) {
|
|
|
4761
5326
|
}
|
|
4762
5327
|
function recordPiLogEntry(entry) {
|
|
4763
5328
|
getPiLogStore().push(entry);
|
|
4764
|
-
|
|
5329
|
+
notifySubscribers4(entry);
|
|
4765
5330
|
}
|
|
4766
5331
|
function consumePiLogEntries() {
|
|
4767
5332
|
const store = getPiLogStore();
|
|
@@ -4771,7 +5336,7 @@ function consumePiLogEntries() {
|
|
|
4771
5336
|
return store.splice(0, store.length);
|
|
4772
5337
|
}
|
|
4773
5338
|
function subscribeToPiLogEntries(listener) {
|
|
4774
|
-
const store =
|
|
5339
|
+
const store = getSubscriberStore4();
|
|
4775
5340
|
store.add(listener);
|
|
4776
5341
|
return () => {
|
|
4777
5342
|
store.delete(listener);
|
|
@@ -4779,9 +5344,9 @@ function subscribeToPiLogEntries(listener) {
|
|
|
4779
5344
|
}
|
|
4780
5345
|
|
|
4781
5346
|
// src/evaluation/providers/pi-coding-agent.ts
|
|
4782
|
-
var
|
|
4783
|
-
var
|
|
4784
|
-
var
|
|
5347
|
+
var WORKSPACE_PREFIX4 = "agentv-pi-";
|
|
5348
|
+
var PROMPT_FILENAME4 = "prompt.md";
|
|
5349
|
+
var DEFAULT_SYSTEM_PROMPT5 = `**IMPORTANT**: Follow these instructions for your response:
|
|
4785
5350
|
- Do NOT create any additional output files in the workspace.
|
|
4786
5351
|
- All intended file outputs/changes MUST be written in your response.
|
|
4787
5352
|
- For each intended file, include the relative path and unified git diff following the convention \`diff --git ...\`.
|
|
@@ -4807,18 +5372,18 @@ var PiCodingAgentProvider = class {
|
|
|
4807
5372
|
const workspaceRoot = await this.createWorkspace();
|
|
4808
5373
|
const logger = await this.createStreamLogger(request).catch(() => void 0);
|
|
4809
5374
|
try {
|
|
4810
|
-
const promptFile =
|
|
4811
|
-
await
|
|
5375
|
+
const promptFile = path13.join(workspaceRoot, PROMPT_FILENAME4);
|
|
5376
|
+
await writeFile4(promptFile, request.question, "utf8");
|
|
4812
5377
|
const args = this.buildPiArgs(request.question, inputFiles);
|
|
4813
5378
|
const cwd = this.resolveCwd(workspaceRoot);
|
|
4814
5379
|
const result = await this.executePi(args, cwd, request.signal, logger);
|
|
4815
5380
|
if (result.timedOut) {
|
|
4816
5381
|
throw new Error(
|
|
4817
|
-
`Pi coding agent timed out${
|
|
5382
|
+
`Pi coding agent timed out${formatTimeoutSuffix5(this.config.timeoutMs ?? void 0)}`
|
|
4818
5383
|
);
|
|
4819
5384
|
}
|
|
4820
5385
|
if (result.exitCode !== 0) {
|
|
4821
|
-
const detail =
|
|
5386
|
+
const detail = pickDetail4(result.stderr, result.stdout);
|
|
4822
5387
|
const prefix = `Pi coding agent exited with code ${result.exitCode}`;
|
|
4823
5388
|
throw new Error(detail ? `${prefix}: ${detail}` : prefix);
|
|
4824
5389
|
}
|
|
@@ -4849,7 +5414,7 @@ var PiCodingAgentProvider = class {
|
|
|
4849
5414
|
if (!this.config.cwd) {
|
|
4850
5415
|
return workspaceRoot;
|
|
4851
5416
|
}
|
|
4852
|
-
return
|
|
5417
|
+
return path13.resolve(this.config.cwd);
|
|
4853
5418
|
}
|
|
4854
5419
|
buildPiArgs(prompt, inputFiles) {
|
|
4855
5420
|
const args = [];
|
|
@@ -4879,7 +5444,7 @@ var PiCodingAgentProvider = class {
|
|
|
4879
5444
|
args.push(`@${file}`);
|
|
4880
5445
|
}
|
|
4881
5446
|
}
|
|
4882
|
-
const systemPrompt = this.config.systemPrompt ??
|
|
5447
|
+
const systemPrompt = this.config.systemPrompt ?? DEFAULT_SYSTEM_PROMPT5;
|
|
4883
5448
|
const fullPrompt = `${systemPrompt}
|
|
4884
5449
|
|
|
4885
5450
|
${prompt}`;
|
|
@@ -4938,19 +5503,19 @@ ${prompt}`;
|
|
|
4938
5503
|
return env;
|
|
4939
5504
|
}
|
|
4940
5505
|
async createWorkspace() {
|
|
4941
|
-
return await
|
|
5506
|
+
return await mkdtemp4(path13.join(tmpdir4(), WORKSPACE_PREFIX4));
|
|
4942
5507
|
}
|
|
4943
5508
|
async cleanupWorkspace(workspaceRoot) {
|
|
4944
5509
|
try {
|
|
4945
|
-
await
|
|
5510
|
+
await rm4(workspaceRoot, { recursive: true, force: true });
|
|
4946
5511
|
} catch {
|
|
4947
5512
|
}
|
|
4948
5513
|
}
|
|
4949
5514
|
resolveLogDirectory() {
|
|
4950
5515
|
if (this.config.logDir) {
|
|
4951
|
-
return
|
|
5516
|
+
return path13.resolve(this.config.logDir);
|
|
4952
5517
|
}
|
|
4953
|
-
return
|
|
5518
|
+
return path13.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
|
|
4954
5519
|
}
|
|
4955
5520
|
async createStreamLogger(request) {
|
|
4956
5521
|
const logDir = this.resolveLogDirectory();
|
|
@@ -4958,13 +5523,13 @@ ${prompt}`;
|
|
|
4958
5523
|
return void 0;
|
|
4959
5524
|
}
|
|
4960
5525
|
try {
|
|
4961
|
-
await
|
|
5526
|
+
await mkdir4(logDir, { recursive: true });
|
|
4962
5527
|
} catch (error) {
|
|
4963
5528
|
const message = error instanceof Error ? error.message : String(error);
|
|
4964
5529
|
console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
|
|
4965
5530
|
return void 0;
|
|
4966
5531
|
}
|
|
4967
|
-
const filePath =
|
|
5532
|
+
const filePath = path13.join(logDir, buildLogFilename4(request, this.targetName));
|
|
4968
5533
|
try {
|
|
4969
5534
|
const logger = await PiStreamLogger.create({
|
|
4970
5535
|
filePath,
|
|
@@ -4997,7 +5562,7 @@ var PiStreamLogger = class _PiStreamLogger {
|
|
|
4997
5562
|
constructor(filePath, format) {
|
|
4998
5563
|
this.filePath = filePath;
|
|
4999
5564
|
this.format = format;
|
|
5000
|
-
this.stream =
|
|
5565
|
+
this.stream = createWriteStream4(filePath, { flags: "a" });
|
|
5001
5566
|
}
|
|
5002
5567
|
static async create(options) {
|
|
5003
5568
|
const logger = new _PiStreamLogger(options.filePath, options.format);
|
|
@@ -5058,7 +5623,7 @@ var PiStreamLogger = class _PiStreamLogger {
|
|
|
5058
5623
|
return void 0;
|
|
5059
5624
|
}
|
|
5060
5625
|
const message = this.format === "json" ? formatPiJsonLog(trimmed) : formatPiLogMessage(trimmed, source);
|
|
5061
|
-
return `[+${
|
|
5626
|
+
return `[+${formatElapsed4(this.startedAt)}] [${source}] ${message}`;
|
|
5062
5627
|
}
|
|
5063
5628
|
flushRemainder() {
|
|
5064
5629
|
const stdoutRemainder = this.stdoutBuffer.trim();
|
|
@@ -5081,18 +5646,18 @@ var PiStreamLogger = class _PiStreamLogger {
|
|
|
5081
5646
|
this.stderrBuffer = "";
|
|
5082
5647
|
}
|
|
5083
5648
|
};
|
|
5084
|
-
function
|
|
5649
|
+
function buildLogFilename4(request, targetName) {
|
|
5085
5650
|
const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
|
|
5086
|
-
const evalId =
|
|
5651
|
+
const evalId = sanitizeForFilename4(request.evalCaseId ?? "pi");
|
|
5087
5652
|
const attemptSuffix = request.attempt !== void 0 ? `_attempt-${request.attempt + 1}` : "";
|
|
5088
|
-
const target =
|
|
5089
|
-
return `${timestamp}_${target}_${evalId}${attemptSuffix}_${
|
|
5653
|
+
const target = sanitizeForFilename4(targetName);
|
|
5654
|
+
return `${timestamp}_${target}_${evalId}${attemptSuffix}_${randomUUID4().slice(0, 8)}.log`;
|
|
5090
5655
|
}
|
|
5091
|
-
function
|
|
5656
|
+
function sanitizeForFilename4(value) {
|
|
5092
5657
|
const sanitized = value.replace(/[^A-Za-z0-9._-]+/g, "_");
|
|
5093
5658
|
return sanitized.length > 0 ? sanitized : "pi";
|
|
5094
5659
|
}
|
|
5095
|
-
function
|
|
5660
|
+
function formatElapsed4(startedAt) {
|
|
5096
5661
|
const elapsedSeconds = Math.floor((Date.now() - startedAt) / 1e3);
|
|
5097
5662
|
const hours = Math.floor(elapsedSeconds / 3600);
|
|
5098
5663
|
const minutes = Math.floor(elapsedSeconds % 3600 / 60);
|
|
@@ -5313,7 +5878,7 @@ function extractAssistantText2(messages) {
|
|
|
5313
5878
|
function escapeAtSymbols(prompt) {
|
|
5314
5879
|
return prompt.replace(/@\[([^\]]+)\]:/g, "[[$1]]:");
|
|
5315
5880
|
}
|
|
5316
|
-
function
|
|
5881
|
+
function pickDetail4(stderr, stdout) {
|
|
5317
5882
|
const errorText = stderr.trim();
|
|
5318
5883
|
if (errorText.length > 0) {
|
|
5319
5884
|
return errorText;
|
|
@@ -5321,7 +5886,7 @@ function pickDetail3(stderr, stdout) {
|
|
|
5321
5886
|
const stdoutText = stdout.trim();
|
|
5322
5887
|
return stdoutText.length > 0 ? stdoutText : void 0;
|
|
5323
5888
|
}
|
|
5324
|
-
function
|
|
5889
|
+
function formatTimeoutSuffix5(timeoutMs) {
|
|
5325
5890
|
if (!timeoutMs || timeoutMs <= 0) {
|
|
5326
5891
|
return "";
|
|
5327
5892
|
}
|
|
@@ -5334,7 +5899,7 @@ async function defaultPiRunner(options) {
|
|
|
5334
5899
|
const executable = parts[0];
|
|
5335
5900
|
const executableArgs = parts.slice(1);
|
|
5336
5901
|
const allArgs = [...executableArgs, ...options.args];
|
|
5337
|
-
const child =
|
|
5902
|
+
const child = spawn4(executable, allArgs, {
|
|
5338
5903
|
cwd: options.cwd,
|
|
5339
5904
|
env: options.env,
|
|
5340
5905
|
stdio: ["pipe", "pipe", "pipe"],
|
|
@@ -5397,38 +5962,38 @@ async function defaultPiRunner(options) {
|
|
|
5397
5962
|
}
|
|
5398
5963
|
|
|
5399
5964
|
// src/evaluation/providers/vscode-provider.ts
|
|
5400
|
-
import
|
|
5965
|
+
import path24 from "node:path";
|
|
5401
5966
|
|
|
5402
5967
|
// src/evaluation/providers/vscode/dispatch/agentDispatch.ts
|
|
5403
|
-
import { stat as stat3, writeFile as
|
|
5404
|
-
import
|
|
5968
|
+
import { stat as stat3, writeFile as writeFile7 } from "node:fs/promises";
|
|
5969
|
+
import path22 from "node:path";
|
|
5405
5970
|
|
|
5406
5971
|
// src/evaluation/providers/vscode/utils/fs.ts
|
|
5407
|
-
import { constants as
|
|
5408
|
-
import { access as
|
|
5409
|
-
import
|
|
5972
|
+
import { constants as constants4 } from "node:fs";
|
|
5973
|
+
import { access as access4, mkdir as mkdir5, readdir, rm as rm5, stat } from "node:fs/promises";
|
|
5974
|
+
import path14 from "node:path";
|
|
5410
5975
|
async function pathExists(target) {
|
|
5411
5976
|
try {
|
|
5412
|
-
await
|
|
5977
|
+
await access4(target, constants4.F_OK);
|
|
5413
5978
|
return true;
|
|
5414
5979
|
} catch {
|
|
5415
5980
|
return false;
|
|
5416
5981
|
}
|
|
5417
5982
|
}
|
|
5418
5983
|
async function ensureDir(target) {
|
|
5419
|
-
await
|
|
5984
|
+
await mkdir5(target, { recursive: true });
|
|
5420
5985
|
}
|
|
5421
5986
|
async function readDirEntries(target) {
|
|
5422
5987
|
const entries = await readdir(target, { withFileTypes: true });
|
|
5423
5988
|
return entries.map((entry) => ({
|
|
5424
5989
|
name: entry.name,
|
|
5425
|
-
absolutePath:
|
|
5990
|
+
absolutePath: path14.join(target, entry.name),
|
|
5426
5991
|
isDirectory: entry.isDirectory()
|
|
5427
5992
|
}));
|
|
5428
5993
|
}
|
|
5429
5994
|
async function removeIfExists(target) {
|
|
5430
5995
|
try {
|
|
5431
|
-
await
|
|
5996
|
+
await rm5(target, { force: true, recursive: false });
|
|
5432
5997
|
} catch (error) {
|
|
5433
5998
|
if (error.code !== "ENOENT") {
|
|
5434
5999
|
throw error;
|
|
@@ -5437,9 +6002,9 @@ async function removeIfExists(target) {
|
|
|
5437
6002
|
}
|
|
5438
6003
|
|
|
5439
6004
|
// src/evaluation/providers/vscode/utils/path.ts
|
|
5440
|
-
import
|
|
6005
|
+
import path15 from "node:path";
|
|
5441
6006
|
function pathToFileUri2(filePath) {
|
|
5442
|
-
const absolutePath =
|
|
6007
|
+
const absolutePath = path15.isAbsolute(filePath) ? filePath : path15.resolve(filePath);
|
|
5443
6008
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
5444
6009
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
5445
6010
|
return `file:///${normalizedPath}`;
|
|
@@ -5448,7 +6013,7 @@ function pathToFileUri2(filePath) {
|
|
|
5448
6013
|
}
|
|
5449
6014
|
|
|
5450
6015
|
// src/evaluation/providers/vscode/dispatch/promptBuilder.ts
|
|
5451
|
-
import
|
|
6016
|
+
import path16 from "node:path";
|
|
5452
6017
|
|
|
5453
6018
|
// src/evaluation/providers/vscode/utils/template.ts
|
|
5454
6019
|
function renderTemplate2(content, variables) {
|
|
@@ -5540,8 +6105,8 @@ function createBatchRequestPrompt(userQuery, responseFileTmp, responseFileFinal,
|
|
|
5540
6105
|
});
|
|
5541
6106
|
}
|
|
5542
6107
|
function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateContent) {
|
|
5543
|
-
const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${
|
|
5544
|
-
const responseList = responseFiles.map((file) => `"${
|
|
6108
|
+
const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${path16.basename(file)}`).join("\n");
|
|
6109
|
+
const responseList = responseFiles.map((file) => `"${path16.basename(file)}"`).join(", ");
|
|
5545
6110
|
return renderTemplate2(templateContent, {
|
|
5546
6111
|
requestFiles: requestLines,
|
|
5547
6112
|
responseList
|
|
@@ -5550,7 +6115,7 @@ function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateCont
|
|
|
5550
6115
|
|
|
5551
6116
|
// src/evaluation/providers/vscode/dispatch/responseWaiter.ts
|
|
5552
6117
|
import { readFile as readFile7 } from "node:fs/promises";
|
|
5553
|
-
import
|
|
6118
|
+
import path17 from "node:path";
|
|
5554
6119
|
|
|
5555
6120
|
// src/evaluation/providers/vscode/utils/time.ts
|
|
5556
6121
|
function sleep2(ms) {
|
|
@@ -5599,7 +6164,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
|
|
|
5599
6164
|
}
|
|
5600
6165
|
async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, silent = false) {
|
|
5601
6166
|
if (!silent) {
|
|
5602
|
-
const fileList = responseFilesFinal.map((file) =>
|
|
6167
|
+
const fileList = responseFilesFinal.map((file) => path17.basename(file)).join(", ");
|
|
5603
6168
|
console.error(`waiting for ${responseFilesFinal.length} batch response(s): ${fileList}`);
|
|
5604
6169
|
}
|
|
5605
6170
|
try {
|
|
@@ -5647,31 +6212,31 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
5647
6212
|
}
|
|
5648
6213
|
|
|
5649
6214
|
// src/evaluation/providers/vscode/dispatch/vscodeProcess.ts
|
|
5650
|
-
import { exec, spawn as
|
|
5651
|
-
import { mkdir as
|
|
5652
|
-
import
|
|
5653
|
-
import { promisify as
|
|
6215
|
+
import { exec, spawn as spawn5 } from "node:child_process";
|
|
6216
|
+
import { mkdir as mkdir6, writeFile as writeFile5 } from "node:fs/promises";
|
|
6217
|
+
import path19 from "node:path";
|
|
6218
|
+
import { promisify as promisify4 } from "node:util";
|
|
5654
6219
|
|
|
5655
6220
|
// src/evaluation/providers/vscode/dispatch/constants.ts
|
|
5656
6221
|
import os2 from "node:os";
|
|
5657
|
-
import
|
|
6222
|
+
import path18 from "node:path";
|
|
5658
6223
|
var DEFAULT_LOCK_NAME = "subagent.lock";
|
|
5659
6224
|
var DEFAULT_ALIVE_FILENAME = ".alive";
|
|
5660
6225
|
function getDefaultSubagentRoot(vscodeCmd = "code") {
|
|
5661
6226
|
const folder = vscodeCmd === "code-insiders" ? "vscode-insiders-agents" : "vscode-agents";
|
|
5662
|
-
return
|
|
6227
|
+
return path18.join(os2.homedir(), ".agentv", "subagents", folder);
|
|
5663
6228
|
}
|
|
5664
6229
|
var DEFAULT_SUBAGENT_ROOT = getDefaultSubagentRoot();
|
|
5665
6230
|
|
|
5666
6231
|
// src/evaluation/providers/vscode/dispatch/vscodeProcess.ts
|
|
5667
|
-
var
|
|
6232
|
+
var execAsync4 = promisify4(exec);
|
|
5668
6233
|
var DEFAULT_WAKEUP_CONTENT = `---
|
|
5669
6234
|
description: 'Wake-up Signal'
|
|
5670
6235
|
model: Grok Code Fast 1 (copilot)
|
|
5671
6236
|
---`;
|
|
5672
6237
|
async function checkWorkspaceOpened(workspaceName, vscodeCmd) {
|
|
5673
6238
|
try {
|
|
5674
|
-
const { stdout } = await
|
|
6239
|
+
const { stdout } = await execAsync4(`${vscodeCmd} --status`, {
|
|
5675
6240
|
timeout: 1e4,
|
|
5676
6241
|
windowsHide: true
|
|
5677
6242
|
});
|
|
@@ -5683,16 +6248,16 @@ async function checkWorkspaceOpened(workspaceName, vscodeCmd) {
|
|
|
5683
6248
|
async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir, vscodeCmd, pollInterval = 1, timeout = 60) {
|
|
5684
6249
|
const alreadyOpen = await checkWorkspaceOpened(workspaceName, vscodeCmd);
|
|
5685
6250
|
if (alreadyOpen) {
|
|
5686
|
-
|
|
6251
|
+
spawn5(vscodeCmd, [workspacePath], { windowsHide: true, shell: true, detached: false });
|
|
5687
6252
|
return true;
|
|
5688
6253
|
}
|
|
5689
|
-
const aliveFile =
|
|
6254
|
+
const aliveFile = path19.join(subagentDir, DEFAULT_ALIVE_FILENAME);
|
|
5690
6255
|
await removeIfExists(aliveFile);
|
|
5691
|
-
const githubAgentsDir =
|
|
5692
|
-
await
|
|
5693
|
-
const wakeupDst =
|
|
5694
|
-
await
|
|
5695
|
-
|
|
6256
|
+
const githubAgentsDir = path19.join(subagentDir, ".github", "agents");
|
|
6257
|
+
await mkdir6(githubAgentsDir, { recursive: true });
|
|
6258
|
+
const wakeupDst = path19.join(githubAgentsDir, "wakeup.md");
|
|
6259
|
+
await writeFile5(wakeupDst, DEFAULT_WAKEUP_CONTENT, "utf8");
|
|
6260
|
+
spawn5(vscodeCmd, [workspacePath], { windowsHide: true, shell: true, detached: false });
|
|
5696
6261
|
await sleep2(100);
|
|
5697
6262
|
const wakeupChatId = "wakeup";
|
|
5698
6263
|
const chatArgs = [
|
|
@@ -5700,9 +6265,9 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
5700
6265
|
"chat",
|
|
5701
6266
|
"-m",
|
|
5702
6267
|
wakeupChatId,
|
|
5703
|
-
`create a file named .alive in the ${
|
|
6268
|
+
`create a file named .alive in the ${path19.basename(subagentDir)} folder`
|
|
5704
6269
|
];
|
|
5705
|
-
|
|
6270
|
+
spawn5(vscodeCmd, chatArgs, { windowsHide: true, shell: true, detached: false });
|
|
5706
6271
|
const start = Date.now();
|
|
5707
6272
|
while (!await pathExists(aliveFile)) {
|
|
5708
6273
|
if (Date.now() - start > timeout * 1e3) {
|
|
@@ -5715,21 +6280,21 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
5715
6280
|
}
|
|
5716
6281
|
async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, requestInstructions, timestamp, vscodeCmd) {
|
|
5717
6282
|
try {
|
|
5718
|
-
const workspacePath =
|
|
5719
|
-
const messagesDir =
|
|
5720
|
-
await
|
|
5721
|
-
const reqFile =
|
|
5722
|
-
await
|
|
6283
|
+
const workspacePath = path19.join(subagentDir, `${path19.basename(subagentDir)}.code-workspace`);
|
|
6284
|
+
const messagesDir = path19.join(subagentDir, "messages");
|
|
6285
|
+
await mkdir6(messagesDir, { recursive: true });
|
|
6286
|
+
const reqFile = path19.join(messagesDir, `${timestamp}_req.md`);
|
|
6287
|
+
await writeFile5(reqFile, requestInstructions, { encoding: "utf8" });
|
|
5723
6288
|
const reqUri = pathToFileUri2(reqFile);
|
|
5724
6289
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
5725
6290
|
for (const attachment of attachmentPaths) {
|
|
5726
6291
|
chatArgs.push("-a", attachment);
|
|
5727
6292
|
}
|
|
5728
6293
|
chatArgs.push("-a", reqFile);
|
|
5729
|
-
chatArgs.push(`Follow instructions in [${
|
|
6294
|
+
chatArgs.push(`Follow instructions in [${path19.basename(reqFile)}](${reqUri})`);
|
|
5730
6295
|
const workspaceReady = await ensureWorkspaceFocused(
|
|
5731
6296
|
workspacePath,
|
|
5732
|
-
|
|
6297
|
+
path19.basename(subagentDir),
|
|
5733
6298
|
subagentDir,
|
|
5734
6299
|
vscodeCmd
|
|
5735
6300
|
);
|
|
@@ -5737,7 +6302,7 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
|
|
|
5737
6302
|
console.error("warning: Workspace may not be fully ready");
|
|
5738
6303
|
}
|
|
5739
6304
|
await sleep2(500);
|
|
5740
|
-
|
|
6305
|
+
spawn5(vscodeCmd, chatArgs, { windowsHide: true, shell: true, detached: false });
|
|
5741
6306
|
return true;
|
|
5742
6307
|
} catch (error) {
|
|
5743
6308
|
console.error(`warning: Failed to launch VS Code: ${error.message}`);
|
|
@@ -5746,9 +6311,9 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
|
|
|
5746
6311
|
}
|
|
5747
6312
|
async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, chatInstruction, vscodeCmd) {
|
|
5748
6313
|
try {
|
|
5749
|
-
const workspacePath =
|
|
5750
|
-
const messagesDir =
|
|
5751
|
-
await
|
|
6314
|
+
const workspacePath = path19.join(subagentDir, `${path19.basename(subagentDir)}.code-workspace`);
|
|
6315
|
+
const messagesDir = path19.join(subagentDir, "messages");
|
|
6316
|
+
await mkdir6(messagesDir, { recursive: true });
|
|
5752
6317
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
5753
6318
|
for (const attachment of attachmentPaths) {
|
|
5754
6319
|
chatArgs.push("-a", attachment);
|
|
@@ -5756,7 +6321,7 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
|
|
|
5756
6321
|
chatArgs.push(chatInstruction);
|
|
5757
6322
|
const workspaceReady = await ensureWorkspaceFocused(
|
|
5758
6323
|
workspacePath,
|
|
5759
|
-
|
|
6324
|
+
path19.basename(subagentDir),
|
|
5760
6325
|
subagentDir,
|
|
5761
6326
|
vscodeCmd
|
|
5762
6327
|
);
|
|
@@ -5764,7 +6329,7 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
|
|
|
5764
6329
|
console.error("warning: Workspace may not be fully ready");
|
|
5765
6330
|
}
|
|
5766
6331
|
await sleep2(500);
|
|
5767
|
-
|
|
6332
|
+
spawn5(vscodeCmd, chatArgs, { windowsHide: true, shell: true, detached: false });
|
|
5768
6333
|
return true;
|
|
5769
6334
|
} catch (error) {
|
|
5770
6335
|
console.error(`warning: Failed to launch VS Code: ${error.message}`);
|
|
@@ -5773,11 +6338,11 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
|
|
|
5773
6338
|
}
|
|
5774
6339
|
|
|
5775
6340
|
// src/evaluation/providers/vscode/dispatch/workspaceManager.ts
|
|
5776
|
-
import { copyFile, mkdir as
|
|
5777
|
-
import
|
|
6341
|
+
import { copyFile as copyFile2, mkdir as mkdir7, readFile as readFile8, readdir as readdir2, stat as stat2, writeFile as writeFile6 } from "node:fs/promises";
|
|
6342
|
+
import path21 from "node:path";
|
|
5778
6343
|
|
|
5779
6344
|
// src/evaluation/providers/vscode/utils/workspace.ts
|
|
5780
|
-
import
|
|
6345
|
+
import path20 from "node:path";
|
|
5781
6346
|
import JSON5 from "json5";
|
|
5782
6347
|
function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
5783
6348
|
let workspace;
|
|
@@ -5794,10 +6359,10 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
5794
6359
|
}
|
|
5795
6360
|
const transformedFolders = workspace.folders.map((folder) => {
|
|
5796
6361
|
const folderPath = folder.path;
|
|
5797
|
-
if (
|
|
6362
|
+
if (path20.isAbsolute(folderPath)) {
|
|
5798
6363
|
return folder;
|
|
5799
6364
|
}
|
|
5800
|
-
const absolutePath =
|
|
6365
|
+
const absolutePath = path20.resolve(templateDir, folderPath);
|
|
5801
6366
|
return {
|
|
5802
6367
|
...folder,
|
|
5803
6368
|
path: absolutePath
|
|
@@ -5819,19 +6384,19 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
5819
6384
|
if (locationMap && typeof locationMap === "object") {
|
|
5820
6385
|
const transformedMap = {};
|
|
5821
6386
|
for (const [locationPath, value] of Object.entries(locationMap)) {
|
|
5822
|
-
const isAbsolute =
|
|
6387
|
+
const isAbsolute = path20.isAbsolute(locationPath);
|
|
5823
6388
|
if (isAbsolute) {
|
|
5824
6389
|
transformedMap[locationPath] = value;
|
|
5825
6390
|
} else {
|
|
5826
6391
|
const firstGlobIndex = locationPath.search(/[*]/);
|
|
5827
6392
|
if (firstGlobIndex === -1) {
|
|
5828
|
-
const resolvedPath =
|
|
6393
|
+
const resolvedPath = path20.resolve(templateDir, locationPath).replace(/\\/g, "/");
|
|
5829
6394
|
transformedMap[resolvedPath] = value;
|
|
5830
6395
|
} else {
|
|
5831
6396
|
const basePathEnd = locationPath.lastIndexOf("/", firstGlobIndex);
|
|
5832
6397
|
const basePath = basePathEnd !== -1 ? locationPath.substring(0, basePathEnd) : ".";
|
|
5833
6398
|
const patternPath = locationPath.substring(basePathEnd !== -1 ? basePathEnd : 0);
|
|
5834
|
-
const resolvedPath = (
|
|
6399
|
+
const resolvedPath = (path20.resolve(templateDir, basePath) + patternPath).replace(
|
|
5835
6400
|
/\\/g,
|
|
5836
6401
|
"/"
|
|
5837
6402
|
);
|
|
@@ -5872,7 +6437,7 @@ async function findUnlockedSubagent(subagentRoot) {
|
|
|
5872
6437
|
number: Number.parseInt(entry.name.split("-")[1] ?? "", 10)
|
|
5873
6438
|
})).filter((entry) => Number.isInteger(entry.number)).sort((a, b) => a.number - b.number);
|
|
5874
6439
|
for (const subagent of subagents) {
|
|
5875
|
-
const lockFile =
|
|
6440
|
+
const lockFile = path21.join(subagent.absolutePath, DEFAULT_LOCK_NAME);
|
|
5876
6441
|
if (!await pathExists(lockFile)) {
|
|
5877
6442
|
return subagent.absolutePath;
|
|
5878
6443
|
}
|
|
@@ -5882,7 +6447,7 @@ async function findUnlockedSubagent(subagentRoot) {
|
|
|
5882
6447
|
async function copyAgentConfig(subagentDir, workspaceTemplate) {
|
|
5883
6448
|
let workspaceContent;
|
|
5884
6449
|
if (workspaceTemplate) {
|
|
5885
|
-
const workspaceSrc =
|
|
6450
|
+
const workspaceSrc = path21.resolve(workspaceTemplate);
|
|
5886
6451
|
if (!await pathExists(workspaceSrc)) {
|
|
5887
6452
|
throw new Error(`workspace template not found: ${workspaceSrc}`);
|
|
5888
6453
|
}
|
|
@@ -5895,41 +6460,41 @@ async function copyAgentConfig(subagentDir, workspaceTemplate) {
|
|
|
5895
6460
|
} else {
|
|
5896
6461
|
workspaceContent = DEFAULT_WORKSPACE_TEMPLATE;
|
|
5897
6462
|
}
|
|
5898
|
-
const workspaceName = `${
|
|
5899
|
-
const workspaceDst =
|
|
5900
|
-
const templateDir = workspaceTemplate ?
|
|
6463
|
+
const workspaceName = `${path21.basename(subagentDir)}.code-workspace`;
|
|
6464
|
+
const workspaceDst = path21.join(subagentDir, workspaceName);
|
|
6465
|
+
const templateDir = workspaceTemplate ? path21.dirname(path21.resolve(workspaceTemplate)) : subagentDir;
|
|
5901
6466
|
const workspaceJson = JSON.stringify(workspaceContent, null, 2);
|
|
5902
6467
|
const transformedContent = transformWorkspacePaths(workspaceJson, templateDir);
|
|
5903
|
-
await
|
|
5904
|
-
const messagesDir =
|
|
5905
|
-
await
|
|
6468
|
+
await writeFile6(workspaceDst, transformedContent, "utf8");
|
|
6469
|
+
const messagesDir = path21.join(subagentDir, "messages");
|
|
6470
|
+
await mkdir7(messagesDir, { recursive: true });
|
|
5906
6471
|
return { workspace: workspaceDst, messagesDir };
|
|
5907
6472
|
}
|
|
5908
6473
|
async function createSubagentLock(subagentDir) {
|
|
5909
|
-
const messagesDir =
|
|
6474
|
+
const messagesDir = path21.join(subagentDir, "messages");
|
|
5910
6475
|
if (await pathExists(messagesDir)) {
|
|
5911
6476
|
const files = await readdir2(messagesDir);
|
|
5912
6477
|
await Promise.all(
|
|
5913
6478
|
files.map(async (file) => {
|
|
5914
|
-
const target =
|
|
6479
|
+
const target = path21.join(messagesDir, file);
|
|
5915
6480
|
await removeIfExists(target);
|
|
5916
6481
|
})
|
|
5917
6482
|
);
|
|
5918
6483
|
}
|
|
5919
|
-
const githubAgentsDir =
|
|
6484
|
+
const githubAgentsDir = path21.join(subagentDir, ".github", "agents");
|
|
5920
6485
|
if (await pathExists(githubAgentsDir)) {
|
|
5921
6486
|
const agentFiles = await readdir2(githubAgentsDir);
|
|
5922
6487
|
const preservedFiles = /* @__PURE__ */ new Set(["wakeup.md", "subagent.md"]);
|
|
5923
6488
|
await Promise.all(
|
|
5924
|
-
agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(
|
|
6489
|
+
agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(path21.join(githubAgentsDir, file)))
|
|
5925
6490
|
);
|
|
5926
6491
|
}
|
|
5927
|
-
const lockFile =
|
|
5928
|
-
await
|
|
6492
|
+
const lockFile = path21.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
6493
|
+
await writeFile6(lockFile, "", { encoding: "utf8" });
|
|
5929
6494
|
return lockFile;
|
|
5930
6495
|
}
|
|
5931
6496
|
async function removeSubagentLock(subagentDir) {
|
|
5932
|
-
const lockFile =
|
|
6497
|
+
const lockFile = path21.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
5933
6498
|
await removeIfExists(lockFile);
|
|
5934
6499
|
}
|
|
5935
6500
|
async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspaceTemplate, dryRun) {
|
|
@@ -5949,11 +6514,11 @@ async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspa
|
|
|
5949
6514
|
return 1;
|
|
5950
6515
|
}
|
|
5951
6516
|
if (promptFile) {
|
|
5952
|
-
const githubAgentsDir =
|
|
5953
|
-
await
|
|
5954
|
-
const agentFile =
|
|
6517
|
+
const githubAgentsDir = path21.join(subagentDir, ".github", "agents");
|
|
6518
|
+
await mkdir7(githubAgentsDir, { recursive: true });
|
|
6519
|
+
const agentFile = path21.join(githubAgentsDir, `${chatId}.md`);
|
|
5955
6520
|
try {
|
|
5956
|
-
await
|
|
6521
|
+
await copyFile2(promptFile, agentFile);
|
|
5957
6522
|
} catch (error) {
|
|
5958
6523
|
console.error(`error: Failed to copy prompt file to agent mode: ${error.message}`);
|
|
5959
6524
|
return 1;
|
|
@@ -5970,7 +6535,7 @@ async function resolvePromptFile(promptFile) {
|
|
|
5970
6535
|
if (!promptFile) {
|
|
5971
6536
|
return void 0;
|
|
5972
6537
|
}
|
|
5973
|
-
const resolvedPrompt =
|
|
6538
|
+
const resolvedPrompt = path22.resolve(promptFile);
|
|
5974
6539
|
if (!await pathExists(resolvedPrompt)) {
|
|
5975
6540
|
throw new Error(`Prompt file not found: ${resolvedPrompt}`);
|
|
5976
6541
|
}
|
|
@@ -5986,7 +6551,7 @@ async function resolveAttachments(extraAttachments) {
|
|
|
5986
6551
|
}
|
|
5987
6552
|
const resolved = [];
|
|
5988
6553
|
for (const attachment of extraAttachments) {
|
|
5989
|
-
const resolvedPath =
|
|
6554
|
+
const resolvedPath = path22.resolve(attachment);
|
|
5990
6555
|
if (!await pathExists(resolvedPath)) {
|
|
5991
6556
|
throw new Error(`Attachment not found: ${resolvedPath}`);
|
|
5992
6557
|
}
|
|
@@ -6026,7 +6591,7 @@ async function dispatchAgentSession(options) {
|
|
|
6026
6591
|
error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
|
|
6027
6592
|
};
|
|
6028
6593
|
}
|
|
6029
|
-
const subagentName =
|
|
6594
|
+
const subagentName = path22.basename(subagentDir);
|
|
6030
6595
|
const chatId = Math.random().toString(16).slice(2, 10);
|
|
6031
6596
|
const preparationResult = await prepareSubagentDirectory(
|
|
6032
6597
|
subagentDir,
|
|
@@ -6053,9 +6618,9 @@ async function dispatchAgentSession(options) {
|
|
|
6053
6618
|
};
|
|
6054
6619
|
}
|
|
6055
6620
|
const timestamp = generateTimestamp();
|
|
6056
|
-
const messagesDir =
|
|
6057
|
-
const responseFileTmp =
|
|
6058
|
-
const responseFileFinal =
|
|
6621
|
+
const messagesDir = path22.join(subagentDir, "messages");
|
|
6622
|
+
const responseFileTmp = path22.join(messagesDir, `${timestamp}_res.tmp.md`);
|
|
6623
|
+
const responseFileFinal = path22.join(messagesDir, `${timestamp}_res.md`);
|
|
6059
6624
|
const requestInstructions = createRequestPrompt(
|
|
6060
6625
|
userQuery,
|
|
6061
6626
|
responseFileTmp,
|
|
@@ -6168,7 +6733,7 @@ async function dispatchBatchAgent(options) {
|
|
|
6168
6733
|
error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
|
|
6169
6734
|
};
|
|
6170
6735
|
}
|
|
6171
|
-
subagentName =
|
|
6736
|
+
subagentName = path22.basename(subagentDir);
|
|
6172
6737
|
const chatId = Math.random().toString(16).slice(2, 10);
|
|
6173
6738
|
const preparationResult = await prepareSubagentDirectory(
|
|
6174
6739
|
subagentDir,
|
|
@@ -6199,24 +6764,24 @@ async function dispatchBatchAgent(options) {
|
|
|
6199
6764
|
};
|
|
6200
6765
|
}
|
|
6201
6766
|
const timestamp = generateTimestamp();
|
|
6202
|
-
const messagesDir =
|
|
6767
|
+
const messagesDir = path22.join(subagentDir, "messages");
|
|
6203
6768
|
requestFiles = userQueries.map(
|
|
6204
|
-
(_, index) =>
|
|
6769
|
+
(_, index) => path22.join(messagesDir, `${timestamp}_${index}_req.md`)
|
|
6205
6770
|
);
|
|
6206
6771
|
const responseTmpFiles = userQueries.map(
|
|
6207
|
-
(_, index) =>
|
|
6772
|
+
(_, index) => path22.join(messagesDir, `${timestamp}_${index}_res.tmp.md`)
|
|
6208
6773
|
);
|
|
6209
6774
|
responseFilesFinal = userQueries.map(
|
|
6210
|
-
(_, index) =>
|
|
6775
|
+
(_, index) => path22.join(messagesDir, `${timestamp}_${index}_res.md`)
|
|
6211
6776
|
);
|
|
6212
|
-
const orchestratorFile =
|
|
6777
|
+
const orchestratorFile = path22.join(messagesDir, `${timestamp}_orchestrator.md`);
|
|
6213
6778
|
if (!dryRun) {
|
|
6214
6779
|
await Promise.all(
|
|
6215
6780
|
userQueries.map((query, index) => {
|
|
6216
6781
|
const reqFile = requestFiles[index];
|
|
6217
6782
|
const tmpFile = responseTmpFiles[index];
|
|
6218
6783
|
const finalFile = responseFilesFinal[index];
|
|
6219
|
-
return
|
|
6784
|
+
return writeFile7(
|
|
6220
6785
|
reqFile,
|
|
6221
6786
|
createBatchRequestPrompt(query, tmpFile, finalFile, batchRequestTemplateContent),
|
|
6222
6787
|
{ encoding: "utf8" }
|
|
@@ -6228,7 +6793,7 @@ async function dispatchBatchAgent(options) {
|
|
|
6228
6793
|
responseFilesFinal,
|
|
6229
6794
|
orchestratorTemplateContent
|
|
6230
6795
|
);
|
|
6231
|
-
await
|
|
6796
|
+
await writeFile7(orchestratorFile, orchestratorContent, { encoding: "utf8" });
|
|
6232
6797
|
}
|
|
6233
6798
|
const chatAttachments = [orchestratorFile, ...attachments];
|
|
6234
6799
|
const orchestratorUri = pathToFileUri2(orchestratorFile);
|
|
@@ -6298,8 +6863,8 @@ async function dispatchBatchAgent(options) {
|
|
|
6298
6863
|
}
|
|
6299
6864
|
|
|
6300
6865
|
// src/evaluation/providers/vscode/dispatch/provision.ts
|
|
6301
|
-
import { writeFile as
|
|
6302
|
-
import
|
|
6866
|
+
import { writeFile as writeFile8 } from "node:fs/promises";
|
|
6867
|
+
import path23 from "node:path";
|
|
6303
6868
|
var DEFAULT_WORKSPACE_TEMPLATE2 = {
|
|
6304
6869
|
folders: [
|
|
6305
6870
|
{
|
|
@@ -6330,7 +6895,7 @@ async function provisionSubagents(options) {
|
|
|
6330
6895
|
if (!Number.isInteger(subagents) || subagents < 1) {
|
|
6331
6896
|
throw new Error("subagents must be a positive integer");
|
|
6332
6897
|
}
|
|
6333
|
-
const targetPath =
|
|
6898
|
+
const targetPath = path23.resolve(targetRoot);
|
|
6334
6899
|
if (!dryRun) {
|
|
6335
6900
|
await ensureDir(targetPath);
|
|
6336
6901
|
}
|
|
@@ -6350,7 +6915,7 @@ async function provisionSubagents(options) {
|
|
|
6350
6915
|
continue;
|
|
6351
6916
|
}
|
|
6352
6917
|
highestNumber = Math.max(highestNumber, parsed);
|
|
6353
|
-
const lockFile =
|
|
6918
|
+
const lockFile = path23.join(entry.absolutePath, lockName);
|
|
6354
6919
|
const locked = await pathExists(lockFile);
|
|
6355
6920
|
if (locked) {
|
|
6356
6921
|
lockedSubagents.add(entry.absolutePath);
|
|
@@ -6367,10 +6932,10 @@ async function provisionSubagents(options) {
|
|
|
6367
6932
|
break;
|
|
6368
6933
|
}
|
|
6369
6934
|
const subagentDir = subagent.absolutePath;
|
|
6370
|
-
const githubAgentsDir =
|
|
6371
|
-
const lockFile =
|
|
6372
|
-
const workspaceDst =
|
|
6373
|
-
const wakeupDst =
|
|
6935
|
+
const githubAgentsDir = path23.join(subagentDir, ".github", "agents");
|
|
6936
|
+
const lockFile = path23.join(subagentDir, lockName);
|
|
6937
|
+
const workspaceDst = path23.join(subagentDir, `${path23.basename(subagentDir)}.code-workspace`);
|
|
6938
|
+
const wakeupDst = path23.join(githubAgentsDir, "wakeup.md");
|
|
6374
6939
|
const isLocked = await pathExists(lockFile);
|
|
6375
6940
|
if (isLocked && !force) {
|
|
6376
6941
|
continue;
|
|
@@ -6379,8 +6944,8 @@ async function provisionSubagents(options) {
|
|
|
6379
6944
|
if (!dryRun) {
|
|
6380
6945
|
await removeIfExists(lockFile);
|
|
6381
6946
|
await ensureDir(githubAgentsDir);
|
|
6382
|
-
await
|
|
6383
|
-
await
|
|
6947
|
+
await writeFile8(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
|
|
6948
|
+
await writeFile8(wakeupDst, wakeupContent, "utf8");
|
|
6384
6949
|
}
|
|
6385
6950
|
created.push(subagentDir);
|
|
6386
6951
|
lockedSubagents.delete(subagentDir);
|
|
@@ -6390,8 +6955,8 @@ async function provisionSubagents(options) {
|
|
|
6390
6955
|
if (!isLocked && force) {
|
|
6391
6956
|
if (!dryRun) {
|
|
6392
6957
|
await ensureDir(githubAgentsDir);
|
|
6393
|
-
await
|
|
6394
|
-
await
|
|
6958
|
+
await writeFile8(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
|
|
6959
|
+
await writeFile8(wakeupDst, wakeupContent, "utf8");
|
|
6395
6960
|
}
|
|
6396
6961
|
created.push(subagentDir);
|
|
6397
6962
|
subagentsProvisioned += 1;
|
|
@@ -6399,8 +6964,8 @@ async function provisionSubagents(options) {
|
|
|
6399
6964
|
}
|
|
6400
6965
|
if (!dryRun && !await pathExists(workspaceDst)) {
|
|
6401
6966
|
await ensureDir(githubAgentsDir);
|
|
6402
|
-
await
|
|
6403
|
-
await
|
|
6967
|
+
await writeFile8(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
|
|
6968
|
+
await writeFile8(wakeupDst, wakeupContent, "utf8");
|
|
6404
6969
|
}
|
|
6405
6970
|
skippedExisting.push(subagentDir);
|
|
6406
6971
|
subagentsProvisioned += 1;
|
|
@@ -6408,15 +6973,15 @@ async function provisionSubagents(options) {
|
|
|
6408
6973
|
let nextIndex = highestNumber;
|
|
6409
6974
|
while (subagentsProvisioned < subagents) {
|
|
6410
6975
|
nextIndex += 1;
|
|
6411
|
-
const subagentDir =
|
|
6412
|
-
const githubAgentsDir =
|
|
6413
|
-
const workspaceDst =
|
|
6414
|
-
const wakeupDst =
|
|
6976
|
+
const subagentDir = path23.join(targetPath, `subagent-${nextIndex}`);
|
|
6977
|
+
const githubAgentsDir = path23.join(subagentDir, ".github", "agents");
|
|
6978
|
+
const workspaceDst = path23.join(subagentDir, `${path23.basename(subagentDir)}.code-workspace`);
|
|
6979
|
+
const wakeupDst = path23.join(githubAgentsDir, "wakeup.md");
|
|
6415
6980
|
if (!dryRun) {
|
|
6416
6981
|
await ensureDir(subagentDir);
|
|
6417
6982
|
await ensureDir(githubAgentsDir);
|
|
6418
|
-
await
|
|
6419
|
-
await
|
|
6983
|
+
await writeFile8(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
|
|
6984
|
+
await writeFile8(wakeupDst, wakeupContent, "utf8");
|
|
6420
6985
|
}
|
|
6421
6986
|
created.push(subagentDir);
|
|
6422
6987
|
subagentsProvisioned += 1;
|
|
@@ -6595,7 +7160,7 @@ function buildMandatoryPrereadBlock2(guidelineFiles, attachmentFiles) {
|
|
|
6595
7160
|
return "";
|
|
6596
7161
|
}
|
|
6597
7162
|
const buildList = (files) => files.map((absolutePath) => {
|
|
6598
|
-
const fileName =
|
|
7163
|
+
const fileName = path24.basename(absolutePath);
|
|
6599
7164
|
const fileUri = pathToFileUri3(absolutePath);
|
|
6600
7165
|
return `* [${fileName}](${fileUri})`;
|
|
6601
7166
|
});
|
|
@@ -6620,8 +7185,8 @@ function collectGuidelineFiles2(attachments, guidelinePatterns) {
|
|
|
6620
7185
|
}
|
|
6621
7186
|
const unique = /* @__PURE__ */ new Map();
|
|
6622
7187
|
for (const attachment of attachments) {
|
|
6623
|
-
const absolutePath =
|
|
6624
|
-
const normalized = absolutePath.split(
|
|
7188
|
+
const absolutePath = path24.resolve(attachment);
|
|
7189
|
+
const normalized = absolutePath.split(path24.sep).join("/");
|
|
6625
7190
|
if (isGuidelineFile(normalized, guidelinePatterns)) {
|
|
6626
7191
|
if (!unique.has(absolutePath)) {
|
|
6627
7192
|
unique.set(absolutePath, absolutePath);
|
|
@@ -6636,7 +7201,7 @@ function collectAttachmentFiles(attachments) {
|
|
|
6636
7201
|
}
|
|
6637
7202
|
const unique = /* @__PURE__ */ new Map();
|
|
6638
7203
|
for (const attachment of attachments) {
|
|
6639
|
-
const absolutePath =
|
|
7204
|
+
const absolutePath = path24.resolve(attachment);
|
|
6640
7205
|
if (!unique.has(absolutePath)) {
|
|
6641
7206
|
unique.set(absolutePath, absolutePath);
|
|
6642
7207
|
}
|
|
@@ -6644,7 +7209,7 @@ function collectAttachmentFiles(attachments) {
|
|
|
6644
7209
|
return Array.from(unique.values());
|
|
6645
7210
|
}
|
|
6646
7211
|
function pathToFileUri3(filePath) {
|
|
6647
|
-
const absolutePath =
|
|
7212
|
+
const absolutePath = path24.isAbsolute(filePath) ? filePath : path24.resolve(filePath);
|
|
6648
7213
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
6649
7214
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
6650
7215
|
return `file:///${normalizedPath}`;
|
|
@@ -6657,7 +7222,7 @@ function normalizeAttachments(attachments) {
|
|
|
6657
7222
|
}
|
|
6658
7223
|
const deduped = /* @__PURE__ */ new Set();
|
|
6659
7224
|
for (const attachment of attachments) {
|
|
6660
|
-
deduped.add(
|
|
7225
|
+
deduped.add(path24.resolve(attachment));
|
|
6661
7226
|
}
|
|
6662
7227
|
return Array.from(deduped);
|
|
6663
7228
|
}
|
|
@@ -6666,7 +7231,7 @@ function mergeAttachments(all) {
|
|
|
6666
7231
|
for (const list of all) {
|
|
6667
7232
|
if (!list) continue;
|
|
6668
7233
|
for (const inputFile of list) {
|
|
6669
|
-
deduped.add(
|
|
7234
|
+
deduped.add(path24.resolve(inputFile));
|
|
6670
7235
|
}
|
|
6671
7236
|
}
|
|
6672
7237
|
return deduped.size > 0 ? Array.from(deduped) : void 0;
|
|
@@ -6713,9 +7278,9 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
|
|
|
6713
7278
|
}
|
|
6714
7279
|
|
|
6715
7280
|
// src/evaluation/providers/targets-file.ts
|
|
6716
|
-
import { constants as
|
|
6717
|
-
import { access as
|
|
6718
|
-
import
|
|
7281
|
+
import { constants as constants5 } from "node:fs";
|
|
7282
|
+
import { access as access5, readFile as readFile9 } from "node:fs/promises";
|
|
7283
|
+
import path25 from "node:path";
|
|
6719
7284
|
import { parse as parse3 } from "yaml";
|
|
6720
7285
|
function isRecord(value) {
|
|
6721
7286
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
@@ -6745,14 +7310,14 @@ function assertTargetDefinition(value, index, filePath) {
|
|
|
6745
7310
|
}
|
|
6746
7311
|
async function fileExists3(filePath) {
|
|
6747
7312
|
try {
|
|
6748
|
-
await
|
|
7313
|
+
await access5(filePath, constants5.F_OK);
|
|
6749
7314
|
return true;
|
|
6750
7315
|
} catch {
|
|
6751
7316
|
return false;
|
|
6752
7317
|
}
|
|
6753
7318
|
}
|
|
6754
7319
|
async function readTargetDefinitions(filePath) {
|
|
6755
|
-
const absolutePath =
|
|
7320
|
+
const absolutePath = path25.resolve(filePath);
|
|
6756
7321
|
if (!await fileExists3(absolutePath)) {
|
|
6757
7322
|
throw new Error(`targets.yaml not found at ${absolutePath}`);
|
|
6758
7323
|
}
|
|
@@ -6784,6 +7349,8 @@ function createProvider(target) {
|
|
|
6784
7349
|
return new CliProvider(target.name, target.config);
|
|
6785
7350
|
case "codex":
|
|
6786
7351
|
return new CodexProvider(target.name, target.config);
|
|
7352
|
+
case "copilot-cli":
|
|
7353
|
+
return new CopilotCliProvider(target.name, target.config);
|
|
6787
7354
|
case "pi-coding-agent":
|
|
6788
7355
|
return new PiCodingAgentProvider(target.name, target.config);
|
|
6789
7356
|
case "pi-agent-sdk":
|
|
@@ -6920,10 +7487,10 @@ async function execFileWithStdinBun(argv, stdinPayload, options) {
|
|
|
6920
7487
|
}
|
|
6921
7488
|
}
|
|
6922
7489
|
async function execFileWithStdinNode(argv, stdinPayload, options) {
|
|
6923
|
-
const { spawn:
|
|
7490
|
+
const { spawn: spawn6 } = await import("node:child_process");
|
|
6924
7491
|
return new Promise((resolve, reject) => {
|
|
6925
7492
|
const [cmd, ...args] = argv;
|
|
6926
|
-
const child =
|
|
7493
|
+
const child = spawn6(cmd, args, {
|
|
6927
7494
|
cwd: options.cwd,
|
|
6928
7495
|
stdio: ["pipe", "pipe", "pipe"],
|
|
6929
7496
|
// Merge additional env vars with process.env
|
|
@@ -6963,21 +7530,21 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
|
|
|
6963
7530
|
});
|
|
6964
7531
|
}
|
|
6965
7532
|
async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
6966
|
-
const { mkdir:
|
|
6967
|
-
const { tmpdir:
|
|
6968
|
-
const
|
|
6969
|
-
const { randomUUID:
|
|
6970
|
-
const dir =
|
|
6971
|
-
await
|
|
6972
|
-
const stdinPath =
|
|
6973
|
-
const stdoutPath =
|
|
6974
|
-
const stderrPath =
|
|
6975
|
-
await
|
|
7533
|
+
const { mkdir: mkdir9, readFile: readFile10, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
|
|
7534
|
+
const { tmpdir: tmpdir5 } = await import("node:os");
|
|
7535
|
+
const path27 = await import("node:path");
|
|
7536
|
+
const { randomUUID: randomUUID5 } = await import("node:crypto");
|
|
7537
|
+
const dir = path27.join(tmpdir5(), `agentv-exec-${randomUUID5()}`);
|
|
7538
|
+
await mkdir9(dir, { recursive: true });
|
|
7539
|
+
const stdinPath = path27.join(dir, "stdin.txt");
|
|
7540
|
+
const stdoutPath = path27.join(dir, "stdout.txt");
|
|
7541
|
+
const stderrPath = path27.join(dir, "stderr.txt");
|
|
7542
|
+
await writeFile9(stdinPath, stdinPayload, "utf8");
|
|
6976
7543
|
const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
|
|
6977
|
-
const { spawn:
|
|
7544
|
+
const { spawn: spawn6 } = await import("node:child_process");
|
|
6978
7545
|
try {
|
|
6979
7546
|
const exitCode = await new Promise((resolve, reject) => {
|
|
6980
|
-
const child =
|
|
7547
|
+
const child = spawn6(wrappedCommand, {
|
|
6981
7548
|
shell: true,
|
|
6982
7549
|
cwd: options.cwd,
|
|
6983
7550
|
stdio: ["ignore", "ignore", "ignore"],
|
|
@@ -7005,7 +7572,7 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
|
7005
7572
|
const stderr = (await readFile10(stderrPath, "utf8")).replace(/\r\n/g, "\n");
|
|
7006
7573
|
return { stdout, stderr, exitCode };
|
|
7007
7574
|
} finally {
|
|
7008
|
-
await
|
|
7575
|
+
await rm6(dir, { recursive: true, force: true });
|
|
7009
7576
|
}
|
|
7010
7577
|
}
|
|
7011
7578
|
|
|
@@ -7274,7 +7841,7 @@ var CodeEvaluator = class {
|
|
|
7274
7841
|
outputMessages: context.outputMessages ?? null,
|
|
7275
7842
|
guidelineFiles: context.evalCase.guideline_paths,
|
|
7276
7843
|
inputFiles: context.evalCase.file_paths.filter(
|
|
7277
|
-
(
|
|
7844
|
+
(path27) => !context.evalCase.guideline_paths.includes(path27)
|
|
7278
7845
|
),
|
|
7279
7846
|
inputMessages: context.evalCase.input_messages,
|
|
7280
7847
|
traceSummary: context.traceSummary ?? null,
|
|
@@ -8194,115 +8761,115 @@ var FieldAccuracyEvaluator = class {
|
|
|
8194
8761
|
* Evaluate a single field against the expected value.
|
|
8195
8762
|
*/
|
|
8196
8763
|
evaluateField(fieldConfig, candidateData, expectedData) {
|
|
8197
|
-
const { path:
|
|
8198
|
-
const candidateValue = resolvePath(candidateData,
|
|
8199
|
-
const expectedValue = resolvePath(expectedData,
|
|
8764
|
+
const { path: path27, match, required = true, weight = 1 } = fieldConfig;
|
|
8765
|
+
const candidateValue = resolvePath(candidateData, path27);
|
|
8766
|
+
const expectedValue = resolvePath(expectedData, path27);
|
|
8200
8767
|
if (expectedValue === void 0) {
|
|
8201
8768
|
return {
|
|
8202
|
-
path:
|
|
8769
|
+
path: path27,
|
|
8203
8770
|
score: 1,
|
|
8204
8771
|
// No expected value means no comparison needed
|
|
8205
8772
|
weight,
|
|
8206
8773
|
hit: true,
|
|
8207
|
-
message: `${
|
|
8774
|
+
message: `${path27}: no expected value`
|
|
8208
8775
|
};
|
|
8209
8776
|
}
|
|
8210
8777
|
if (candidateValue === void 0) {
|
|
8211
8778
|
if (required) {
|
|
8212
8779
|
return {
|
|
8213
|
-
path:
|
|
8780
|
+
path: path27,
|
|
8214
8781
|
score: 0,
|
|
8215
8782
|
weight,
|
|
8216
8783
|
hit: false,
|
|
8217
|
-
message: `${
|
|
8784
|
+
message: `${path27} (required, missing)`
|
|
8218
8785
|
};
|
|
8219
8786
|
}
|
|
8220
8787
|
return {
|
|
8221
|
-
path:
|
|
8788
|
+
path: path27,
|
|
8222
8789
|
score: 1,
|
|
8223
8790
|
// Don't penalize missing optional fields
|
|
8224
8791
|
weight: 0,
|
|
8225
8792
|
// Zero weight means it won't affect the score
|
|
8226
8793
|
hit: true,
|
|
8227
|
-
message: `${
|
|
8794
|
+
message: `${path27}: optional field missing`
|
|
8228
8795
|
};
|
|
8229
8796
|
}
|
|
8230
8797
|
switch (match) {
|
|
8231
8798
|
case "exact":
|
|
8232
|
-
return this.compareExact(
|
|
8799
|
+
return this.compareExact(path27, candidateValue, expectedValue, weight);
|
|
8233
8800
|
case "numeric_tolerance":
|
|
8234
8801
|
return this.compareNumericTolerance(
|
|
8235
|
-
|
|
8802
|
+
path27,
|
|
8236
8803
|
candidateValue,
|
|
8237
8804
|
expectedValue,
|
|
8238
8805
|
fieldConfig,
|
|
8239
8806
|
weight
|
|
8240
8807
|
);
|
|
8241
8808
|
case "date":
|
|
8242
|
-
return this.compareDate(
|
|
8809
|
+
return this.compareDate(path27, candidateValue, expectedValue, fieldConfig, weight);
|
|
8243
8810
|
default:
|
|
8244
8811
|
return {
|
|
8245
|
-
path:
|
|
8812
|
+
path: path27,
|
|
8246
8813
|
score: 0,
|
|
8247
8814
|
weight,
|
|
8248
8815
|
hit: false,
|
|
8249
|
-
message: `${
|
|
8816
|
+
message: `${path27}: unknown match type "${match}"`
|
|
8250
8817
|
};
|
|
8251
8818
|
}
|
|
8252
8819
|
}
|
|
8253
8820
|
/**
|
|
8254
8821
|
* Exact equality comparison.
|
|
8255
8822
|
*/
|
|
8256
|
-
compareExact(
|
|
8823
|
+
compareExact(path27, candidateValue, expectedValue, weight) {
|
|
8257
8824
|
if (deepEqual(candidateValue, expectedValue)) {
|
|
8258
8825
|
return {
|
|
8259
|
-
path:
|
|
8826
|
+
path: path27,
|
|
8260
8827
|
score: 1,
|
|
8261
8828
|
weight,
|
|
8262
8829
|
hit: true,
|
|
8263
|
-
message:
|
|
8830
|
+
message: path27
|
|
8264
8831
|
};
|
|
8265
8832
|
}
|
|
8266
8833
|
if (typeof candidateValue !== typeof expectedValue) {
|
|
8267
8834
|
return {
|
|
8268
|
-
path:
|
|
8835
|
+
path: path27,
|
|
8269
8836
|
score: 0,
|
|
8270
8837
|
weight,
|
|
8271
8838
|
hit: false,
|
|
8272
|
-
message: `${
|
|
8839
|
+
message: `${path27} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
|
|
8273
8840
|
};
|
|
8274
8841
|
}
|
|
8275
8842
|
return {
|
|
8276
|
-
path:
|
|
8843
|
+
path: path27,
|
|
8277
8844
|
score: 0,
|
|
8278
8845
|
weight,
|
|
8279
8846
|
hit: false,
|
|
8280
|
-
message: `${
|
|
8847
|
+
message: `${path27} (value mismatch)`
|
|
8281
8848
|
};
|
|
8282
8849
|
}
|
|
8283
8850
|
/**
|
|
8284
8851
|
* Numeric comparison with absolute or relative tolerance.
|
|
8285
8852
|
*/
|
|
8286
|
-
compareNumericTolerance(
|
|
8853
|
+
compareNumericTolerance(path27, candidateValue, expectedValue, fieldConfig, weight) {
|
|
8287
8854
|
const { tolerance = 0, relative = false } = fieldConfig;
|
|
8288
8855
|
const candidateNum = toNumber(candidateValue);
|
|
8289
8856
|
const expectedNum = toNumber(expectedValue);
|
|
8290
8857
|
if (candidateNum === null || expectedNum === null) {
|
|
8291
8858
|
return {
|
|
8292
|
-
path:
|
|
8859
|
+
path: path27,
|
|
8293
8860
|
score: 0,
|
|
8294
8861
|
weight,
|
|
8295
8862
|
hit: false,
|
|
8296
|
-
message: `${
|
|
8863
|
+
message: `${path27} (non-numeric value)`
|
|
8297
8864
|
};
|
|
8298
8865
|
}
|
|
8299
8866
|
if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
|
|
8300
8867
|
return {
|
|
8301
|
-
path:
|
|
8868
|
+
path: path27,
|
|
8302
8869
|
score: 0,
|
|
8303
8870
|
weight,
|
|
8304
8871
|
hit: false,
|
|
8305
|
-
message: `${
|
|
8872
|
+
message: `${path27} (invalid numeric value)`
|
|
8306
8873
|
};
|
|
8307
8874
|
}
|
|
8308
8875
|
const diff = Math.abs(candidateNum - expectedNum);
|
|
@@ -8315,61 +8882,61 @@ var FieldAccuracyEvaluator = class {
|
|
|
8315
8882
|
}
|
|
8316
8883
|
if (withinTolerance) {
|
|
8317
8884
|
return {
|
|
8318
|
-
path:
|
|
8885
|
+
path: path27,
|
|
8319
8886
|
score: 1,
|
|
8320
8887
|
weight,
|
|
8321
8888
|
hit: true,
|
|
8322
|
-
message: `${
|
|
8889
|
+
message: `${path27} (within tolerance: diff=${diff.toFixed(2)})`
|
|
8323
8890
|
};
|
|
8324
8891
|
}
|
|
8325
8892
|
return {
|
|
8326
|
-
path:
|
|
8893
|
+
path: path27,
|
|
8327
8894
|
score: 0,
|
|
8328
8895
|
weight,
|
|
8329
8896
|
hit: false,
|
|
8330
|
-
message: `${
|
|
8897
|
+
message: `${path27} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
|
|
8331
8898
|
};
|
|
8332
8899
|
}
|
|
8333
8900
|
/**
|
|
8334
8901
|
* Date comparison with format normalization.
|
|
8335
8902
|
*/
|
|
8336
|
-
compareDate(
|
|
8903
|
+
compareDate(path27, candidateValue, expectedValue, fieldConfig, weight) {
|
|
8337
8904
|
const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
|
|
8338
8905
|
const candidateDate = parseDate(String(candidateValue), formats);
|
|
8339
8906
|
const expectedDate = parseDate(String(expectedValue), formats);
|
|
8340
8907
|
if (candidateDate === null) {
|
|
8341
8908
|
return {
|
|
8342
|
-
path:
|
|
8909
|
+
path: path27,
|
|
8343
8910
|
score: 0,
|
|
8344
8911
|
weight,
|
|
8345
8912
|
hit: false,
|
|
8346
|
-
message: `${
|
|
8913
|
+
message: `${path27} (unparseable candidate date)`
|
|
8347
8914
|
};
|
|
8348
8915
|
}
|
|
8349
8916
|
if (expectedDate === null) {
|
|
8350
8917
|
return {
|
|
8351
|
-
path:
|
|
8918
|
+
path: path27,
|
|
8352
8919
|
score: 0,
|
|
8353
8920
|
weight,
|
|
8354
8921
|
hit: false,
|
|
8355
|
-
message: `${
|
|
8922
|
+
message: `${path27} (unparseable expected date)`
|
|
8356
8923
|
};
|
|
8357
8924
|
}
|
|
8358
8925
|
if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
|
|
8359
8926
|
return {
|
|
8360
|
-
path:
|
|
8927
|
+
path: path27,
|
|
8361
8928
|
score: 1,
|
|
8362
8929
|
weight,
|
|
8363
8930
|
hit: true,
|
|
8364
|
-
message:
|
|
8931
|
+
message: path27
|
|
8365
8932
|
};
|
|
8366
8933
|
}
|
|
8367
8934
|
return {
|
|
8368
|
-
path:
|
|
8935
|
+
path: path27,
|
|
8369
8936
|
score: 0,
|
|
8370
8937
|
weight,
|
|
8371
8938
|
hit: false,
|
|
8372
|
-
message: `${
|
|
8939
|
+
message: `${path27} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
|
|
8373
8940
|
};
|
|
8374
8941
|
}
|
|
8375
8942
|
/**
|
|
@@ -8409,11 +8976,11 @@ var FieldAccuracyEvaluator = class {
|
|
|
8409
8976
|
};
|
|
8410
8977
|
}
|
|
8411
8978
|
};
|
|
8412
|
-
function resolvePath(obj,
|
|
8413
|
-
if (!
|
|
8979
|
+
function resolvePath(obj, path27) {
|
|
8980
|
+
if (!path27 || !obj) {
|
|
8414
8981
|
return void 0;
|
|
8415
8982
|
}
|
|
8416
|
-
const parts =
|
|
8983
|
+
const parts = path27.split(/\.|\[|\]/).filter((p) => p.length > 0);
|
|
8417
8984
|
let current = obj;
|
|
8418
8985
|
for (const part of parts) {
|
|
8419
8986
|
if (current === null || current === void 0) {
|
|
@@ -8933,7 +9500,7 @@ var ToolTrajectoryEvaluator = class {
|
|
|
8933
9500
|
|
|
8934
9501
|
// src/evaluation/orchestrator.ts
|
|
8935
9502
|
import { createHash } from "node:crypto";
|
|
8936
|
-
import
|
|
9503
|
+
import path26 from "node:path";
|
|
8937
9504
|
import micromatch4 from "micromatch";
|
|
8938
9505
|
|
|
8939
9506
|
// ../../node_modules/.bun/yocto-queue@1.2.2/node_modules/yocto-queue/index.js
|
|
@@ -9736,7 +10303,7 @@ async function runEvaluatorList(options) {
|
|
|
9736
10303
|
});
|
|
9737
10304
|
}
|
|
9738
10305
|
if (evaluator.type === "composite") {
|
|
9739
|
-
const evalFileDir = evalCase.guideline_paths[0] ?
|
|
10306
|
+
const evalFileDir = evalCase.guideline_paths[0] ? path26.dirname(evalCase.guideline_paths[0]) : process.cwd();
|
|
9740
10307
|
const createEvaluator = (memberConfig) => {
|
|
9741
10308
|
switch (memberConfig.type) {
|
|
9742
10309
|
case "llm_judge":
|
|
@@ -10092,7 +10659,7 @@ async function executePromptTemplate(script, context, config, timeoutMs) {
|
|
|
10092
10659
|
};
|
|
10093
10660
|
const inputJson = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
|
|
10094
10661
|
const scriptPath = script[script.length - 1];
|
|
10095
|
-
const cwd =
|
|
10662
|
+
const cwd = path26.dirname(scriptPath);
|
|
10096
10663
|
try {
|
|
10097
10664
|
const stdout = await executeScript(script, inputJson, timeoutMs, cwd);
|
|
10098
10665
|
const prompt = stdout.trim();
|
|
@@ -10366,6 +10933,7 @@ export {
|
|
|
10366
10933
|
computeTraceSummary,
|
|
10367
10934
|
consumeClaudeCodeLogEntries,
|
|
10368
10935
|
consumeCodexLogEntries,
|
|
10936
|
+
consumeCopilotCliLogEntries,
|
|
10369
10937
|
consumePiLogEntries,
|
|
10370
10938
|
createAgentKernel,
|
|
10371
10939
|
createProvider,
|
|
@@ -10405,6 +10973,7 @@ export {
|
|
|
10405
10973
|
scoreToVerdict,
|
|
10406
10974
|
subscribeToClaudeCodeLogEntries,
|
|
10407
10975
|
subscribeToCodexLogEntries,
|
|
10976
|
+
subscribeToCopilotCliLogEntries,
|
|
10408
10977
|
subscribeToPiLogEntries,
|
|
10409
10978
|
tokensPerTool
|
|
10410
10979
|
};
|