@agentv/core 3.11.0 → 3.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-HMXZ2AX4.js → chunk-3G2KXH7N.js} +31 -23
- package/dist/chunk-3G2KXH7N.js.map +1 -0
- package/dist/{chunk-AVTN5AB7.js → chunk-4XWPXNQM.js} +62 -24
- package/dist/chunk-4XWPXNQM.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +1 -1
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +1 -1
- package/dist/index.cjs +1120 -800
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +29 -8
- package/dist/index.d.ts +29 -8
- package/dist/index.js +956 -682
- package/dist/index.js.map +1 -1
- package/dist/simple-trace-file-exporter-CRIO5HDZ.js +7 -0
- package/package.json +9 -3
- package/dist/chunk-AVTN5AB7.js.map +0 -1
- package/dist/chunk-HMXZ2AX4.js.map +0 -1
- package/dist/simple-trace-file-exporter-S76DMABU.js +0 -7
- package/dist/{simple-trace-file-exporter-S76DMABU.js.map → simple-trace-file-exporter-CRIO5HDZ.js.map} +0 -0
package/dist/index.js
CHANGED
|
@@ -19,7 +19,7 @@ import {
|
|
|
19
19
|
readTextFile,
|
|
20
20
|
resolveFileReference,
|
|
21
21
|
resolveTargetDefinition
|
|
22
|
-
} from "./chunk-AVTN5AB7.js";
|
|
22
|
+
} from "./chunk-4XWPXNQM.js";
|
|
23
23
|
import {
|
|
24
24
|
AgentvProvider
|
|
25
25
|
} from "./chunk-W5YDZWT4.js";
|
|
@@ -28,7 +28,7 @@ import {
|
|
|
28
28
|
} from "./chunk-HFSYZHGF.js";
|
|
29
29
|
import {
|
|
30
30
|
SimpleTraceFileExporter
|
|
31
|
-
} from "./chunk-HMXZ2AX4.js";
|
|
31
|
+
} from "./chunk-3G2KXH7N.js";
|
|
32
32
|
|
|
33
33
|
// src/evaluation/trace.ts
|
|
34
34
|
function computeTraceSummary(messages) {
|
|
@@ -6793,265 +6793,7 @@ var MockProvider = class {
|
|
|
6793
6793
|
}
|
|
6794
6794
|
};
|
|
6795
6795
|
|
|
6796
|
-
// src/evaluation/providers/pi-utils.ts
|
|
6797
|
-
function extractPiTextContent(content) {
|
|
6798
|
-
if (typeof content === "string") {
|
|
6799
|
-
return content;
|
|
6800
|
-
}
|
|
6801
|
-
if (!Array.isArray(content)) {
|
|
6802
|
-
return void 0;
|
|
6803
|
-
}
|
|
6804
|
-
const textParts = [];
|
|
6805
|
-
for (const part of content) {
|
|
6806
|
-
if (!part || typeof part !== "object") {
|
|
6807
|
-
continue;
|
|
6808
|
-
}
|
|
6809
|
-
const p = part;
|
|
6810
|
-
if (p.type === "text" && typeof p.text === "string") {
|
|
6811
|
-
textParts.push(p.text);
|
|
6812
|
-
}
|
|
6813
|
-
}
|
|
6814
|
-
return textParts.length > 0 ? textParts.join("\n") : void 0;
|
|
6815
|
-
}
|
|
6816
|
-
function toFiniteNumber(value) {
|
|
6817
|
-
if (typeof value === "number" && Number.isFinite(value)) return value;
|
|
6818
|
-
return void 0;
|
|
6819
|
-
}
|
|
6820
|
-
|
|
6821
|
-
// src/evaluation/providers/pi-agent-sdk.ts
|
|
6822
|
-
var piAgentModule = null;
|
|
6823
|
-
var piAiModule = null;
|
|
6824
|
-
async function loadPiModules() {
|
|
6825
|
-
if (!piAgentModule || !piAiModule) {
|
|
6826
|
-
try {
|
|
6827
|
-
[piAgentModule, piAiModule] = await Promise.all([
|
|
6828
|
-
import("@mariozechner/pi-agent-core"),
|
|
6829
|
-
import("@mariozechner/pi-ai")
|
|
6830
|
-
]);
|
|
6831
|
-
} catch (error) {
|
|
6832
|
-
throw new Error(
|
|
6833
|
-
`Failed to load pi-agent-sdk dependencies. Please install them:
|
|
6834
|
-
npm install @mariozechner/pi-agent-core @mariozechner/pi-ai
|
|
6835
|
-
|
|
6836
|
-
Original error: ${error instanceof Error ? error.message : String(error)}`
|
|
6837
|
-
);
|
|
6838
|
-
}
|
|
6839
|
-
}
|
|
6840
|
-
return {
|
|
6841
|
-
Agent: piAgentModule.Agent,
|
|
6842
|
-
getModel: piAiModule.getModel,
|
|
6843
|
-
getEnvApiKey: piAiModule.getEnvApiKey
|
|
6844
|
-
};
|
|
6845
|
-
}
|
|
6846
|
-
var PiAgentSdkProvider = class {
|
|
6847
|
-
id;
|
|
6848
|
-
kind = "pi-agent-sdk";
|
|
6849
|
-
targetName;
|
|
6850
|
-
supportsBatch = false;
|
|
6851
|
-
config;
|
|
6852
|
-
constructor(targetName, config) {
|
|
6853
|
-
this.id = `pi-agent-sdk:${targetName}`;
|
|
6854
|
-
this.targetName = targetName;
|
|
6855
|
-
this.config = config;
|
|
6856
|
-
}
|
|
6857
|
-
async invoke(request) {
|
|
6858
|
-
if (request.signal?.aborted) {
|
|
6859
|
-
throw new Error("Pi agent SDK request was aborted before execution");
|
|
6860
|
-
}
|
|
6861
|
-
const { Agent, getModel, getEnvApiKey } = await loadPiModules();
|
|
6862
|
-
const startTimeIso = (/* @__PURE__ */ new Date()).toISOString();
|
|
6863
|
-
const startMs = Date.now();
|
|
6864
|
-
const providerName = this.config.subprovider ?? "anthropic";
|
|
6865
|
-
const modelId = this.config.model ?? "claude-sonnet-4-20250514";
|
|
6866
|
-
const model = getModel(providerName, modelId);
|
|
6867
|
-
const systemPrompt = this.config.systemPrompt ?? "Answer directly and concisely.";
|
|
6868
|
-
const agent = new Agent({
|
|
6869
|
-
initialState: {
|
|
6870
|
-
systemPrompt,
|
|
6871
|
-
model,
|
|
6872
|
-
tools: [],
|
|
6873
|
-
// No tools for simple Q&A
|
|
6874
|
-
messages: []
|
|
6875
|
-
},
|
|
6876
|
-
getApiKey: async (provider) => {
|
|
6877
|
-
return this.config.apiKey ?? getEnvApiKey(provider) ?? void 0;
|
|
6878
|
-
}
|
|
6879
|
-
});
|
|
6880
|
-
let tokenUsage;
|
|
6881
|
-
let costUsd;
|
|
6882
|
-
const toolTrackers = /* @__PURE__ */ new Map();
|
|
6883
|
-
const completedToolResults = /* @__PURE__ */ new Map();
|
|
6884
|
-
const unsubscribe = agent.subscribe((event) => {
|
|
6885
|
-
switch (event.type) {
|
|
6886
|
-
case "message_end": {
|
|
6887
|
-
const msg = event.message;
|
|
6888
|
-
if (msg && typeof msg === "object" && "role" in msg && msg.role === "assistant" && "usage" in msg) {
|
|
6889
|
-
const usage = msg.usage;
|
|
6890
|
-
if (usage && typeof usage === "object") {
|
|
6891
|
-
const u = usage;
|
|
6892
|
-
const input = toFiniteNumber(u.input);
|
|
6893
|
-
const output = toFiniteNumber(u.output);
|
|
6894
|
-
const cached = toFiniteNumber(u.cacheRead);
|
|
6895
|
-
let callDelta;
|
|
6896
|
-
if (input !== void 0 || output !== void 0) {
|
|
6897
|
-
callDelta = {
|
|
6898
|
-
input: input ?? 0,
|
|
6899
|
-
output: output ?? 0,
|
|
6900
|
-
...cached !== void 0 ? { cached } : {}
|
|
6901
|
-
};
|
|
6902
|
-
tokenUsage = {
|
|
6903
|
-
input: (tokenUsage?.input ?? 0) + callDelta.input,
|
|
6904
|
-
output: (tokenUsage?.output ?? 0) + callDelta.output,
|
|
6905
|
-
...cached !== void 0 ? { cached: (tokenUsage?.cached ?? 0) + cached } : tokenUsage?.cached !== void 0 ? { cached: tokenUsage.cached } : {}
|
|
6906
|
-
};
|
|
6907
|
-
}
|
|
6908
|
-
const cost = u.cost;
|
|
6909
|
-
if (cost && typeof cost === "object") {
|
|
6910
|
-
const total = toFiniteNumber(cost.total);
|
|
6911
|
-
if (total !== void 0) {
|
|
6912
|
-
costUsd = (costUsd ?? 0) + total;
|
|
6913
|
-
}
|
|
6914
|
-
}
|
|
6915
|
-
request.streamCallbacks?.onLlmCallEnd?.(modelId, callDelta);
|
|
6916
|
-
}
|
|
6917
|
-
}
|
|
6918
|
-
break;
|
|
6919
|
-
}
|
|
6920
|
-
case "tool_execution_start": {
|
|
6921
|
-
toolTrackers.set(event.toolCallId, {
|
|
6922
|
-
toolCallId: event.toolCallId,
|
|
6923
|
-
toolName: event.toolName,
|
|
6924
|
-
args: event.args,
|
|
6925
|
-
startMs: Date.now(),
|
|
6926
|
-
startTime: (/* @__PURE__ */ new Date()).toISOString()
|
|
6927
|
-
});
|
|
6928
|
-
request.streamCallbacks?.onToolCallStart?.(event.toolName, event.toolCallId);
|
|
6929
|
-
break;
|
|
6930
|
-
}
|
|
6931
|
-
case "tool_execution_end": {
|
|
6932
|
-
const tracker = toolTrackers.get(event.toolCallId);
|
|
6933
|
-
const durationMs = tracker ? Date.now() - tracker.startMs : 0;
|
|
6934
|
-
completedToolResults.set(event.toolCallId, {
|
|
6935
|
-
output: event.result,
|
|
6936
|
-
durationMs
|
|
6937
|
-
});
|
|
6938
|
-
request.streamCallbacks?.onToolCallEnd?.(
|
|
6939
|
-
event.toolName,
|
|
6940
|
-
tracker?.args,
|
|
6941
|
-
event.result,
|
|
6942
|
-
durationMs,
|
|
6943
|
-
event.toolCallId
|
|
6944
|
-
);
|
|
6945
|
-
toolTrackers.delete(event.toolCallId);
|
|
6946
|
-
break;
|
|
6947
|
-
}
|
|
6948
|
-
}
|
|
6949
|
-
});
|
|
6950
|
-
try {
|
|
6951
|
-
if (this.config.timeoutMs) {
|
|
6952
|
-
const timeoutMs = this.config.timeoutMs;
|
|
6953
|
-
const timeoutPromise = new Promise((_, reject) => {
|
|
6954
|
-
setTimeout(
|
|
6955
|
-
() => reject(new Error(`Pi agent SDK timed out after ${timeoutMs}ms`)),
|
|
6956
|
-
timeoutMs
|
|
6957
|
-
);
|
|
6958
|
-
});
|
|
6959
|
-
await Promise.race([agent.prompt(request.question), timeoutPromise]);
|
|
6960
|
-
} else {
|
|
6961
|
-
await agent.prompt(request.question);
|
|
6962
|
-
}
|
|
6963
|
-
await agent.waitForIdle();
|
|
6964
|
-
const agentMessages = agent.state.messages;
|
|
6965
|
-
const output = [];
|
|
6966
|
-
for (const msg of agentMessages) {
|
|
6967
|
-
output.push(convertAgentMessage(msg, toolTrackers, completedToolResults));
|
|
6968
|
-
}
|
|
6969
|
-
const endTimeIso = (/* @__PURE__ */ new Date()).toISOString();
|
|
6970
|
-
const durationMs = Date.now() - startMs;
|
|
6971
|
-
return {
|
|
6972
|
-
raw: {
|
|
6973
|
-
messages: agentMessages,
|
|
6974
|
-
systemPrompt,
|
|
6975
|
-
model: this.config.model,
|
|
6976
|
-
subprovider: this.config.subprovider
|
|
6977
|
-
},
|
|
6978
|
-
output,
|
|
6979
|
-
tokenUsage,
|
|
6980
|
-
costUsd,
|
|
6981
|
-
durationMs,
|
|
6982
|
-
startTime: startTimeIso,
|
|
6983
|
-
endTime: endTimeIso
|
|
6984
|
-
};
|
|
6985
|
-
} finally {
|
|
6986
|
-
unsubscribe();
|
|
6987
|
-
}
|
|
6988
|
-
}
|
|
6989
|
-
};
|
|
6990
|
-
function convertAgentMessage(message, toolTrackers, completedToolResults) {
|
|
6991
|
-
if (!message || typeof message !== "object") {
|
|
6992
|
-
return { role: "unknown", content: String(message) };
|
|
6993
|
-
}
|
|
6994
|
-
const msg = message;
|
|
6995
|
-
const role = typeof msg.role === "string" ? msg.role : "unknown";
|
|
6996
|
-
const content = extractPiTextContent(msg.content);
|
|
6997
|
-
const toolCalls = extractToolCalls3(msg.content, toolTrackers, completedToolResults);
|
|
6998
|
-
const startTime = typeof msg.timestamp === "number" ? new Date(msg.timestamp).toISOString() : typeof msg.timestamp === "string" ? msg.timestamp : void 0;
|
|
6999
|
-
let msgTokenUsage;
|
|
7000
|
-
if (msg.usage && typeof msg.usage === "object") {
|
|
7001
|
-
const u = msg.usage;
|
|
7002
|
-
const input = toFiniteNumber(u.input);
|
|
7003
|
-
const output = toFiniteNumber(u.output);
|
|
7004
|
-
if (input !== void 0 || output !== void 0) {
|
|
7005
|
-
msgTokenUsage = {
|
|
7006
|
-
input: input ?? 0,
|
|
7007
|
-
output: output ?? 0,
|
|
7008
|
-
...toFiniteNumber(u.cacheRead) !== void 0 ? { cached: toFiniteNumber(u.cacheRead) } : {}
|
|
7009
|
-
};
|
|
7010
|
-
}
|
|
7011
|
-
}
|
|
7012
|
-
const metadata = {};
|
|
7013
|
-
if (msg.api) metadata.api = msg.api;
|
|
7014
|
-
if (msg.provider) metadata.provider = msg.provider;
|
|
7015
|
-
if (msg.model) metadata.model = msg.model;
|
|
7016
|
-
if (msg.stopReason) metadata.stopReason = msg.stopReason;
|
|
7017
|
-
return {
|
|
7018
|
-
role,
|
|
7019
|
-
content,
|
|
7020
|
-
toolCalls: toolCalls.length > 0 ? toolCalls : void 0,
|
|
7021
|
-
startTime,
|
|
7022
|
-
metadata: Object.keys(metadata).length > 0 ? metadata : void 0,
|
|
7023
|
-
tokenUsage: msgTokenUsage
|
|
7024
|
-
};
|
|
7025
|
-
}
|
|
7026
|
-
function extractToolCalls3(content, toolTrackers, completedToolResults) {
|
|
7027
|
-
if (!Array.isArray(content)) {
|
|
7028
|
-
return [];
|
|
7029
|
-
}
|
|
7030
|
-
const toolCalls = [];
|
|
7031
|
-
for (const part of content) {
|
|
7032
|
-
if (!part || typeof part !== "object") {
|
|
7033
|
-
continue;
|
|
7034
|
-
}
|
|
7035
|
-
const p = part;
|
|
7036
|
-
if (p.type === "toolCall" && typeof p.name === "string") {
|
|
7037
|
-
const id = typeof p.id === "string" ? p.id : void 0;
|
|
7038
|
-
const tracker = id ? toolTrackers.get(id) : void 0;
|
|
7039
|
-
const completed = id ? completedToolResults.get(id) : void 0;
|
|
7040
|
-
toolCalls.push({
|
|
7041
|
-
tool: p.name,
|
|
7042
|
-
input: p.arguments,
|
|
7043
|
-
id,
|
|
7044
|
-
output: completed?.output,
|
|
7045
|
-
durationMs: completed?.durationMs,
|
|
7046
|
-
startTime: tracker?.startTime,
|
|
7047
|
-
endTime: tracker?.startTime && completed?.durationMs !== void 0 ? new Date(new Date(tracker.startTime).getTime() + completed.durationMs).toISOString() : void 0
|
|
7048
|
-
});
|
|
7049
|
-
}
|
|
7050
|
-
}
|
|
7051
|
-
return toolCalls;
|
|
7052
|
-
}
|
|
7053
|
-
|
|
7054
|
-
// src/evaluation/providers/pi-coding-agent.ts
|
|
6796
|
+
// src/evaluation/providers/pi-cli.ts
|
|
7055
6797
|
import { spawn as spawn3 } from "node:child_process";
|
|
7056
6798
|
import { randomUUID as randomUUID7 } from "node:crypto";
|
|
7057
6799
|
import { createWriteStream as createWriteStream5 } from "node:fs";
|
|
@@ -7112,25 +6854,50 @@ function subscribeToPiLogEntries(listener) {
|
|
|
7112
6854
|
};
|
|
7113
6855
|
}
|
|
7114
6856
|
|
|
7115
|
-
// src/evaluation/providers/pi-coding-agent.ts
|
|
6857
|
+
// src/evaluation/providers/pi-utils.ts
|
|
6858
|
+
function extractPiTextContent(content) {
|
|
6859
|
+
if (typeof content === "string") {
|
|
6860
|
+
return content;
|
|
6861
|
+
}
|
|
6862
|
+
if (!Array.isArray(content)) {
|
|
6863
|
+
return void 0;
|
|
6864
|
+
}
|
|
6865
|
+
const textParts = [];
|
|
6866
|
+
for (const part of content) {
|
|
6867
|
+
if (!part || typeof part !== "object") {
|
|
6868
|
+
continue;
|
|
6869
|
+
}
|
|
6870
|
+
const p = part;
|
|
6871
|
+
if (p.type === "text" && typeof p.text === "string") {
|
|
6872
|
+
textParts.push(p.text);
|
|
6873
|
+
}
|
|
6874
|
+
}
|
|
6875
|
+
return textParts.length > 0 ? textParts.join("\n") : void 0;
|
|
6876
|
+
}
|
|
6877
|
+
function toFiniteNumber(value) {
|
|
6878
|
+
if (typeof value === "number" && Number.isFinite(value)) return value;
|
|
6879
|
+
return void 0;
|
|
6880
|
+
}
|
|
6881
|
+
|
|
6882
|
+
// src/evaluation/providers/pi-cli.ts
|
|
7116
6883
|
var WORKSPACE_PREFIX = "agentv-pi-";
|
|
7117
6884
|
var PROMPT_FILENAME = "prompt.md";
|
|
7118
|
-
var PiCodingAgentProvider = class {
|
|
6885
|
+
var PiCliProvider = class {
|
|
7119
6886
|
id;
|
|
7120
|
-
kind = "pi-coding-agent";
|
|
6887
|
+
kind = "pi-cli";
|
|
7121
6888
|
targetName;
|
|
7122
6889
|
supportsBatch = false;
|
|
7123
6890
|
config;
|
|
7124
6891
|
runPi;
|
|
7125
6892
|
constructor(targetName, config, runner = defaultPiRunner) {
|
|
7126
|
-
this.id = `pi-coding-agent:${targetName}`;
|
|
6893
|
+
this.id = `pi-cli:${targetName}`;
|
|
7127
6894
|
this.targetName = targetName;
|
|
7128
6895
|
this.config = config;
|
|
7129
6896
|
this.runPi = runner;
|
|
7130
6897
|
}
|
|
7131
6898
|
async invoke(request) {
|
|
7132
6899
|
if (request.signal?.aborted) {
|
|
7133
|
-
throw new Error("Pi coding agent request was aborted before execution");
|
|
6900
|
+
throw new Error("Pi CLI request was aborted before execution");
|
|
7134
6901
|
}
|
|
7135
6902
|
const inputFiles = normalizeInputFiles(request.inputFiles);
|
|
7136
6903
|
const startTime = (/* @__PURE__ */ new Date()).toISOString();
|
|
@@ -7140,17 +6907,17 @@ var PiCodingAgentProvider = class {
|
|
|
7140
6907
|
try {
|
|
7141
6908
|
const promptFile = path17.join(workspaceRoot, PROMPT_FILENAME);
|
|
7142
6909
|
await writeFile(promptFile, request.question, "utf8");
|
|
7143
|
-
const args = this.buildPiArgs(request.question, inputFiles
|
|
6910
|
+
const args = this.buildPiArgs(request.question, inputFiles);
|
|
7144
6911
|
const cwd = this.resolveCwd(workspaceRoot, request.cwd);
|
|
7145
6912
|
const result = await this.executePi(args, cwd, request.signal, logger);
|
|
7146
6913
|
if (result.timedOut) {
|
|
7147
6914
|
throw new Error(
|
|
7148
|
-
`Pi
|
|
6915
|
+
`Pi CLI timed out${formatTimeoutSuffix3(this.config.timeoutMs ?? void 0)}`
|
|
7149
6916
|
);
|
|
7150
6917
|
}
|
|
7151
6918
|
if (result.exitCode !== 0) {
|
|
7152
6919
|
const detail = pickDetail(result.stderr, result.stdout);
|
|
7153
|
-
const prefix = `Pi
|
|
6920
|
+
const prefix = `Pi CLI exited with code ${result.exitCode}`;
|
|
7154
6921
|
throw new Error(detail ? `${prefix}: ${detail}` : prefix);
|
|
7155
6922
|
}
|
|
7156
6923
|
const parsed = parsePiJsonl(result.stdout);
|
|
@@ -7207,7 +6974,7 @@ var PiCodingAgentProvider = class {
|
|
|
7207
6974
|
}
|
|
7208
6975
|
return path17.resolve(this.config.cwd);
|
|
7209
6976
|
}
|
|
7210
|
-
buildPiArgs(prompt, inputFiles
|
|
6977
|
+
buildPiArgs(prompt, inputFiles) {
|
|
7211
6978
|
const args = [];
|
|
7212
6979
|
if (this.config.subprovider) {
|
|
7213
6980
|
args.push("--provider", this.config.subprovider);
|
|
@@ -7259,7 +7026,7 @@ ${prompt}` : prompt;
|
|
|
7259
7026
|
const err = error;
|
|
7260
7027
|
if (err.code === "ENOENT") {
|
|
7261
7028
|
throw new Error(
|
|
7262
|
-
`Pi
|
|
7029
|
+
`Pi CLI executable '${this.config.executable}' was not found. Update the target executable or add it to PATH.`
|
|
7263
7030
|
);
|
|
7264
7031
|
}
|
|
7265
7032
|
throw error;
|
|
@@ -7269,26 +7036,18 @@ ${prompt}` : prompt;
|
|
|
7269
7036
|
const env = { ...process.env };
|
|
7270
7037
|
if (this.config.apiKey) {
|
|
7271
7038
|
const provider = this.config.subprovider?.toLowerCase() ?? "google";
|
|
7272
|
-
|
|
7273
|
-
|
|
7274
|
-
|
|
7275
|
-
|
|
7276
|
-
|
|
7277
|
-
|
|
7278
|
-
|
|
7279
|
-
|
|
7280
|
-
|
|
7281
|
-
|
|
7282
|
-
|
|
7283
|
-
|
|
7284
|
-
env.GROQ_API_KEY = this.config.apiKey;
|
|
7285
|
-
break;
|
|
7286
|
-
case "xai":
|
|
7287
|
-
env.XAI_API_KEY = this.config.apiKey;
|
|
7288
|
-
break;
|
|
7289
|
-
case "openrouter":
|
|
7290
|
-
env.OPENROUTER_API_KEY = this.config.apiKey;
|
|
7291
|
-
break;
|
|
7039
|
+
const ENV_KEY_MAP = {
|
|
7040
|
+
google: "GEMINI_API_KEY",
|
|
7041
|
+
gemini: "GEMINI_API_KEY",
|
|
7042
|
+
anthropic: "ANTHROPIC_API_KEY",
|
|
7043
|
+
openai: "OPENAI_API_KEY",
|
|
7044
|
+
groq: "GROQ_API_KEY",
|
|
7045
|
+
xai: "XAI_API_KEY",
|
|
7046
|
+
openrouter: "OPENROUTER_API_KEY"
|
|
7047
|
+
};
|
|
7048
|
+
const envKey = ENV_KEY_MAP[provider];
|
|
7049
|
+
if (envKey) {
|
|
7050
|
+
env[envKey] = this.config.apiKey;
|
|
7292
7051
|
}
|
|
7293
7052
|
}
|
|
7294
7053
|
return env;
|
|
@@ -7306,7 +7065,7 @@ ${prompt}` : prompt;
|
|
|
7306
7065
|
if (this.config.logDir) {
|
|
7307
7066
|
return path17.resolve(this.config.logDir);
|
|
7308
7067
|
}
|
|
7309
|
-
return path17.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
|
|
7068
|
+
return path17.join(process.cwd(), ".agentv", "logs", "pi-cli");
|
|
7310
7069
|
}
|
|
7311
7070
|
async createStreamLogger(request) {
|
|
7312
7071
|
const logDir = this.resolveLogDirectory();
|
|
@@ -7358,7 +7117,7 @@ var PiStreamLogger = class _PiStreamLogger {
|
|
|
7358
7117
|
static async create(options) {
|
|
7359
7118
|
const logger = new _PiStreamLogger(options.filePath, options.format);
|
|
7360
7119
|
const header = [
|
|
7361
|
-
"# Pi
|
|
7120
|
+
"# Pi CLI stream log",
|
|
7362
7121
|
`# target: ${options.targetName}`,
|
|
7363
7122
|
options.evalCaseId ? `# eval: ${options.evalCaseId}` : void 0,
|
|
7364
7123
|
options.attempt !== void 0 ? `# attempt: ${options.attempt + 1}` : void 0,
|
|
@@ -7507,10 +7266,10 @@ function summarizePiEvent(event) {
|
|
|
7507
7266
|
return `${type}: ${role}`;
|
|
7508
7267
|
}
|
|
7509
7268
|
case "message_update": {
|
|
7510
|
-
const
|
|
7511
|
-
const eventType =
|
|
7269
|
+
const evt = record.assistantMessageEvent;
|
|
7270
|
+
const eventType = evt?.type;
|
|
7512
7271
|
if (eventType === "text_delta") {
|
|
7513
|
-
const delta =
|
|
7272
|
+
const delta = evt?.delta;
|
|
7514
7273
|
if (typeof delta === "string") {
|
|
7515
7274
|
const preview = delta.length > 50 ? `${delta.slice(0, 50)}...` : delta;
|
|
7516
7275
|
return `text_delta: ${preview}`;
|
|
@@ -7532,7 +7291,7 @@ function tryParseJsonValue(rawLine) {
|
|
|
7532
7291
|
function parsePiJsonl(output) {
|
|
7533
7292
|
const trimmed = output.trim();
|
|
7534
7293
|
if (trimmed.length === 0) {
|
|
7535
|
-
throw new Error("Pi
|
|
7294
|
+
throw new Error("Pi CLI produced no output");
|
|
7536
7295
|
}
|
|
7537
7296
|
const lines = trimmed.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.length > 0);
|
|
7538
7297
|
const parsed = [];
|
|
@@ -7543,38 +7302,27 @@ function parsePiJsonl(output) {
|
|
|
7543
7302
|
}
|
|
7544
7303
|
}
|
|
7545
7304
|
if (parsed.length === 0) {
|
|
7546
|
-
throw new Error("Pi
|
|
7305
|
+
throw new Error("Pi CLI produced no valid JSON output");
|
|
7547
7306
|
}
|
|
7548
7307
|
return parsed;
|
|
7549
7308
|
}
|
|
7550
7309
|
function extractMessages(events) {
|
|
7551
7310
|
for (let i = events.length - 1; i >= 0; i--) {
|
|
7552
7311
|
const event = events[i];
|
|
7553
|
-
if (!event || typeof event !== "object") {
|
|
7554
|
-
continue;
|
|
7555
|
-
}
|
|
7312
|
+
if (!event || typeof event !== "object") continue;
|
|
7556
7313
|
const record = event;
|
|
7557
|
-
if (record.type !== "agent_end") {
|
|
7558
|
-
continue;
|
|
7559
|
-
}
|
|
7314
|
+
if (record.type !== "agent_end") continue;
|
|
7560
7315
|
const messages = record.messages;
|
|
7561
|
-
if (!Array.isArray(messages)) {
|
|
7562
|
-
continue;
|
|
7563
|
-
}
|
|
7316
|
+
if (!Array.isArray(messages)) continue;
|
|
7564
7317
|
return messages.map(convertPiMessage).filter((m) => m !== void 0);
|
|
7565
7318
|
}
|
|
7566
7319
|
const output = [];
|
|
7567
7320
|
for (const event of events) {
|
|
7568
|
-
if (!event || typeof event !== "object") {
|
|
7569
|
-
continue;
|
|
7570
|
-
}
|
|
7321
|
+
if (!event || typeof event !== "object") continue;
|
|
7571
7322
|
const record = event;
|
|
7572
7323
|
if (record.type === "turn_end") {
|
|
7573
|
-
const
|
|
7574
|
-
|
|
7575
|
-
if (converted) {
|
|
7576
|
-
output.push(converted);
|
|
7577
|
-
}
|
|
7324
|
+
const converted = convertPiMessage(record.message);
|
|
7325
|
+
if (converted) output.push(converted);
|
|
7578
7326
|
}
|
|
7579
7327
|
}
|
|
7580
7328
|
return output;
|
|
@@ -7591,10 +7339,7 @@ function extractTokenUsage(events) {
|
|
|
7591
7339
|
const input = toFiniteNumber(u.input_tokens ?? u.inputTokens ?? u.input);
|
|
7592
7340
|
const output = toFiniteNumber(u.output_tokens ?? u.outputTokens ?? u.output);
|
|
7593
7341
|
if (input !== void 0 || output !== void 0) {
|
|
7594
|
-
const result = {
|
|
7595
|
-
input: input ?? 0,
|
|
7596
|
-
output: output ?? 0
|
|
7597
|
-
};
|
|
7342
|
+
const result = { input: input ?? 0, output: output ?? 0 };
|
|
7598
7343
|
const cached = toFiniteNumber(u.cache_read_input_tokens ?? u.cached ?? u.cachedTokens);
|
|
7599
7344
|
const reasoning = toFiniteNumber(u.reasoning_tokens ?? u.reasoningTokens ?? u.reasoning);
|
|
7600
7345
|
return {
|
|
@@ -7634,40 +7379,577 @@ function aggregateUsageFromMessages(messages) {
|
|
|
7634
7379
|
}
|
|
7635
7380
|
}
|
|
7636
7381
|
}
|
|
7637
|
-
if (!found) return void 0;
|
|
7638
|
-
const result = { input: totalInput, output: totalOutput };
|
|
7639
|
-
if (totalCached !== void 0) {
|
|
7640
|
-
return { ...result, cached: totalCached };
|
|
7382
|
+
if (!found) return void 0;
|
|
7383
|
+
const result = { input: totalInput, output: totalOutput };
|
|
7384
|
+
if (totalCached !== void 0) {
|
|
7385
|
+
return { ...result, cached: totalCached };
|
|
7386
|
+
}
|
|
7387
|
+
return result;
|
|
7388
|
+
}
|
|
7389
|
+
function convertPiMessage(message) {
|
|
7390
|
+
if (!message || typeof message !== "object") return void 0;
|
|
7391
|
+
const msg = message;
|
|
7392
|
+
const role = msg.role;
|
|
7393
|
+
if (typeof role !== "string") return void 0;
|
|
7394
|
+
const content = extractPiTextContent(msg.content);
|
|
7395
|
+
const toolCalls = extractToolCalls3(msg.content);
|
|
7396
|
+
const startTime = typeof msg.timestamp === "number" ? new Date(msg.timestamp).toISOString() : typeof msg.timestamp === "string" ? msg.timestamp : void 0;
|
|
7397
|
+
const metadata = {};
|
|
7398
|
+
if (msg.api) metadata.api = msg.api;
|
|
7399
|
+
if (msg.provider) metadata.provider = msg.provider;
|
|
7400
|
+
if (msg.model) metadata.model = msg.model;
|
|
7401
|
+
if (msg.usage) metadata.usage = msg.usage;
|
|
7402
|
+
if (msg.stopReason) metadata.stopReason = msg.stopReason;
|
|
7403
|
+
return {
|
|
7404
|
+
role,
|
|
7405
|
+
content,
|
|
7406
|
+
toolCalls: toolCalls.length > 0 ? toolCalls : void 0,
|
|
7407
|
+
startTime,
|
|
7408
|
+
metadata: Object.keys(metadata).length > 0 ? metadata : void 0
|
|
7409
|
+
};
|
|
7410
|
+
}
|
|
7411
|
+
function extractToolCalls3(content) {
|
|
7412
|
+
if (!Array.isArray(content)) return [];
|
|
7413
|
+
const toolCalls = [];
|
|
7414
|
+
for (const part of content) {
|
|
7415
|
+
if (!part || typeof part !== "object") continue;
|
|
7416
|
+
const p = part;
|
|
7417
|
+
if (p.type === "tool_use" && typeof p.name === "string") {
|
|
7418
|
+
toolCalls.push({
|
|
7419
|
+
tool: p.name,
|
|
7420
|
+
input: p.input,
|
|
7421
|
+
id: typeof p.id === "string" ? p.id : void 0
|
|
7422
|
+
});
|
|
7423
|
+
}
|
|
7424
|
+
if (p.type === "toolCall" && typeof p.name === "string") {
|
|
7425
|
+
toolCalls.push({
|
|
7426
|
+
tool: p.name,
|
|
7427
|
+
input: p.arguments,
|
|
7428
|
+
id: typeof p.id === "string" ? p.id : void 0
|
|
7429
|
+
});
|
|
7430
|
+
}
|
|
7431
|
+
if (p.type === "tool_result" && typeof p.tool_use_id === "string") {
|
|
7432
|
+
const existing = toolCalls.find((tc) => tc.id === p.tool_use_id);
|
|
7433
|
+
if (existing) {
|
|
7434
|
+
const idx = toolCalls.indexOf(existing);
|
|
7435
|
+
toolCalls[idx] = { ...existing, output: p.content };
|
|
7436
|
+
}
|
|
7437
|
+
}
|
|
7438
|
+
}
|
|
7439
|
+
return toolCalls;
|
|
7440
|
+
}
|
|
7441
|
+
function escapeAtSymbols(prompt) {
|
|
7442
|
+
return prompt.replace(/@\[([^\]]+)\]:/g, "[[$1]]:");
|
|
7443
|
+
}
|
|
7444
|
+
function pickDetail(stderr, stdout) {
|
|
7445
|
+
const errorText = stderr.trim();
|
|
7446
|
+
if (errorText.length > 0) return errorText;
|
|
7447
|
+
const stdoutText = stdout.trim();
|
|
7448
|
+
return stdoutText.length > 0 ? stdoutText : void 0;
|
|
7449
|
+
}
|
|
7450
|
+
function formatTimeoutSuffix3(timeoutMs) {
|
|
7451
|
+
if (!timeoutMs || timeoutMs <= 0) return "";
|
|
7452
|
+
return ` after ${Math.ceil(timeoutMs / 1e3)}s`;
|
|
7453
|
+
}
|
|
7454
|
+
async function defaultPiRunner(options) {
|
|
7455
|
+
return await new Promise((resolve, reject) => {
|
|
7456
|
+
const parts = options.executable.split(/\s+/);
|
|
7457
|
+
const executable = parts[0];
|
|
7458
|
+
const executableArgs = parts.slice(1);
|
|
7459
|
+
const allArgs = [...executableArgs, ...options.args];
|
|
7460
|
+
const child = spawn3(executable, allArgs, {
|
|
7461
|
+
cwd: options.cwd,
|
|
7462
|
+
env: options.env,
|
|
7463
|
+
stdio: ["pipe", "pipe", "pipe"],
|
|
7464
|
+
shell: false
|
|
7465
|
+
});
|
|
7466
|
+
let stdout = "";
|
|
7467
|
+
let stderr = "";
|
|
7468
|
+
let timedOut = false;
|
|
7469
|
+
const onAbort = () => {
|
|
7470
|
+
child.kill("SIGTERM");
|
|
7471
|
+
};
|
|
7472
|
+
if (options.signal) {
|
|
7473
|
+
if (options.signal.aborted) {
|
|
7474
|
+
onAbort();
|
|
7475
|
+
} else {
|
|
7476
|
+
options.signal.addEventListener("abort", onAbort, { once: true });
|
|
7477
|
+
}
|
|
7478
|
+
}
|
|
7479
|
+
let timeoutHandle;
|
|
7480
|
+
if (options.timeoutMs && options.timeoutMs > 0) {
|
|
7481
|
+
timeoutHandle = setTimeout(() => {
|
|
7482
|
+
timedOut = true;
|
|
7483
|
+
child.kill("SIGTERM");
|
|
7484
|
+
}, options.timeoutMs);
|
|
7485
|
+
timeoutHandle.unref?.();
|
|
7486
|
+
}
|
|
7487
|
+
child.stdout.setEncoding("utf8");
|
|
7488
|
+
child.stdout.on("data", (chunk) => {
|
|
7489
|
+
stdout += chunk;
|
|
7490
|
+
options.onStdoutChunk?.(chunk);
|
|
7491
|
+
});
|
|
7492
|
+
child.stderr.setEncoding("utf8");
|
|
7493
|
+
child.stderr.on("data", (chunk) => {
|
|
7494
|
+
stderr += chunk;
|
|
7495
|
+
options.onStderrChunk?.(chunk);
|
|
7496
|
+
});
|
|
7497
|
+
child.stdin.end();
|
|
7498
|
+
const cleanup = () => {
|
|
7499
|
+
if (timeoutHandle) clearTimeout(timeoutHandle);
|
|
7500
|
+
if (options.signal) options.signal.removeEventListener("abort", onAbort);
|
|
7501
|
+
};
|
|
7502
|
+
child.on("error", (error) => {
|
|
7503
|
+
cleanup();
|
|
7504
|
+
reject(error);
|
|
7505
|
+
});
|
|
7506
|
+
child.on("close", (code) => {
|
|
7507
|
+
cleanup();
|
|
7508
|
+
resolve({
|
|
7509
|
+
stdout,
|
|
7510
|
+
stderr,
|
|
7511
|
+
exitCode: typeof code === "number" ? code : -1,
|
|
7512
|
+
timedOut
|
|
7513
|
+
});
|
|
7514
|
+
});
|
|
7515
|
+
});
|
|
7516
|
+
}
|
|
7517
|
+
|
|
7518
|
+
// src/evaluation/providers/pi-coding-agent.ts
|
|
7519
|
+
import { execSync } from "node:child_process";
|
|
7520
|
+
import { randomUUID as randomUUID8 } from "node:crypto";
|
|
7521
|
+
import { createWriteStream as createWriteStream6 } from "node:fs";
|
|
7522
|
+
import { mkdir as mkdir7 } from "node:fs/promises";
|
|
7523
|
+
import path18 from "node:path";
|
|
7524
|
+
import { createInterface } from "node:readline";
|
|
7525
|
+
var piCodingAgentModule = null;
|
|
7526
|
+
var piAiModule = null;
|
|
7527
|
+
async function promptInstall() {
|
|
7528
|
+
if (!process.stdout.isTTY) return false;
|
|
7529
|
+
const rl = createInterface({ input: process.stdin, output: process.stderr });
|
|
7530
|
+
try {
|
|
7531
|
+
return await new Promise((resolve) => {
|
|
7532
|
+
rl.question(
|
|
7533
|
+
"@mariozechner/pi-coding-agent is not installed. Install it now? (y/N) ",
|
|
7534
|
+
(answer) => resolve(answer.trim().toLowerCase() === "y")
|
|
7535
|
+
);
|
|
7536
|
+
});
|
|
7537
|
+
} finally {
|
|
7538
|
+
rl.close();
|
|
7539
|
+
}
|
|
7540
|
+
}
|
|
7541
|
+
async function loadSdkModules() {
|
|
7542
|
+
if (!piCodingAgentModule || !piAiModule) {
|
|
7543
|
+
try {
|
|
7544
|
+
[piCodingAgentModule, piAiModule] = await Promise.all([
|
|
7545
|
+
import("@mariozechner/pi-coding-agent"),
|
|
7546
|
+
import("@mariozechner/pi-ai")
|
|
7547
|
+
]);
|
|
7548
|
+
} catch {
|
|
7549
|
+
if (await promptInstall()) {
|
|
7550
|
+
console.error("Installing @mariozechner/pi-coding-agent...");
|
|
7551
|
+
execSync("bun add @mariozechner/pi-coding-agent", { stdio: "inherit" });
|
|
7552
|
+
[piCodingAgentModule, piAiModule] = await Promise.all([
|
|
7553
|
+
import("@mariozechner/pi-coding-agent"),
|
|
7554
|
+
import("@mariozechner/pi-ai")
|
|
7555
|
+
]);
|
|
7556
|
+
} else {
|
|
7557
|
+
throw new Error(
|
|
7558
|
+
"pi-coding-agent SDK is not installed. Install it with:\n bun add @mariozechner/pi-coding-agent"
|
|
7559
|
+
);
|
|
7560
|
+
}
|
|
7561
|
+
}
|
|
7562
|
+
}
|
|
7563
|
+
const toolMap = {
|
|
7564
|
+
read: piCodingAgentModule.readTool,
|
|
7565
|
+
bash: piCodingAgentModule.bashTool,
|
|
7566
|
+
edit: piCodingAgentModule.editTool,
|
|
7567
|
+
write: piCodingAgentModule.writeTool,
|
|
7568
|
+
grep: piCodingAgentModule.grepTool,
|
|
7569
|
+
find: piCodingAgentModule.findTool,
|
|
7570
|
+
ls: piCodingAgentModule.lsTool
|
|
7571
|
+
};
|
|
7572
|
+
return {
|
|
7573
|
+
createAgentSession: piCodingAgentModule.createAgentSession,
|
|
7574
|
+
codingTools: piCodingAgentModule.codingTools,
|
|
7575
|
+
toolMap,
|
|
7576
|
+
SessionManager: piCodingAgentModule.SessionManager,
|
|
7577
|
+
getModel: piAiModule.getModel
|
|
7578
|
+
};
|
|
7579
|
+
}
|
|
7580
|
+
/**
 * Provider that answers an eval request by driving a pi-coding-agent SDK
 * session in-process.
 *
 * `invoke` creates a session, forwards SDK events to the optional stream
 * logger and to `request.streamCallbacks`, sends a single prompt, and returns
 * the converted messages together with accumulated token usage, cost and
 * timing information.
 */
var PiCodingAgentProvider = class {
  id;
  kind = "pi-coding-agent";
  targetName;
  supportsBatch = false;
  config;
  /**
   * @param {string} targetName - Target name; also used to build `id`.
   * @param {object} config - Provider settings read by the methods below
   *   (subprovider, model, apiKey, tools, thinking, systemPrompt, timeoutMs,
   *   cwd, logDir, logFormat).
   */
  constructor(targetName, config) {
    this.id = `pi-coding-agent:${targetName}`;
    this.targetName = targetName;
    this.config = config;
  }
  /**
   * Runs one prompt through a fresh agent session.
   *
   * @param {object} request - Reads `question`, `inputFiles`, `cwd`, `signal`,
   *   `streamCallbacks`, `evalCaseId` and `attempt`.
   * @returns {Promise<object>} `{ raw, output, tokenUsage, costUsd, durationMs, startTime, endTime }`.
   * @throws {Error} If the request signal is already aborted, or if
   *   `config.timeoutMs` elapses before the prompt settles.
   */
  async invoke(request) {
    // The abort signal is only consulted here, before any work starts.
    if (request.signal?.aborted) {
      throw new Error("Pi coding agent request was aborted before execution");
    }
    const inputFiles = normalizeInputFiles(request.inputFiles);
    const startTime = new Date().toISOString();
    const startMs = Date.now();
    const sdk = await loadSdkModules();
    // Stream logging is best-effort: a failure to set it up must not fail the request.
    const logger = await this.createStreamLogger(request).catch(() => void 0);
    try {
      const cwd = this.resolveCwd(request.cwd);
      const providerName = this.config.subprovider ?? "google";
      const modelId = this.config.model ?? "gemini-2.5-flash";
      this.setApiKeyEnv(providerName);
      const model = sdk.getModel(providerName, modelId);
      const tools = this.resolveTools(sdk);
      const { session } = await sdk.createAgentSession({
        cwd,
        model,
        tools,
        thinkingLevel: this.config.thinking,
        sessionManager: sdk.SessionManager.inMemory(cwd)
      });
      let tokenUsage;
      let costUsd;
      // toolCallId -> { toolCallId, toolName, args, startMs, startTime }
      const toolTrackers = new Map();
      // toolCallId -> { output, durationMs }
      const completedToolResults = new Map();
      const unsubscribe = session.subscribe((event) => {
        logger?.handleEvent(event);
        switch (event.type) {
          case "message_end": {
            const msg = event.message;
            if (msg && typeof msg === "object" && "role" in msg && msg.role === "assistant" && "usage" in msg) {
              const usage = msg.usage;
              if (usage && typeof usage === "object") {
                const input = toFiniteNumber(usage.input);
                const output = toFiniteNumber(usage.output);
                const cached = toFiniteNumber(usage.cacheRead);
                let callDelta;
                if (input !== void 0 || output !== void 0) {
                  callDelta = {
                    input: input ?? 0,
                    output: output ?? 0,
                    ...(cached !== void 0 ? { cached } : {})
                  };
                  // Running totals across every assistant message in this session.
                  tokenUsage = {
                    input: (tokenUsage?.input ?? 0) + callDelta.input,
                    output: (tokenUsage?.output ?? 0) + callDelta.output,
                    ...(cached !== void 0 ? { cached: (tokenUsage?.cached ?? 0) + cached } : tokenUsage?.cached !== void 0 ? { cached: tokenUsage.cached } : {})
                  };
                }
                const cost = usage.cost;
                if (cost && typeof cost === "object") {
                  const total = toFiniteNumber(cost.total);
                  if (total !== void 0) {
                    costUsd = (costUsd ?? 0) + total;
                  }
                }
                request.streamCallbacks?.onLlmCallEnd?.(modelId, callDelta);
              }
            }
            break;
          }
          case "tool_execution_start": {
            toolTrackers.set(event.toolCallId, {
              toolCallId: event.toolCallId,
              toolName: event.toolName,
              args: event.args,
              startMs: Date.now(),
              startTime: new Date().toISOString()
            });
            request.streamCallbacks?.onToolCallStart?.(event.toolName, event.toolCallId);
            break;
          }
          case "tool_execution_end": {
            const tracker = toolTrackers.get(event.toolCallId);
            const durationMs = tracker ? Date.now() - tracker.startMs : 0;
            completedToolResults.set(event.toolCallId, {
              output: event.result,
              durationMs
            });
            request.streamCallbacks?.onToolCallEnd?.(
              event.toolName,
              tracker?.args,
              event.result,
              durationMs,
              event.toolCallId
            );
            // The tracker is deliberately kept: message conversion below looks
            // it up by tool call id to report the call's start and end time.
            // Deleting it here left those fields undefined for every
            // completed tool call.
            break;
          }
        }
      });
      try {
        const systemPrompt = this.config.systemPrompt;
        let prompt = request.question;
        if (systemPrompt) {
          prompt = `${systemPrompt}\n\n${prompt}`;
        }
        if (inputFiles && inputFiles.length > 0) {
          const fileList = inputFiles.map((f) => `@${f}`).join("\n");
          prompt = `${prompt}\n\nFiles:\n${fileList}`;
        }
        if (this.config.timeoutMs) {
          const timeoutMs = this.config.timeoutMs;
          let timeoutId;
          const timeoutPromise = new Promise((_, reject) => {
            timeoutId = setTimeout(
              () => reject(
                new Error(`Pi coding agent timed out after ${Math.ceil(timeoutMs / 1e3)}s`)
              ),
              timeoutMs
            );
          });
          try {
            // The race only stops waiting; the session is disposed in the outer finally.
            await Promise.race([session.prompt(prompt), timeoutPromise]);
          } finally {
            if (timeoutId !== void 0) clearTimeout(timeoutId);
          }
        } else {
          await session.prompt(prompt);
        }
        const agentMessages = session.agent.state.messages;
        const output = [];
        for (const msg of agentMessages) {
          output.push(convertAgentMessage(msg, toolTrackers, completedToolResults));
        }
        const endTime = new Date().toISOString();
        const durationMs = Date.now() - startMs;
        return {
          raw: {
            messages: agentMessages,
            model: this.config.model,
            provider: this.config.subprovider
          },
          output,
          tokenUsage,
          costUsd,
          durationMs,
          startTime,
          endTime
        };
      } finally {
        unsubscribe();
        session.dispose();
      }
    } finally {
      await logger?.close();
    }
  }
  /** Maps config apiKey to the provider-specific env var the SDK reads. */
  setApiKeyEnv(providerName) {
    if (!this.config.apiKey) return;
    const ENV_KEY_MAP = {
      google: "GEMINI_API_KEY",
      gemini: "GEMINI_API_KEY",
      anthropic: "ANTHROPIC_API_KEY",
      openai: "OPENAI_API_KEY",
      groq: "GROQ_API_KEY",
      xai: "XAI_API_KEY",
      openrouter: "OPENROUTER_API_KEY"
    };
    const envKey = ENV_KEY_MAP[providerName.toLowerCase()];
    if (envKey) {
      // NOTE: this mutates the process-wide environment, not just this provider.
      process.env[envKey] = this.config.apiKey;
    }
  }
  /**
   * Picks the working directory: per-request override first, then
   * `config.cwd`, otherwise the current process directory.
   */
  resolveCwd(cwdOverride) {
    if (cwdOverride) {
      return path18.resolve(cwdOverride);
    }
    if (this.config.cwd) {
      return path18.resolve(this.config.cwd);
    }
    return process.cwd();
  }
  /**
   * Resolves the comma-separated `config.tools` list against `sdk.toolMap`.
   * Names are trimmed and lower-cased; unknown names are ignored. Falls back
   * to `sdk.codingTools` when nothing is configured or nothing matches.
   */
  resolveTools(sdk) {
    if (!this.config.tools) {
      return sdk.codingTools;
    }
    const selected = [];
    for (const rawName of this.config.tools.split(",")) {
      const name = rawName.trim().toLowerCase();
      // Own-property check: `in` would also match inherited keys such as "constructor".
      if (!Object.prototype.hasOwnProperty.call(sdk.toolMap, name)) {
        continue;
      }
      const tool = sdk.toolMap[name];
      // A known key can still map to nothing if the SDK does not export that tool.
      if (tool !== void 0 && tool !== null) {
        selected.push(tool);
      }
    }
    return selected.length > 0 ? selected : sdk.codingTools;
  }
  /** Returns `config.logDir` resolved, or `<cwd>/.agentv/logs/pi-coding-agent`. */
  resolveLogDirectory() {
    if (this.config.logDir) {
      return path18.resolve(this.config.logDir);
    }
    return path18.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
  }
  /**
   * Creates the stream logger for this request and records its location.
   * Returns `undefined` (after a console warning) when the log directory or
   * the logger cannot be created.
   */
  async createStreamLogger(request) {
    const logDir = this.resolveLogDirectory();
    if (!logDir) {
      return void 0;
    }
    try {
      await mkdir7(logDir, { recursive: true });
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
      return void 0;
    }
    const filePath = path18.join(logDir, buildLogFilename6(request, this.targetName));
    try {
      const logger = await PiStreamLogger2.create({
        filePath,
        targetName: this.targetName,
        evalCaseId: request.evalCaseId,
        attempt: request.attempt,
        format: this.config.logFormat ?? "summary"
      });
      recordPiLogEntry({
        filePath,
        targetName: this.targetName,
        evalCaseId: request.evalCaseId,
        attempt: request.attempt
      });
      return logger;
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      console.warn(`Skipping Pi stream logging for ${filePath}: ${message}`);
      return void 0;
    }
  }
};
|
|
7827
|
+
/**
 * Append-only writer for a Pi Coding Agent stream log file.
 *
 * Logging is treated as best-effort: if the underlying write stream fails
 * (for example the file cannot be opened), the failure is recorded, a warning
 * is printed once, and later events are dropped instead of crashing the
 * process.
 */
var PiStreamLogger2 = class _PiStreamLogger {
  filePath;
  stream;
  startedAt = Date.now();
  format;
  /** First error emitted by the write stream, or undefined while healthy. */
  streamError;
  /**
   * @param {string} filePath - Log file to append to.
   * @param {string} format - "json" writes each event as pretty JSON; any
   *   other value writes a one-line summary.
   */
  constructor(filePath, format) {
    this.filePath = filePath;
    this.format = format;
    this.stream = createWriteStream6(filePath, { flags: "a" });
    // The file is opened asynchronously. Without a listener, an 'error' event
    // from the stream would be thrown as an uncaught exception.
    this.stream.on("error", (error) => {
      if (this.streamError !== void 0) return;
      this.streamError = error;
      const message = error instanceof Error ? error.message : String(error);
      console.warn(`Pi stream logging disabled for ${filePath}: ${message}`);
    });
  }
  /**
   * Creates a logger and writes the `#`-prefixed header lines.
   *
   * @param {{filePath: string, format: string, targetName: string, evalCaseId?: string, attempt?: number}} options
   * @returns {Promise<_PiStreamLogger>}
   */
  static async create(options) {
    const logger = new _PiStreamLogger(options.filePath, options.format);
    // Optional entries evaluate to undefined and are filtered out.
    const header = [
      "# Pi Coding Agent stream log",
      `# target: ${options.targetName}`,
      options.evalCaseId ? `# eval: ${options.evalCaseId}` : void 0,
      options.attempt !== void 0 ? `# attempt: ${options.attempt + 1}` : void 0,
      `# started: ${new Date().toISOString()}`
    ].filter((line) => Boolean(line));
    for (const line of header) {
      logger.stream.write(`${line}\n`);
    }
    return logger;
  }
  /**
   * Appends one SDK event, prefixed with the time elapsed since the logger
   * was constructed. Events without a string `type` are ignored.
   */
  handleEvent(event) {
    // Once the stream has failed or been closed there is nothing to write to.
    if (this.streamError !== void 0 || this.stream.destroyed) return;
    if (!event || typeof event !== "object") return;
    const type = typeof event.type === "string" ? event.type : void 0;
    if (!type) return;
    const message = this.format === "json" ? JSON.stringify(event, null, 2) : summarizeSdkEvent2(event);
    if (message) {
      this.stream.write(`[+${formatElapsed6(this.startedAt)}] ${message}\n`);
    }
  }
  /**
   * Ends the stream and waits for it to finish. Resolves immediately when the
   * stream already failed or was already closed; rejects if the stream errors
   * while it is being ended.
   */
  async close() {
    if (this.streamError !== void 0 || this.stream.destroyed) {
      return;
    }
    await new Promise((resolve, reject) => {
      this.stream.once("error", reject);
      this.stream.end(() => resolve());
    });
  }
};
|
|
7871
|
+
/**
 * Builds the one-line summary written to the stream log for an SDK event.
 *
 * Returns undefined for non-objects and for events whose `type` is not a
 * non-empty string. Message events include the message role, tool execution
 * events include the tool name, and every other event is summarized by its
 * type alone.
 */
function summarizeSdkEvent2(event) {
  const isRecord = Boolean(event) && typeof event === "object";
  if (!isRecord) {
    return void 0;
  }
  const kind = event.type;
  if (typeof kind !== "string" || kind.length === 0) {
    return void 0;
  }
  if (kind === "message_start" || kind === "message_end") {
    const role = event.message?.role ?? "unknown";
    return `${kind}: ${role}`;
  }
  if (kind === "tool_execution_start") {
    return `tool_start: ${event.toolName}`;
  }
  if (kind === "tool_execution_end") {
    return `tool_end: ${event.toolName}`;
  }
  return kind;
}
|
|
7895
|
+
/**
 * Builds the log file name for one request:
 * `<timestamp>_<target>_<evalId>[_attempt-N]_<8 uuid chars>.log`.
 *
 * The timestamp is the current ISO time with ":" and "." replaced by "-".
 * The eval id falls back to "pi" when the request has none, and the attempt
 * number is shown 1-based.
 */
function buildLogFilename6(request, targetName) {
  const stamp = new Date().toISOString().replace(/[:.]/g, "-");
  const evalPart = sanitizeForFilename6(request.evalCaseId ?? "pi");
  let attemptPart = "";
  if (request.attempt !== void 0) {
    attemptPart = `_attempt-${request.attempt + 1}`;
  }
  const targetPart = sanitizeForFilename6(targetName);
  const uniquePart = randomUUID8().slice(0, 8);
  return [stamp, targetPart, `${evalPart}${attemptPart}`, uniquePart].join("_") + ".log";
}
|
|
7902
|
+
/**
 * Makes a string safe to embed in a file name by collapsing every run of
 * characters outside `A-Z a-z 0-9 . _ -` into a single underscore.
 * Returns "pi" when the result would be empty.
 */
function sanitizeForFilename6(value) {
  const cleaned = value.replace(/[^A-Za-z0-9._-]+/g, "_");
  if (cleaned.length === 0) {
    return "pi";
  }
  return cleaned;
}
|
|
7906
|
+
/**
 * Formats the whole seconds elapsed since `startedAt` (epoch milliseconds)
 * as `MM:SS`, or as `HH:MM:SS` once at least one hour has passed.
 * Each field is zero-padded to two digits.
 */
function formatElapsed6(startedAt) {
  const twoDigits = (n) => n.toString().padStart(2, "0");
  const totalSeconds = Math.floor((Date.now() - startedAt) / 1e3);
  const hours = Math.floor(totalSeconds / 3600);
  const minutes = Math.floor((totalSeconds % 3600) / 60);
  const seconds = totalSeconds % 60;
  const fields = hours > 0 ? [hours, minutes, seconds] : [minutes, seconds];
  return fields.map((field) => twoDigits(field)).join(":");
}
|
|
7644
|
-
function
|
|
7916
|
+
/**
 * Converts one SDK agent message into the provider's output message shape.
 *
 * @param {unknown} message - Raw message from the agent session state.
 * @param {Map} toolTrackers - Passed through to `extractToolCalls4`.
 * @param {Map} completedToolResults - Passed through to `extractToolCalls4`.
 * @returns {object} `{ role, content, toolCalls, startTime, metadata, tokenUsage }`;
 *   non-object input yields `{ role: "unknown", content: String(message) }`.
 */
function convertAgentMessage(message, toolTrackers, completedToolResults) {
  if (!message || typeof message !== "object") {
    return { role: "unknown", content: String(message) };
  }
  const msg = message;
  const role = typeof msg.role === "string" ? msg.role : "unknown";
  const content = extractPiTextContent(msg.content);
  const toolCalls = extractToolCalls4(msg.content, toolTrackers, completedToolResults);
  let startTimeVal;
  if (typeof msg.timestamp === "number") {
    // toISOString() throws RangeError on an invalid date (NaN or out-of-range
    // input); drop the timestamp rather than fail the whole conversion.
    const parsed = new Date(msg.timestamp);
    if (!Number.isNaN(parsed.getTime())) {
      startTimeVal = parsed.toISOString();
    }
  } else if (typeof msg.timestamp === "string") {
    startTimeVal = msg.timestamp;
  }
  let msgTokenUsage;
  if (msg.usage && typeof msg.usage === "object") {
    const u = msg.usage;
    const input = toFiniteNumber(u.input);
    const output = toFiniteNumber(u.output);
    if (input !== void 0 || output !== void 0) {
      const cached = toFiniteNumber(u.cacheRead);
      msgTokenUsage = {
        input: input ?? 0,
        output: output ?? 0,
        ...(cached !== void 0 ? { cached } : {})
      };
    }
  }
  // Only truthy fields are copied, so metadata is omitted entirely when empty.
  const metadata = {};
  if (msg.api) metadata.api = msg.api;
  if (msg.provider) metadata.provider = msg.provider;
  if (msg.model) metadata.model = msg.model;
  if (msg.stopReason) metadata.stopReason = msg.stopReason;
  return {
    role,
    content,
    toolCalls: toolCalls.length > 0 ? toolCalls : void 0,
    startTime: startTimeVal,
    metadata: Object.keys(metadata).length > 0 ? metadata : void 0,
    tokenUsage: msgTokenUsage
  };
}
|
|
7670
|
-
function extractToolCalls4(content) {
|
|
7952
|
+
function extractToolCalls4(content, toolTrackers, completedToolResults) {
|
|
7671
7953
|
if (!Array.isArray(content)) {
|
|
7672
7954
|
return [];
|
|
7673
7955
|
}
|
|
@@ -7677,118 +7959,23 @@ function extractToolCalls4(content) {
|
|
|
7677
7959
|
continue;
|
|
7678
7960
|
}
|
|
7679
7961
|
const p = part;
|
|
7680
|
-
if (p.type === "tool_use" && typeof p.name === "string") {
|
|
7681
|
-
toolCalls.push({
|
|
7682
|
-
tool: p.name,
|
|
7683
|
-
input: p.input,
|
|
7684
|
-
id: typeof p.id === "string" ? p.id : void 0
|
|
7685
|
-
});
|
|
7686
|
-
}
|
|
7687
7962
|
if (p.type === "toolCall" && typeof p.name === "string") {
|
|
7963
|
+
const id = typeof p.id === "string" ? p.id : void 0;
|
|
7964
|
+
const tracker = id ? toolTrackers.get(id) : void 0;
|
|
7965
|
+
const completed = id ? completedToolResults.get(id) : void 0;
|
|
7688
7966
|
toolCalls.push({
|
|
7689
7967
|
tool: p.name,
|
|
7690
7968
|
input: p.arguments,
|
|
7691
|
-
id
|
|
7969
|
+
id,
|
|
7970
|
+
output: completed?.output,
|
|
7971
|
+
durationMs: completed?.durationMs,
|
|
7972
|
+
startTime: tracker?.startTime,
|
|
7973
|
+
endTime: tracker?.startTime && completed?.durationMs !== void 0 ? new Date(new Date(tracker.startTime).getTime() + completed.durationMs).toISOString() : void 0
|
|
7692
7974
|
});
|
|
7693
7975
|
}
|
|
7694
|
-
if (p.type === "tool_result" && typeof p.tool_use_id === "string") {
|
|
7695
|
-
const existing = toolCalls.find((tc) => tc.id === p.tool_use_id);
|
|
7696
|
-
if (existing) {
|
|
7697
|
-
const idx = toolCalls.indexOf(existing);
|
|
7698
|
-
toolCalls[idx] = {
|
|
7699
|
-
...existing,
|
|
7700
|
-
output: p.content
|
|
7701
|
-
};
|
|
7702
|
-
}
|
|
7703
|
-
}
|
|
7704
7976
|
}
|
|
7705
7977
|
return toolCalls;
|
|
7706
7978
|
}
|
|
7707
|
-
/**
 * Rewrites every `@[label]:` marker in the prompt to `[[label]]:`.
 * Text without such markers is returned unchanged.
 */
function escapeAtSymbols(prompt) {
  const marker = /@\[([^\]]+)\]:/g;
  return prompt.replace(marker, (_match, label) => `[[${label}]]:`);
}
|
|
7710
|
-
/**
 * Chooses the text to show as failure detail: trimmed stderr when it is
 * non-empty, otherwise trimmed stdout, otherwise undefined.
 */
function pickDetail(stderr, stdout) {
  // `||` keeps stdout untouched unless stderr trims down to nothing.
  return stderr.trim() || stdout.trim() || void 0;
}
|
|
7718
|
-
/**
 * Returns " after Ns" (N = timeout rounded up to whole seconds) for use in
 * timeout messages, or an empty string when the timeout is missing, zero or
 * negative.
 */
function formatTimeoutSuffix3(timeoutMs) {
  const noTimeout = !timeoutMs || timeoutMs <= 0;
  return noTimeout ? "" : ` after ${Math.ceil(timeoutMs / 1e3)}s`;
}
|
|
7725
|
-
/**
 * Spawns the Pi executable (without a shell) and collects its output.
 *
 * @param {object} options
 * @param {string} options.executable - Program, optionally followed by
 *   whitespace-separated leading arguments.
 * @param {string[]} options.args - Arguments appended after those.
 * @param {string} [options.cwd] - Working directory for the child.
 * @param {object} [options.env] - Environment for the child.
 * @param {AbortSignal} [options.signal] - Sends SIGTERM to the child on abort.
 * @param {number} [options.timeoutMs] - Sends SIGTERM after this many ms (> 0).
 * @param {(chunk: string) => void} [options.onStdoutChunk]
 * @param {(chunk: string) => void} [options.onStderrChunk]
 * @returns {Promise<{stdout: string, stderr: string, exitCode: number, timedOut: boolean}>}
 *   Resolves when the child closes; `exitCode` is -1 when no numeric code is
 *   reported. Rejects if the child emits `error`.
 */
async function defaultPiRunner(options) {
  return await new Promise((resolve, reject) => {
    // Trim first: leading whitespace would otherwise produce an empty first
    // token and make spawn() try to run "".
    const [executable, ...executableArgs] = options.executable.trim().split(/\s+/);
    const allArgs = [...executableArgs, ...options.args];
    const child = spawn3(executable, allArgs, {
      cwd: options.cwd,
      env: options.env,
      stdio: ["pipe", "pipe", "pipe"],
      shell: false
    });
    let stdout = "";
    let stderr = "";
    let timedOut = false;
    const onAbort = () => {
      child.kill("SIGTERM");
    };
    if (options.signal) {
      if (options.signal.aborted) {
        onAbort();
      } else {
        options.signal.addEventListener("abort", onAbort, { once: true });
      }
    }
    let timeoutHandle;
    if (options.timeoutMs && options.timeoutMs > 0) {
      timeoutHandle = setTimeout(() => {
        timedOut = true;
        child.kill("SIGTERM");
      }, options.timeoutMs);
      // Do not let a pending timeout keep the process alive on its own.
      timeoutHandle.unref?.();
    }
    child.stdout.setEncoding("utf8");
    child.stdout.on("data", (chunk) => {
      stdout += chunk;
      options.onStdoutChunk?.(chunk);
    });
    child.stderr.setEncoding("utf8");
    child.stderr.on("data", (chunk) => {
      stderr += chunk;
      options.onStderrChunk?.(chunk);
    });
    // Nothing is sent on stdin; close it so the child does not wait for input.
    child.stdin.end();
    const cleanup = () => {
      if (timeoutHandle) {
        clearTimeout(timeoutHandle);
      }
      if (options.signal) {
        options.signal.removeEventListener("abort", onAbort);
      }
    };
    child.on("error", (error) => {
      cleanup();
      reject(error);
    });
    child.on("close", (code) => {
      cleanup();
      resolve({
        stdout,
        stderr,
        exitCode: typeof code === "number" ? code : -1,
        timedOut
      });
    });
  });
}
|
|
7792
7979
|
|
|
7793
7980
|
// src/evaluation/providers/provider-registry.ts
|
|
7794
7981
|
var ProviderRegistry = class {
|
|
@@ -7828,17 +8015,17 @@ var ProviderRegistry = class {
|
|
|
7828
8015
|
// src/evaluation/providers/vscode-provider.ts
|
|
7829
8016
|
import { exec as exec2 } from "node:child_process";
|
|
7830
8017
|
import { constants as constants3, access as access3, stat as stat4 } from "node:fs/promises";
|
|
7831
|
-
import
|
|
8018
|
+
import path30 from "node:path";
|
|
7832
8019
|
import { promisify as promisify3 } from "node:util";
|
|
7833
8020
|
|
|
7834
8021
|
// src/evaluation/providers/vscode/dispatch/agentDispatch.ts
|
|
7835
8022
|
import { stat as stat3, writeFile as writeFile4 } from "node:fs/promises";
|
|
7836
|
-
import
|
|
8023
|
+
import path28 from "node:path";
|
|
7837
8024
|
|
|
7838
8025
|
// src/evaluation/providers/vscode/utils/fs.ts
|
|
7839
8026
|
import { constants as constants2 } from "node:fs";
|
|
7840
|
-
import { access as access2, mkdir as
|
|
7841
|
-
import
|
|
8027
|
+
import { access as access2, mkdir as mkdir8, readdir, rm as rm2, stat } from "node:fs/promises";
|
|
8028
|
+
import path19 from "node:path";
|
|
7842
8029
|
async function pathExists(target) {
|
|
7843
8030
|
try {
|
|
7844
8031
|
await access2(target, constants2.F_OK);
|
|
@@ -7848,13 +8035,13 @@ async function pathExists(target) {
|
|
|
7848
8035
|
}
|
|
7849
8036
|
}
|
|
7850
8037
|
/**
 * Creates the directory at `target`, including any missing parent
 * directories.
 */
async function ensureDir(target) {
  const mkdirOptions = { recursive: true };
  await mkdir8(target, mkdirOptions);
}
|
|
7853
8040
|
/**
 * Lists the entries of directory `target`.
 *
 * @returns {Promise<Array<{name: string, absolutePath: string, isDirectory: boolean}>>}
 *   One record per entry; `absolutePath` is `target` joined with the entry name.
 */
async function readDirEntries(target) {
  const dirents = await readdir(target, { withFileTypes: true });
  const described = [];
  for (const dirent of dirents) {
    described.push({
      name: dirent.name,
      absolutePath: path19.join(target, dirent.name),
      isDirectory: dirent.isDirectory()
    });
  }
  return described;
}
|
|
@@ -7869,9 +8056,9 @@ async function removeIfExists(target) {
|
|
|
7869
8056
|
}
|
|
7870
8057
|
|
|
7871
8058
|
// src/evaluation/providers/vscode/utils/path.ts
|
|
7872
|
-
import
|
|
8059
|
+
import path20 from "node:path";
|
|
7873
8060
|
function pathToFileUri2(filePath) {
|
|
7874
|
-
const absolutePath =
|
|
8061
|
+
const absolutePath = path20.isAbsolute(filePath) ? filePath : path20.resolve(filePath);
|
|
7875
8062
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
7876
8063
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
7877
8064
|
return `file:///${normalizedPath}`;
|
|
@@ -7880,7 +8067,7 @@ function pathToFileUri2(filePath) {
|
|
|
7880
8067
|
}
|
|
7881
8068
|
|
|
7882
8069
|
// src/evaluation/providers/vscode/dispatch/promptBuilder.ts
|
|
7883
|
-
import
|
|
8070
|
+
import path21 from "node:path";
|
|
7884
8071
|
|
|
7885
8072
|
// src/evaluation/providers/vscode/utils/template.ts
|
|
7886
8073
|
function renderTemplate2(content, variables) {
|
|
@@ -7972,8 +8159,8 @@ function createBatchRequestPrompt(userQuery, responseFileTmp, responseFileFinal,
|
|
|
7972
8159
|
});
|
|
7973
8160
|
}
|
|
7974
8161
|
function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateContent) {
|
|
7975
|
-
const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${
|
|
7976
|
-
const responseList = responseFiles.map((file) => `"${
|
|
8162
|
+
const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${path21.basename(file)}`).join("\n");
|
|
8163
|
+
const responseList = responseFiles.map((file) => `"${path21.basename(file)}"`).join(", ");
|
|
7977
8164
|
return renderTemplate2(templateContent, {
|
|
7978
8165
|
requestFiles: requestLines,
|
|
7979
8166
|
responseList
|
|
@@ -7982,7 +8169,7 @@ function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateCont
|
|
|
7982
8169
|
|
|
7983
8170
|
// src/evaluation/providers/vscode/dispatch/responseWaiter.ts
|
|
7984
8171
|
import { readFile as readFile7 } from "node:fs/promises";
|
|
7985
|
-
import
|
|
8172
|
+
import path22 from "node:path";
|
|
7986
8173
|
|
|
7987
8174
|
// src/evaluation/providers/vscode/utils/time.ts
|
|
7988
8175
|
function sleep2(ms) {
|
|
@@ -8041,7 +8228,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
|
|
|
8041
8228
|
}
|
|
8042
8229
|
async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, silent = false, timeoutMs = DEFAULT_TIMEOUT_MS) {
|
|
8043
8230
|
if (!silent) {
|
|
8044
|
-
const fileList = responseFilesFinal.map((file) =>
|
|
8231
|
+
const fileList = responseFilesFinal.map((file) => path22.basename(file)).join(", ");
|
|
8045
8232
|
console.error(`waiting for ${responseFilesFinal.length} batch response(s): ${fileList}`);
|
|
8046
8233
|
}
|
|
8047
8234
|
const deadline = Date.now() + timeoutMs;
|
|
@@ -8050,7 +8237,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
8050
8237
|
while (pending.size > 0) {
|
|
8051
8238
|
if (Date.now() >= deadline) {
|
|
8052
8239
|
if (!silent) {
|
|
8053
|
-
const remaining = [...pending].map((f) =>
|
|
8240
|
+
const remaining = [...pending].map((f) => path22.basename(f)).join(", ");
|
|
8054
8241
|
console.error(
|
|
8055
8242
|
`error: timed out after ${Math.round(timeoutMs / 1e3)}s waiting for batch responses. Still pending: ${remaining}`
|
|
8056
8243
|
);
|
|
@@ -8100,16 +8287,16 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
8100
8287
|
|
|
8101
8288
|
// src/evaluation/providers/vscode/dispatch/vscodeProcess.ts
|
|
8102
8289
|
import { exec, spawn as spawn4 } from "node:child_process";
|
|
8103
|
-
import { mkdir as
|
|
8104
|
-
import
|
|
8290
|
+
import { mkdir as mkdir9, writeFile as writeFile2 } from "node:fs/promises";
|
|
8291
|
+
import path25 from "node:path";
|
|
8105
8292
|
import { promisify as promisify2 } from "node:util";
|
|
8106
8293
|
|
|
8107
8294
|
// src/evaluation/providers/vscode/dispatch/constants.ts
|
|
8108
|
-
import
|
|
8295
|
+
import path24 from "node:path";
|
|
8109
8296
|
|
|
8110
8297
|
// src/paths.ts
|
|
8111
8298
|
import os2 from "node:os";
|
|
8112
|
-
import
|
|
8299
|
+
import path23 from "node:path";
|
|
8113
8300
|
var logged = false;
|
|
8114
8301
|
function getAgentvHome() {
|
|
8115
8302
|
const envHome = process.env.AGENTV_HOME;
|
|
@@ -8120,19 +8307,19 @@ function getAgentvHome() {
|
|
|
8120
8307
|
}
|
|
8121
8308
|
return envHome;
|
|
8122
8309
|
}
|
|
8123
|
-
return
|
|
8310
|
+
return path23.join(os2.homedir(), ".agentv");
|
|
8124
8311
|
}
|
|
8125
8312
|
/** Returns the "workspaces" directory inside the AgentV home directory. */
function getWorkspacesRoot() {
  const agentvHome = getAgentvHome();
  return path23.join(agentvHome, "workspaces");
}
|
|
8128
8315
|
/** Returns the "subagents" directory inside the AgentV home directory. */
function getSubagentsRoot() {
  const agentvHome = getAgentvHome();
  return path23.join(agentvHome, "subagents");
}
|
|
8131
8318
|
/** Returns the "trace-state" directory inside the AgentV home directory. */
function getTraceStateRoot() {
  const agentvHome = getAgentvHome();
  return path23.join(agentvHome, "trace-state");
}
|
|
8134
8321
|
/** Returns the "workspace-pool" directory inside the AgentV home directory. */
function getWorkspacePoolRoot() {
  const agentvHome = getAgentvHome();
  return path23.join(agentvHome, "workspace-pool");
}
|
|
8137
8324
|
|
|
8138
8325
|
// src/evaluation/providers/vscode/dispatch/constants.ts
|
|
@@ -8140,7 +8327,7 @@ var DEFAULT_LOCK_NAME = "subagent.lock";
|
|
|
8140
8327
|
var DEFAULT_ALIVE_FILENAME = ".alive";
|
|
8141
8328
|
/**
 * Returns the subagent folder for the given VS Code command:
 * "vscode-insiders-agents" for "code-insiders", "vscode-agents" for anything
 * else, located under the subagents root.
 */
function getDefaultSubagentRoot(vscodeCmd = "code") {
  let folder = "vscode-agents";
  if (vscodeCmd === "code-insiders") {
    folder = "vscode-insiders-agents";
  }
  return path24.join(getSubagentsRoot(), folder);
}
|
|
8145
8332
|
var DEFAULT_SUBAGENT_ROOT = getDefaultSubagentRoot();
|
|
8146
8333
|
|
|
@@ -8207,11 +8394,11 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
8207
8394
|
await raceSpawnError(child);
|
|
8208
8395
|
return true;
|
|
8209
8396
|
}
|
|
8210
|
-
const aliveFile =
|
|
8397
|
+
const aliveFile = path25.join(subagentDir, DEFAULT_ALIVE_FILENAME);
|
|
8211
8398
|
await removeIfExists(aliveFile);
|
|
8212
|
-
const githubAgentsDir =
|
|
8213
|
-
await
|
|
8214
|
-
const wakeupDst =
|
|
8399
|
+
const githubAgentsDir = path25.join(subagentDir, ".github", "agents");
|
|
8400
|
+
await mkdir9(githubAgentsDir, { recursive: true });
|
|
8401
|
+
const wakeupDst = path25.join(githubAgentsDir, "wakeup.md");
|
|
8215
8402
|
await writeFile2(wakeupDst, DEFAULT_WAKEUP_CONTENT, "utf8");
|
|
8216
8403
|
const workspaceChild = spawnVsCode(vscodeCmd, [workspacePath], {
|
|
8217
8404
|
label: "open-workspace"
|
|
@@ -8224,7 +8411,7 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
8224
8411
|
"chat",
|
|
8225
8412
|
"-m",
|
|
8226
8413
|
wakeupChatId,
|
|
8227
|
-
`create a file named .alive in the ${
|
|
8414
|
+
`create a file named .alive in the ${path25.basename(subagentDir)} folder`
|
|
8228
8415
|
];
|
|
8229
8416
|
const wakeupChild = spawnVsCode(vscodeCmd, chatArgs, { label: "send-wakeup-chat" });
|
|
8230
8417
|
await raceSpawnError(wakeupChild);
|
|
@@ -8239,10 +8426,10 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
8239
8426
|
return true;
|
|
8240
8427
|
}
|
|
8241
8428
|
async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, requestInstructions, timestamp, vscodeCmd) {
|
|
8242
|
-
const workspacePath =
|
|
8243
|
-
const messagesDir =
|
|
8244
|
-
await
|
|
8245
|
-
const reqFile =
|
|
8429
|
+
const workspacePath = path25.join(subagentDir, `${path25.basename(subagentDir)}.code-workspace`);
|
|
8430
|
+
const messagesDir = path25.join(subagentDir, "messages");
|
|
8431
|
+
await mkdir9(messagesDir, { recursive: true });
|
|
8432
|
+
const reqFile = path25.join(messagesDir, `${timestamp}_req.md`);
|
|
8246
8433
|
await writeFile2(reqFile, requestInstructions, { encoding: "utf8" });
|
|
8247
8434
|
const reqUri = pathToFileUri2(reqFile);
|
|
8248
8435
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
@@ -8250,16 +8437,16 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
|
|
|
8250
8437
|
chatArgs.push("-a", attachment);
|
|
8251
8438
|
}
|
|
8252
8439
|
chatArgs.push("-a", reqFile);
|
|
8253
|
-
chatArgs.push(`Follow instructions in [${
|
|
8440
|
+
chatArgs.push(`Follow instructions in [${path25.basename(reqFile)}](${reqUri})`);
|
|
8254
8441
|
const workspaceReady = await ensureWorkspaceFocused(
|
|
8255
8442
|
workspacePath,
|
|
8256
|
-
|
|
8443
|
+
path25.basename(subagentDir),
|
|
8257
8444
|
subagentDir,
|
|
8258
8445
|
vscodeCmd
|
|
8259
8446
|
);
|
|
8260
8447
|
if (!workspaceReady) {
|
|
8261
8448
|
throw new Error(
|
|
8262
|
-
`VS Code workspace '${
|
|
8449
|
+
`VS Code workspace '${path25.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
|
|
8263
8450
|
);
|
|
8264
8451
|
}
|
|
8265
8452
|
await sleep2(500);
|
|
@@ -8267,9 +8454,9 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
|
|
|
8267
8454
|
await raceSpawnError(child);
|
|
8268
8455
|
}
|
|
8269
8456
|
async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, chatInstruction, vscodeCmd) {
|
|
8270
|
-
const workspacePath =
|
|
8271
|
-
const messagesDir =
|
|
8272
|
-
await
|
|
8457
|
+
const workspacePath = path25.join(subagentDir, `${path25.basename(subagentDir)}.code-workspace`);
|
|
8458
|
+
const messagesDir = path25.join(subagentDir, "messages");
|
|
8459
|
+
await mkdir9(messagesDir, { recursive: true });
|
|
8273
8460
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
8274
8461
|
for (const attachment of attachmentPaths) {
|
|
8275
8462
|
chatArgs.push("-a", attachment);
|
|
@@ -8277,13 +8464,13 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
|
|
|
8277
8464
|
chatArgs.push(chatInstruction);
|
|
8278
8465
|
const workspaceReady = await ensureWorkspaceFocused(
|
|
8279
8466
|
workspacePath,
|
|
8280
|
-
|
|
8467
|
+
path25.basename(subagentDir),
|
|
8281
8468
|
subagentDir,
|
|
8282
8469
|
vscodeCmd
|
|
8283
8470
|
);
|
|
8284
8471
|
if (!workspaceReady) {
|
|
8285
8472
|
throw new Error(
|
|
8286
|
-
`VS Code workspace '${
|
|
8473
|
+
`VS Code workspace '${path25.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
|
|
8287
8474
|
);
|
|
8288
8475
|
}
|
|
8289
8476
|
await sleep2(500);
|
|
@@ -8292,11 +8479,11 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
|
|
|
8292
8479
|
}
|
|
8293
8480
|
|
|
8294
8481
|
// src/evaluation/providers/vscode/dispatch/workspaceManager.ts
|
|
8295
|
-
import { copyFile, mkdir as
|
|
8296
|
-
import
|
|
8482
|
+
import { copyFile, mkdir as mkdir10, readFile as readFile8, readdir as readdir2, stat as stat2, writeFile as writeFile3 } from "node:fs/promises";
|
|
8483
|
+
import path27 from "node:path";
|
|
8297
8484
|
|
|
8298
8485
|
// src/evaluation/providers/vscode/utils/workspace.ts
|
|
8299
|
-
import
|
|
8486
|
+
import path26 from "node:path";
|
|
8300
8487
|
import JSON5 from "json5";
|
|
8301
8488
|
function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
8302
8489
|
let workspace;
|
|
@@ -8313,10 +8500,10 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
8313
8500
|
}
|
|
8314
8501
|
const transformedFolders = workspace.folders.map((folder) => {
|
|
8315
8502
|
const folderPath = folder.path;
|
|
8316
|
-
if (
|
|
8503
|
+
if (path26.isAbsolute(folderPath)) {
|
|
8317
8504
|
return folder;
|
|
8318
8505
|
}
|
|
8319
|
-
const absolutePath =
|
|
8506
|
+
const absolutePath = path26.resolve(templateDir, folderPath);
|
|
8320
8507
|
return {
|
|
8321
8508
|
...folder,
|
|
8322
8509
|
path: absolutePath
|
|
@@ -8338,19 +8525,19 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
8338
8525
|
if (locationMap && typeof locationMap === "object") {
|
|
8339
8526
|
const transformedMap = {};
|
|
8340
8527
|
for (const [locationPath, value] of Object.entries(locationMap)) {
|
|
8341
|
-
const isAbsolute =
|
|
8528
|
+
const isAbsolute = path26.isAbsolute(locationPath);
|
|
8342
8529
|
if (isAbsolute) {
|
|
8343
8530
|
transformedMap[locationPath] = value;
|
|
8344
8531
|
} else {
|
|
8345
8532
|
const firstGlobIndex = locationPath.search(/[*]/);
|
|
8346
8533
|
if (firstGlobIndex === -1) {
|
|
8347
|
-
const resolvedPath =
|
|
8534
|
+
const resolvedPath = path26.resolve(templateDir, locationPath).replace(/\\/g, "/");
|
|
8348
8535
|
transformedMap[resolvedPath] = value;
|
|
8349
8536
|
} else {
|
|
8350
8537
|
const basePathEnd = locationPath.lastIndexOf("/", firstGlobIndex);
|
|
8351
8538
|
const basePath = basePathEnd !== -1 ? locationPath.substring(0, basePathEnd) : ".";
|
|
8352
8539
|
const patternPath = locationPath.substring(basePathEnd !== -1 ? basePathEnd : 0);
|
|
8353
|
-
const resolvedPath = (
|
|
8540
|
+
const resolvedPath = (path26.resolve(templateDir, basePath) + patternPath).replace(
|
|
8354
8541
|
/\\/g,
|
|
8355
8542
|
"/"
|
|
8356
8543
|
);
|
|
@@ -8391,7 +8578,7 @@ async function findUnlockedSubagent(subagentRoot) {
|
|
|
8391
8578
|
number: Number.parseInt(entry.name.split("-")[1] ?? "", 10)
|
|
8392
8579
|
})).filter((entry) => Number.isInteger(entry.number)).sort((a, b) => a.number - b.number);
|
|
8393
8580
|
for (const subagent of subagents) {
|
|
8394
|
-
const lockFile =
|
|
8581
|
+
const lockFile = path27.join(subagent.absolutePath, DEFAULT_LOCK_NAME);
|
|
8395
8582
|
if (!await pathExists(lockFile)) {
|
|
8396
8583
|
return subagent.absolutePath;
|
|
8397
8584
|
}
|
|
@@ -8401,7 +8588,7 @@ async function findUnlockedSubagent(subagentRoot) {
|
|
|
8401
8588
|
async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
8402
8589
|
let workspaceContent;
|
|
8403
8590
|
if (workspaceTemplate) {
|
|
8404
|
-
const workspaceSrc =
|
|
8591
|
+
const workspaceSrc = path27.resolve(workspaceTemplate);
|
|
8405
8592
|
if (!await pathExists(workspaceSrc)) {
|
|
8406
8593
|
throw new Error(`workspace template not found: ${workspaceSrc}`);
|
|
8407
8594
|
}
|
|
@@ -8414,13 +8601,13 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
|
8414
8601
|
} else {
|
|
8415
8602
|
workspaceContent = DEFAULT_WORKSPACE_TEMPLATE;
|
|
8416
8603
|
}
|
|
8417
|
-
const workspaceName = `${
|
|
8418
|
-
const workspaceDst =
|
|
8419
|
-
const templateDir = workspaceTemplate ?
|
|
8604
|
+
const workspaceName = `${path27.basename(subagentDir)}.code-workspace`;
|
|
8605
|
+
const workspaceDst = path27.join(subagentDir, workspaceName);
|
|
8606
|
+
const templateDir = workspaceTemplate ? path27.dirname(path27.resolve(workspaceTemplate)) : subagentDir;
|
|
8420
8607
|
const workspaceJson = JSON.stringify(workspaceContent, null, 2);
|
|
8421
8608
|
let transformedContent = transformWorkspacePaths(workspaceJson, templateDir);
|
|
8422
8609
|
if (cwd) {
|
|
8423
|
-
const absCwd =
|
|
8610
|
+
const absCwd = path27.resolve(cwd);
|
|
8424
8611
|
const parsed = JSON.parse(transformedContent);
|
|
8425
8612
|
const alreadyPresent = parsed.folders.some((f) => f.path === absCwd);
|
|
8426
8613
|
if (!alreadyPresent) {
|
|
@@ -8429,35 +8616,35 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
|
8429
8616
|
}
|
|
8430
8617
|
}
|
|
8431
8618
|
await writeFile3(workspaceDst, transformedContent, "utf8");
|
|
8432
|
-
const messagesDir =
|
|
8433
|
-
await
|
|
8619
|
+
const messagesDir = path27.join(subagentDir, "messages");
|
|
8620
|
+
await mkdir10(messagesDir, { recursive: true });
|
|
8434
8621
|
return { workspace: workspaceDst, messagesDir };
|
|
8435
8622
|
}
|
|
8436
8623
|
async function createSubagentLock(subagentDir) {
|
|
8437
|
-
const messagesDir =
|
|
8624
|
+
const messagesDir = path27.join(subagentDir, "messages");
|
|
8438
8625
|
if (await pathExists(messagesDir)) {
|
|
8439
8626
|
const files = await readdir2(messagesDir);
|
|
8440
8627
|
await Promise.all(
|
|
8441
8628
|
files.map(async (file) => {
|
|
8442
|
-
const target =
|
|
8629
|
+
const target = path27.join(messagesDir, file);
|
|
8443
8630
|
await removeIfExists(target);
|
|
8444
8631
|
})
|
|
8445
8632
|
);
|
|
8446
8633
|
}
|
|
8447
|
-
const githubAgentsDir =
|
|
8634
|
+
const githubAgentsDir = path27.join(subagentDir, ".github", "agents");
|
|
8448
8635
|
if (await pathExists(githubAgentsDir)) {
|
|
8449
8636
|
const agentFiles = await readdir2(githubAgentsDir);
|
|
8450
8637
|
const preservedFiles = /* @__PURE__ */ new Set(["wakeup.md", "subagent.md"]);
|
|
8451
8638
|
await Promise.all(
|
|
8452
|
-
agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(
|
|
8639
|
+
agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(path27.join(githubAgentsDir, file)))
|
|
8453
8640
|
);
|
|
8454
8641
|
}
|
|
8455
|
-
const lockFile =
|
|
8642
|
+
const lockFile = path27.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
8456
8643
|
await writeFile3(lockFile, "", { encoding: "utf8" });
|
|
8457
8644
|
return lockFile;
|
|
8458
8645
|
}
|
|
8459
8646
|
async function removeSubagentLock(subagentDir) {
|
|
8460
|
-
const lockFile =
|
|
8647
|
+
const lockFile = path27.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
8461
8648
|
await removeIfExists(lockFile);
|
|
8462
8649
|
}
|
|
8463
8650
|
async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspaceTemplate, dryRun, cwd) {
|
|
@@ -8477,9 +8664,9 @@ async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspa
|
|
|
8477
8664
|
return 1;
|
|
8478
8665
|
}
|
|
8479
8666
|
if (promptFile) {
|
|
8480
|
-
const githubAgentsDir =
|
|
8481
|
-
await
|
|
8482
|
-
const agentFile =
|
|
8667
|
+
const githubAgentsDir = path27.join(subagentDir, ".github", "agents");
|
|
8668
|
+
await mkdir10(githubAgentsDir, { recursive: true });
|
|
8669
|
+
const agentFile = path27.join(githubAgentsDir, `${chatId}.md`);
|
|
8483
8670
|
try {
|
|
8484
8671
|
await copyFile(promptFile, agentFile);
|
|
8485
8672
|
} catch (error) {
|
|
@@ -8498,7 +8685,7 @@ async function resolvePromptFile(promptFile) {
|
|
|
8498
8685
|
if (!promptFile) {
|
|
8499
8686
|
return void 0;
|
|
8500
8687
|
}
|
|
8501
|
-
const resolvedPrompt =
|
|
8688
|
+
const resolvedPrompt = path28.resolve(promptFile);
|
|
8502
8689
|
if (!await pathExists(resolvedPrompt)) {
|
|
8503
8690
|
throw new Error(`Prompt file not found: ${resolvedPrompt}`);
|
|
8504
8691
|
}
|
|
@@ -8514,7 +8701,7 @@ async function resolveAttachments(extraAttachments) {
|
|
|
8514
8701
|
}
|
|
8515
8702
|
const resolved = [];
|
|
8516
8703
|
for (const attachment of extraAttachments) {
|
|
8517
|
-
const resolvedPath =
|
|
8704
|
+
const resolvedPath = path28.resolve(attachment);
|
|
8518
8705
|
if (!await pathExists(resolvedPath)) {
|
|
8519
8706
|
throw new Error(`Attachment not found: ${resolvedPath}`);
|
|
8520
8707
|
}
|
|
@@ -8556,7 +8743,7 @@ async function dispatchAgentSession(options) {
|
|
|
8556
8743
|
error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
|
|
8557
8744
|
};
|
|
8558
8745
|
}
|
|
8559
|
-
const subagentName =
|
|
8746
|
+
const subagentName = path28.basename(subagentDir);
|
|
8560
8747
|
const chatId = Math.random().toString(16).slice(2, 10);
|
|
8561
8748
|
const preparationResult = await prepareSubagentDirectory(
|
|
8562
8749
|
subagentDir,
|
|
@@ -8584,9 +8771,9 @@ async function dispatchAgentSession(options) {
|
|
|
8584
8771
|
};
|
|
8585
8772
|
}
|
|
8586
8773
|
const timestamp = generateTimestamp();
|
|
8587
|
-
const messagesDir =
|
|
8588
|
-
const responseFileTmp =
|
|
8589
|
-
const responseFileFinal =
|
|
8774
|
+
const messagesDir = path28.join(subagentDir, "messages");
|
|
8775
|
+
const responseFileTmp = path28.join(messagesDir, `${timestamp}_res.tmp.md`);
|
|
8776
|
+
const responseFileFinal = path28.join(messagesDir, `${timestamp}_res.md`);
|
|
8590
8777
|
const requestInstructions = createRequestPrompt(
|
|
8591
8778
|
userQuery,
|
|
8592
8779
|
responseFileTmp,
|
|
@@ -8691,7 +8878,7 @@ async function dispatchBatchAgent(options) {
|
|
|
8691
8878
|
error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
|
|
8692
8879
|
};
|
|
8693
8880
|
}
|
|
8694
|
-
subagentName =
|
|
8881
|
+
subagentName = path28.basename(subagentDir);
|
|
8695
8882
|
const chatId = Math.random().toString(16).slice(2, 10);
|
|
8696
8883
|
const preparationResult = await prepareSubagentDirectory(
|
|
8697
8884
|
subagentDir,
|
|
@@ -8722,17 +8909,17 @@ async function dispatchBatchAgent(options) {
|
|
|
8722
8909
|
};
|
|
8723
8910
|
}
|
|
8724
8911
|
const timestamp = generateTimestamp();
|
|
8725
|
-
const messagesDir =
|
|
8912
|
+
const messagesDir = path28.join(subagentDir, "messages");
|
|
8726
8913
|
requestFiles = userQueries.map(
|
|
8727
|
-
(_, index) =>
|
|
8914
|
+
(_, index) => path28.join(messagesDir, `${timestamp}_${index}_req.md`)
|
|
8728
8915
|
);
|
|
8729
8916
|
const responseTmpFiles = userQueries.map(
|
|
8730
|
-
(_, index) =>
|
|
8917
|
+
(_, index) => path28.join(messagesDir, `${timestamp}_${index}_res.tmp.md`)
|
|
8731
8918
|
);
|
|
8732
8919
|
responseFilesFinal = userQueries.map(
|
|
8733
|
-
(_, index) =>
|
|
8920
|
+
(_, index) => path28.join(messagesDir, `${timestamp}_${index}_res.md`)
|
|
8734
8921
|
);
|
|
8735
|
-
const orchestratorFile =
|
|
8922
|
+
const orchestratorFile = path28.join(messagesDir, `${timestamp}_orchestrator.md`);
|
|
8736
8923
|
if (!dryRun) {
|
|
8737
8924
|
await Promise.all(
|
|
8738
8925
|
userQueries.map((query, index) => {
|
|
@@ -8818,7 +9005,7 @@ async function dispatchBatchAgent(options) {
|
|
|
8818
9005
|
|
|
8819
9006
|
// src/evaluation/providers/vscode/dispatch/provision.ts
|
|
8820
9007
|
import { writeFile as writeFile5 } from "node:fs/promises";
|
|
8821
|
-
import
|
|
9008
|
+
import path29 from "node:path";
|
|
8822
9009
|
var DEFAULT_WORKSPACE_TEMPLATE2 = {
|
|
8823
9010
|
folders: [
|
|
8824
9011
|
{
|
|
@@ -8849,7 +9036,7 @@ async function provisionSubagents(options) {
|
|
|
8849
9036
|
if (!Number.isInteger(subagents) || subagents < 1) {
|
|
8850
9037
|
throw new Error("subagents must be a positive integer");
|
|
8851
9038
|
}
|
|
8852
|
-
const targetPath =
|
|
9039
|
+
const targetPath = path29.resolve(targetRoot);
|
|
8853
9040
|
if (!dryRun) {
|
|
8854
9041
|
await ensureDir(targetPath);
|
|
8855
9042
|
}
|
|
@@ -8869,7 +9056,7 @@ async function provisionSubagents(options) {
|
|
|
8869
9056
|
continue;
|
|
8870
9057
|
}
|
|
8871
9058
|
highestNumber = Math.max(highestNumber, parsed);
|
|
8872
|
-
const lockFile =
|
|
9059
|
+
const lockFile = path29.join(entry.absolutePath, lockName);
|
|
8873
9060
|
const locked = await pathExists(lockFile);
|
|
8874
9061
|
if (locked) {
|
|
8875
9062
|
lockedSubagents.add(entry.absolutePath);
|
|
@@ -8886,10 +9073,10 @@ async function provisionSubagents(options) {
|
|
|
8886
9073
|
break;
|
|
8887
9074
|
}
|
|
8888
9075
|
const subagentDir = subagent.absolutePath;
|
|
8889
|
-
const githubAgentsDir =
|
|
8890
|
-
const lockFile =
|
|
8891
|
-
const workspaceDst =
|
|
8892
|
-
const wakeupDst =
|
|
9076
|
+
const githubAgentsDir = path29.join(subagentDir, ".github", "agents");
|
|
9077
|
+
const lockFile = path29.join(subagentDir, lockName);
|
|
9078
|
+
const workspaceDst = path29.join(subagentDir, `${path29.basename(subagentDir)}.code-workspace`);
|
|
9079
|
+
const wakeupDst = path29.join(githubAgentsDir, "wakeup.md");
|
|
8893
9080
|
const isLocked = await pathExists(lockFile);
|
|
8894
9081
|
if (isLocked && !force) {
|
|
8895
9082
|
continue;
|
|
@@ -8927,10 +9114,10 @@ async function provisionSubagents(options) {
|
|
|
8927
9114
|
let nextIndex = highestNumber;
|
|
8928
9115
|
while (subagentsProvisioned < subagents) {
|
|
8929
9116
|
nextIndex += 1;
|
|
8930
|
-
const subagentDir =
|
|
8931
|
-
const githubAgentsDir =
|
|
8932
|
-
const workspaceDst =
|
|
8933
|
-
const wakeupDst =
|
|
9117
|
+
const subagentDir = path29.join(targetPath, `subagent-${nextIndex}`);
|
|
9118
|
+
const githubAgentsDir = path29.join(subagentDir, ".github", "agents");
|
|
9119
|
+
const workspaceDst = path29.join(subagentDir, `${path29.basename(subagentDir)}.code-workspace`);
|
|
9120
|
+
const wakeupDst = path29.join(githubAgentsDir, "wakeup.md");
|
|
8934
9121
|
if (!dryRun) {
|
|
8935
9122
|
await ensureDir(subagentDir);
|
|
8936
9123
|
await ensureDir(githubAgentsDir);
|
|
@@ -9120,7 +9307,7 @@ var VSCodeProvider = class {
|
|
|
9120
9307
|
async function locateVSCodeExecutable(candidate) {
|
|
9121
9308
|
const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
|
|
9122
9309
|
if (includesPathSeparator) {
|
|
9123
|
-
const resolved =
|
|
9310
|
+
const resolved = path30.isAbsolute(candidate) ? candidate : path30.resolve(candidate);
|
|
9124
9311
|
try {
|
|
9125
9312
|
await access3(resolved, constants3.F_OK);
|
|
9126
9313
|
return resolved;
|
|
@@ -9149,7 +9336,7 @@ async function resolveWorkspaceTemplateFile(template) {
|
|
|
9149
9336
|
return void 0;
|
|
9150
9337
|
}
|
|
9151
9338
|
try {
|
|
9152
|
-
const stats = await stat4(
|
|
9339
|
+
const stats = await stat4(path30.resolve(template));
|
|
9153
9340
|
return stats.isFile() ? template : void 0;
|
|
9154
9341
|
} catch {
|
|
9155
9342
|
return template;
|
|
@@ -9173,7 +9360,7 @@ function buildMandatoryPrereadBlock2(attachmentFiles) {
|
|
|
9173
9360
|
return "";
|
|
9174
9361
|
}
|
|
9175
9362
|
const buildList = (files) => files.map((absolutePath) => {
|
|
9176
|
-
const fileName =
|
|
9363
|
+
const fileName = path30.basename(absolutePath);
|
|
9177
9364
|
const fileUri = pathToFileUri3(absolutePath);
|
|
9178
9365
|
return `* [${fileName}](${fileUri})`;
|
|
9179
9366
|
});
|
|
@@ -9194,7 +9381,7 @@ function collectAttachmentFiles(attachments) {
|
|
|
9194
9381
|
}
|
|
9195
9382
|
const unique = /* @__PURE__ */ new Map();
|
|
9196
9383
|
for (const attachment of attachments) {
|
|
9197
|
-
const absolutePath =
|
|
9384
|
+
const absolutePath = path30.resolve(attachment);
|
|
9198
9385
|
if (!unique.has(absolutePath)) {
|
|
9199
9386
|
unique.set(absolutePath, absolutePath);
|
|
9200
9387
|
}
|
|
@@ -9202,7 +9389,7 @@ function collectAttachmentFiles(attachments) {
|
|
|
9202
9389
|
return Array.from(unique.values());
|
|
9203
9390
|
}
|
|
9204
9391
|
function pathToFileUri3(filePath) {
|
|
9205
|
-
const absolutePath =
|
|
9392
|
+
const absolutePath = path30.isAbsolute(filePath) ? filePath : path30.resolve(filePath);
|
|
9206
9393
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
9207
9394
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
9208
9395
|
return `file:///${normalizedPath}`;
|
|
@@ -9215,7 +9402,7 @@ function normalizeAttachments(attachments) {
|
|
|
9215
9402
|
}
|
|
9216
9403
|
const deduped = /* @__PURE__ */ new Set();
|
|
9217
9404
|
for (const attachment of attachments) {
|
|
9218
|
-
deduped.add(
|
|
9405
|
+
deduped.add(path30.resolve(attachment));
|
|
9219
9406
|
}
|
|
9220
9407
|
return Array.from(deduped);
|
|
9221
9408
|
}
|
|
@@ -9224,7 +9411,7 @@ function mergeAttachments(all) {
|
|
|
9224
9411
|
for (const list of all) {
|
|
9225
9412
|
if (!list) continue;
|
|
9226
9413
|
for (const inputFile of list) {
|
|
9227
|
-
deduped.add(
|
|
9414
|
+
deduped.add(path30.resolve(inputFile));
|
|
9228
9415
|
}
|
|
9229
9416
|
}
|
|
9230
9417
|
return deduped.size > 0 ? Array.from(deduped) : void 0;
|
|
@@ -9273,7 +9460,7 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
|
|
|
9273
9460
|
// src/evaluation/providers/targets-file.ts
|
|
9274
9461
|
import { constants as constants4 } from "node:fs";
|
|
9275
9462
|
import { access as access4, readFile as readFile9 } from "node:fs/promises";
|
|
9276
|
-
import
|
|
9463
|
+
import path31 from "node:path";
|
|
9277
9464
|
import { parse as parse4 } from "yaml";
|
|
9278
9465
|
function isRecord(value) {
|
|
9279
9466
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
@@ -9310,7 +9497,7 @@ async function fileExists3(filePath) {
|
|
|
9310
9497
|
}
|
|
9311
9498
|
}
|
|
9312
9499
|
async function readTargetDefinitions(filePath) {
|
|
9313
|
-
const absolutePath =
|
|
9500
|
+
const absolutePath = path31.resolve(filePath);
|
|
9314
9501
|
if (!await fileExists3(absolutePath)) {
|
|
9315
9502
|
throw new Error(`targets.yaml not found at ${absolutePath}`);
|
|
9316
9503
|
}
|
|
@@ -9330,16 +9517,16 @@ function listTargetNames(definitions) {
|
|
|
9330
9517
|
}
|
|
9331
9518
|
|
|
9332
9519
|
// src/evaluation/providers/provider-discovery.ts
|
|
9333
|
-
import
|
|
9520
|
+
import path32 from "node:path";
|
|
9334
9521
|
import fg from "fast-glob";
|
|
9335
9522
|
async function discoverProviders(registry, baseDir) {
|
|
9336
9523
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
9337
9524
|
const candidateDirs = [];
|
|
9338
|
-
let dir =
|
|
9339
|
-
const root =
|
|
9525
|
+
let dir = path32.resolve(baseDir);
|
|
9526
|
+
const root = path32.parse(dir).root;
|
|
9340
9527
|
while (dir !== root) {
|
|
9341
|
-
candidateDirs.push(
|
|
9342
|
-
dir =
|
|
9528
|
+
candidateDirs.push(path32.join(dir, ".agentv", "providers"));
|
|
9529
|
+
dir = path32.dirname(dir);
|
|
9343
9530
|
}
|
|
9344
9531
|
let files = [];
|
|
9345
9532
|
for (const providersDir of candidateDirs) {
|
|
@@ -9355,7 +9542,7 @@ async function discoverProviders(registry, baseDir) {
|
|
|
9355
9542
|
}
|
|
9356
9543
|
const discoveredKinds = [];
|
|
9357
9544
|
for (const filePath of files) {
|
|
9358
|
-
const basename =
|
|
9545
|
+
const basename = path32.basename(filePath);
|
|
9359
9546
|
const kindName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
9360
9547
|
if (registry.has(kindName)) {
|
|
9361
9548
|
continue;
|
|
@@ -9373,7 +9560,7 @@ async function discoverProviders(registry, baseDir) {
|
|
|
9373
9560
|
// src/evaluation/providers/index.ts
|
|
9374
9561
|
function createBuiltinProviderRegistry() {
|
|
9375
9562
|
const registry = new ProviderRegistry();
|
|
9376
|
-
registry.register("openai", (t) => new OpenAIProvider(t.name, t.config)).register("openrouter", (t) => new OpenRouterProvider(t.name, t.config)).register("azure", (t) => new AzureProvider(t.name, t.config)).register("anthropic", (t) => new AnthropicProvider(t.name, t.config)).register("gemini", (t) => new GeminiProvider(t.name, t.config)).register("cli", (t) => new CliProvider(t.name, t.config)).register("codex", (t) => new CodexProvider(t.name, t.config)).register("copilot-sdk", (t) => new CopilotSdkProvider(t.name, t.config)).register("copilot-cli", (t) => new CopilotCliProvider(t.name, t.config)).register("pi-coding-agent", (t) => new PiCodingAgentProvider(t.name, t.config)).register("pi-
|
|
9563
|
+
registry.register("openai", (t) => new OpenAIProvider(t.name, t.config)).register("openrouter", (t) => new OpenRouterProvider(t.name, t.config)).register("azure", (t) => new AzureProvider(t.name, t.config)).register("anthropic", (t) => new AnthropicProvider(t.name, t.config)).register("gemini", (t) => new GeminiProvider(t.name, t.config)).register("cli", (t) => new CliProvider(t.name, t.config)).register("codex", (t) => new CodexProvider(t.name, t.config)).register("copilot-sdk", (t) => new CopilotSdkProvider(t.name, t.config)).register("copilot-cli", (t) => new CopilotCliProvider(t.name, t.config)).register("pi-coding-agent", (t) => new PiCodingAgentProvider(t.name, t.config)).register("pi-cli", (t) => new PiCliProvider(t.name, t.config)).register("claude-cli", (t) => new ClaudeCliProvider(t.name, t.config)).register("claude", (t) => new ClaudeCliProvider(t.name, t.config)).register("claude-sdk", (t) => new ClaudeSdkProvider(t.name, t.config)).register("mock", (t) => new MockProvider(t.name, t.config)).register("agentv", (t) => new AgentvProvider(t.name, t.config)).register("vscode", (t) => new VSCodeProvider(t.name, t.config, "vscode")).register(
|
|
9377
9564
|
"vscode-insiders",
|
|
9378
9565
|
(t) => new VSCodeProvider(t.name, t.config, "vscode-insiders")
|
|
9379
9566
|
);
|
|
@@ -9564,15 +9751,15 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
|
|
|
9564
9751
|
});
|
|
9565
9752
|
}
|
|
9566
9753
|
async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
9567
|
-
const { mkdir:
|
|
9754
|
+
const { mkdir: mkdir16, readFile: readFile12, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
|
|
9568
9755
|
const { tmpdir: tmpdir3 } = await import("node:os");
|
|
9569
|
-
const
|
|
9570
|
-
const { randomUUID:
|
|
9571
|
-
const dir =
|
|
9572
|
-
await
|
|
9573
|
-
const stdinPath =
|
|
9574
|
-
const stdoutPath =
|
|
9575
|
-
const stderrPath =
|
|
9756
|
+
const path45 = await import("node:path");
|
|
9757
|
+
const { randomUUID: randomUUID10 } = await import("node:crypto");
|
|
9758
|
+
const dir = path45.join(tmpdir3(), `agentv-exec-${randomUUID10()}`);
|
|
9759
|
+
await mkdir16(dir, { recursive: true });
|
|
9760
|
+
const stdinPath = path45.join(dir, "stdin.txt");
|
|
9761
|
+
const stdoutPath = path45.join(dir, "stdout.txt");
|
|
9762
|
+
const stderrPath = path45.join(dir, "stderr.txt");
|
|
9576
9763
|
await writeFile9(stdinPath, stdinPayload, "utf8");
|
|
9577
9764
|
const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
|
|
9578
9765
|
const { spawn: spawn5 } = await import("node:child_process");
|
|
@@ -10051,7 +10238,7 @@ import { generateText as generateText3 } from "ai";
|
|
|
10051
10238
|
|
|
10052
10239
|
// src/evaluation/evaluators/llm-grader.ts
|
|
10053
10240
|
import fs2 from "node:fs/promises";
|
|
10054
|
-
import
|
|
10241
|
+
import path33 from "node:path";
|
|
10055
10242
|
import { generateText as generateText2, stepCountIs, tool } from "ai";
|
|
10056
10243
|
import { z as z3 } from "zod";
|
|
10057
10244
|
var DEFAULT_MAX_STEPS = 10;
|
|
@@ -10240,7 +10427,7 @@ ${context.fileChanges}`;
|
|
|
10240
10427
|
async evaluateWithRubrics(context, graderProvider, rubrics) {
|
|
10241
10428
|
if (!rubrics || rubrics.length === 0) {
|
|
10242
10429
|
throw new Error(
|
|
10243
|
-
`No rubrics found for evaluator "${context.evaluator?.name ?? "llm-grader"}".
|
|
10430
|
+
`No rubrics found for evaluator "${context.evaluator?.name ?? "llm-grader"}". Add rubric criteria under assertions or use the agentv-eval-writer skill for authoring help.`
|
|
10244
10431
|
);
|
|
10245
10432
|
}
|
|
10246
10433
|
const hasScoreRanges = rubrics.some((r) => r.score_ranges && r.score_ranges.length > 0);
|
|
@@ -10906,8 +11093,8 @@ function calculateScoreRangeResult(result, rubrics) {
|
|
|
10906
11093
|
};
|
|
10907
11094
|
}
|
|
10908
11095
|
function resolveSandboxed(basePath, relativePath) {
|
|
10909
|
-
const resolved =
|
|
10910
|
-
if (!resolved.startsWith(basePath +
|
|
11096
|
+
const resolved = path33.resolve(basePath, relativePath);
|
|
11097
|
+
if (!resolved.startsWith(basePath + path33.sep) && resolved !== basePath) {
|
|
10911
11098
|
throw new Error(`Path '${relativePath}' is outside the workspace`);
|
|
10912
11099
|
}
|
|
10913
11100
|
return resolved;
|
|
@@ -10997,11 +11184,11 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
|
|
|
10997
11184
|
for (const entry of entries) {
|
|
10998
11185
|
if (matches.length >= MAX_SEARCH_MATCHES) return;
|
|
10999
11186
|
if (SEARCH_SKIP_DIRS.has(entry.name)) continue;
|
|
11000
|
-
const fullPath =
|
|
11187
|
+
const fullPath = path33.join(dirPath, entry.name);
|
|
11001
11188
|
if (entry.isDirectory()) {
|
|
11002
11189
|
await searchDirectory(fullPath, workspacePath, regex, matches);
|
|
11003
11190
|
} else if (entry.isFile()) {
|
|
11004
|
-
const ext =
|
|
11191
|
+
const ext = path33.extname(entry.name).toLowerCase();
|
|
11005
11192
|
if (BINARY_EXTENSIONS.has(ext)) continue;
|
|
11006
11193
|
try {
|
|
11007
11194
|
const stat8 = await fs2.stat(fullPath);
|
|
@@ -11013,7 +11200,7 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
|
|
|
11013
11200
|
regex.lastIndex = 0;
|
|
11014
11201
|
if (regex.test(lines[i])) {
|
|
11015
11202
|
matches.push({
|
|
11016
|
-
file:
|
|
11203
|
+
file: path33.relative(workspacePath, fullPath),
|
|
11017
11204
|
line: i + 1,
|
|
11018
11205
|
text: lines[i].substring(0, 200)
|
|
11019
11206
|
});
|
|
@@ -11648,115 +11835,115 @@ var FieldAccuracyEvaluator = class {
|
|
|
11648
11835
|
* Evaluate a single field against the expected value.
|
|
11649
11836
|
*/
|
|
11650
11837
|
evaluateField(fieldConfig, candidateData, expectedData) {
|
|
11651
|
-
const { path:
|
|
11652
|
-
const candidateValue = resolvePath(candidateData,
|
|
11653
|
-
const expectedValue = resolvePath(expectedData,
|
|
11838
|
+
const { path: path45, match, required = true, weight = 1 } = fieldConfig;
|
|
11839
|
+
const candidateValue = resolvePath(candidateData, path45);
|
|
11840
|
+
const expectedValue = resolvePath(expectedData, path45);
|
|
11654
11841
|
if (expectedValue === void 0) {
|
|
11655
11842
|
return {
|
|
11656
|
-
path:
|
|
11843
|
+
path: path45,
|
|
11657
11844
|
score: 1,
|
|
11658
11845
|
// No expected value means no comparison needed
|
|
11659
11846
|
weight,
|
|
11660
11847
|
hit: true,
|
|
11661
|
-
message: `${
|
|
11848
|
+
message: `${path45}: no expected value`
|
|
11662
11849
|
};
|
|
11663
11850
|
}
|
|
11664
11851
|
if (candidateValue === void 0) {
|
|
11665
11852
|
if (required) {
|
|
11666
11853
|
return {
|
|
11667
|
-
path:
|
|
11854
|
+
path: path45,
|
|
11668
11855
|
score: 0,
|
|
11669
11856
|
weight,
|
|
11670
11857
|
hit: false,
|
|
11671
|
-
message: `${
|
|
11858
|
+
message: `${path45} (required, missing)`
|
|
11672
11859
|
};
|
|
11673
11860
|
}
|
|
11674
11861
|
return {
|
|
11675
|
-
path:
|
|
11862
|
+
path: path45,
|
|
11676
11863
|
score: 1,
|
|
11677
11864
|
// Don't penalize missing optional fields
|
|
11678
11865
|
weight: 0,
|
|
11679
11866
|
// Zero weight means it won't affect the score
|
|
11680
11867
|
hit: true,
|
|
11681
|
-
message: `${
|
|
11868
|
+
message: `${path45}: optional field missing`
|
|
11682
11869
|
};
|
|
11683
11870
|
}
|
|
11684
11871
|
switch (match) {
|
|
11685
11872
|
case "exact":
|
|
11686
|
-
return this.compareExact(
|
|
11873
|
+
return this.compareExact(path45, candidateValue, expectedValue, weight);
|
|
11687
11874
|
case "numeric_tolerance":
|
|
11688
11875
|
return this.compareNumericTolerance(
|
|
11689
|
-
|
|
11876
|
+
path45,
|
|
11690
11877
|
candidateValue,
|
|
11691
11878
|
expectedValue,
|
|
11692
11879
|
fieldConfig,
|
|
11693
11880
|
weight
|
|
11694
11881
|
);
|
|
11695
11882
|
case "date":
|
|
11696
|
-
return this.compareDate(
|
|
11883
|
+
return this.compareDate(path45, candidateValue, expectedValue, fieldConfig, weight);
|
|
11697
11884
|
default:
|
|
11698
11885
|
return {
|
|
11699
|
-
path:
|
|
11886
|
+
path: path45,
|
|
11700
11887
|
score: 0,
|
|
11701
11888
|
weight,
|
|
11702
11889
|
hit: false,
|
|
11703
|
-
message: `${
|
|
11890
|
+
message: `${path45}: unknown match type "${match}"`
|
|
11704
11891
|
};
|
|
11705
11892
|
}
|
|
11706
11893
|
}
|
|
11707
11894
|
/**
|
|
11708
11895
|
* Exact equality comparison.
|
|
11709
11896
|
*/
|
|
11710
|
-
compareExact(
|
|
11897
|
+
compareExact(path45, candidateValue, expectedValue, weight) {
|
|
11711
11898
|
if (deepEqual(candidateValue, expectedValue)) {
|
|
11712
11899
|
return {
|
|
11713
|
-
path:
|
|
11900
|
+
path: path45,
|
|
11714
11901
|
score: 1,
|
|
11715
11902
|
weight,
|
|
11716
11903
|
hit: true,
|
|
11717
|
-
message:
|
|
11904
|
+
message: path45
|
|
11718
11905
|
};
|
|
11719
11906
|
}
|
|
11720
11907
|
if (typeof candidateValue !== typeof expectedValue) {
|
|
11721
11908
|
return {
|
|
11722
|
-
path:
|
|
11909
|
+
path: path45,
|
|
11723
11910
|
score: 0,
|
|
11724
11911
|
weight,
|
|
11725
11912
|
hit: false,
|
|
11726
|
-
message: `${
|
|
11913
|
+
message: `${path45} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
|
|
11727
11914
|
};
|
|
11728
11915
|
}
|
|
11729
11916
|
return {
|
|
11730
|
-
path:
|
|
11917
|
+
path: path45,
|
|
11731
11918
|
score: 0,
|
|
11732
11919
|
weight,
|
|
11733
11920
|
hit: false,
|
|
11734
|
-
message: `${
|
|
11921
|
+
message: `${path45} (value mismatch)`
|
|
11735
11922
|
};
|
|
11736
11923
|
}
|
|
11737
11924
|
/**
|
|
11738
11925
|
* Numeric comparison with absolute or relative tolerance.
|
|
11739
11926
|
*/
|
|
11740
|
-
compareNumericTolerance(
|
|
11927
|
+
compareNumericTolerance(path45, candidateValue, expectedValue, fieldConfig, weight) {
|
|
11741
11928
|
const { tolerance = 0, relative = false } = fieldConfig;
|
|
11742
11929
|
const candidateNum = toNumber(candidateValue);
|
|
11743
11930
|
const expectedNum = toNumber(expectedValue);
|
|
11744
11931
|
if (candidateNum === null || expectedNum === null) {
|
|
11745
11932
|
return {
|
|
11746
|
-
path:
|
|
11933
|
+
path: path45,
|
|
11747
11934
|
score: 0,
|
|
11748
11935
|
weight,
|
|
11749
11936
|
hit: false,
|
|
11750
|
-
message: `${
|
|
11937
|
+
message: `${path45} (non-numeric value)`
|
|
11751
11938
|
};
|
|
11752
11939
|
}
|
|
11753
11940
|
if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
|
|
11754
11941
|
return {
|
|
11755
|
-
path:
|
|
11942
|
+
path: path45,
|
|
11756
11943
|
score: 0,
|
|
11757
11944
|
weight,
|
|
11758
11945
|
hit: false,
|
|
11759
|
-
message: `${
|
|
11946
|
+
message: `${path45} (invalid numeric value)`
|
|
11760
11947
|
};
|
|
11761
11948
|
}
|
|
11762
11949
|
const diff = Math.abs(candidateNum - expectedNum);
|
|
@@ -11769,61 +11956,61 @@ var FieldAccuracyEvaluator = class {
|
|
|
11769
11956
|
}
|
|
11770
11957
|
if (withinTolerance) {
|
|
11771
11958
|
return {
|
|
11772
|
-
path:
|
|
11959
|
+
path: path45,
|
|
11773
11960
|
score: 1,
|
|
11774
11961
|
weight,
|
|
11775
11962
|
hit: true,
|
|
11776
|
-
message: `${
|
|
11963
|
+
message: `${path45} (within tolerance: diff=${diff.toFixed(2)})`
|
|
11777
11964
|
};
|
|
11778
11965
|
}
|
|
11779
11966
|
return {
|
|
11780
|
-
path:
|
|
11967
|
+
path: path45,
|
|
11781
11968
|
score: 0,
|
|
11782
11969
|
weight,
|
|
11783
11970
|
hit: false,
|
|
11784
|
-
message: `${
|
|
11971
|
+
message: `${path45} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
|
|
11785
11972
|
};
|
|
11786
11973
|
}
|
|
11787
11974
|
/**
|
|
11788
11975
|
* Date comparison with format normalization.
|
|
11789
11976
|
*/
|
|
11790
|
-
compareDate(
|
|
11977
|
+
compareDate(path45, candidateValue, expectedValue, fieldConfig, weight) {
|
|
11791
11978
|
const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
|
|
11792
11979
|
const candidateDate = parseDate(String(candidateValue), formats);
|
|
11793
11980
|
const expectedDate = parseDate(String(expectedValue), formats);
|
|
11794
11981
|
if (candidateDate === null) {
|
|
11795
11982
|
return {
|
|
11796
|
-
path:
|
|
11983
|
+
path: path45,
|
|
11797
11984
|
score: 0,
|
|
11798
11985
|
weight,
|
|
11799
11986
|
hit: false,
|
|
11800
|
-
message: `${
|
|
11987
|
+
message: `${path45} (unparseable candidate date)`
|
|
11801
11988
|
};
|
|
11802
11989
|
}
|
|
11803
11990
|
if (expectedDate === null) {
|
|
11804
11991
|
return {
|
|
11805
|
-
path:
|
|
11992
|
+
path: path45,
|
|
11806
11993
|
score: 0,
|
|
11807
11994
|
weight,
|
|
11808
11995
|
hit: false,
|
|
11809
|
-
message: `${
|
|
11996
|
+
message: `${path45} (unparseable expected date)`
|
|
11810
11997
|
};
|
|
11811
11998
|
}
|
|
11812
11999
|
if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
|
|
11813
12000
|
return {
|
|
11814
|
-
path:
|
|
12001
|
+
path: path45,
|
|
11815
12002
|
score: 1,
|
|
11816
12003
|
weight,
|
|
11817
12004
|
hit: true,
|
|
11818
|
-
message:
|
|
12005
|
+
message: path45
|
|
11819
12006
|
};
|
|
11820
12007
|
}
|
|
11821
12008
|
return {
|
|
11822
|
-
path:
|
|
12009
|
+
path: path45,
|
|
11823
12010
|
score: 0,
|
|
11824
12011
|
weight,
|
|
11825
12012
|
hit: false,
|
|
11826
|
-
message: `${
|
|
12013
|
+
message: `${path45} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
|
|
11827
12014
|
};
|
|
11828
12015
|
}
|
|
11829
12016
|
/**
|
|
@@ -11856,11 +12043,11 @@ var FieldAccuracyEvaluator = class {
|
|
|
11856
12043
|
};
|
|
11857
12044
|
}
|
|
11858
12045
|
};
|
|
11859
|
-
function resolvePath(obj,
|
|
11860
|
-
if (!
|
|
12046
|
+
function resolvePath(obj, path45) {
|
|
12047
|
+
if (!path45 || !obj) {
|
|
11861
12048
|
return void 0;
|
|
11862
12049
|
}
|
|
11863
|
-
const parts =
|
|
12050
|
+
const parts = path45.split(/\.|\[|\]/).filter((p) => p.length > 0);
|
|
11864
12051
|
let current = obj;
|
|
11865
12052
|
for (const part of parts) {
|
|
11866
12053
|
if (current === null || current === void 0) {
|
|
@@ -12026,9 +12213,7 @@ var PROVIDER_TOOL_SEMANTICS = {
|
|
|
12026
12213
|
"claude-sdk": CLAUDE_MATCHER,
|
|
12027
12214
|
codex: CODEX_MATCHER,
|
|
12028
12215
|
"pi-coding-agent": PI_CODING_AGENT_MATCHER,
|
|
12029
|
-
|
|
12030
|
-
// TODO: consider removing pi-agent-sdk provider entirely.
|
|
12031
|
-
"pi-agent-sdk": PI_CODING_AGENT_MATCHER,
|
|
12216
|
+
"pi-cli": PI_CODING_AGENT_MATCHER,
|
|
12032
12217
|
"copilot-cli": COPILOT_MATCHER,
|
|
12033
12218
|
"copilot-sdk": COPILOT_MATCHER,
|
|
12034
12219
|
vscode: COPILOT_MATCHER,
|
|
@@ -12343,8 +12528,8 @@ var TokenUsageEvaluator = class {
|
|
|
12343
12528
|
};
|
|
12344
12529
|
|
|
12345
12530
|
// src/evaluation/evaluators/tool-trajectory.ts
|
|
12346
|
-
function getNestedValue(obj,
|
|
12347
|
-
const parts =
|
|
12531
|
+
function getNestedValue(obj, path45) {
|
|
12532
|
+
const parts = path45.split(".");
|
|
12348
12533
|
let current = obj;
|
|
12349
12534
|
for (const part of parts) {
|
|
12350
12535
|
if (current === null || current === void 0 || typeof current !== "object") {
|
|
@@ -12964,9 +13149,9 @@ function runEqualsAssertion(output, value) {
|
|
|
12964
13149
|
}
|
|
12965
13150
|
|
|
12966
13151
|
// src/evaluation/orchestrator.ts
|
|
12967
|
-
import { createHash as createHash2, randomUUID as
|
|
12968
|
-
import { copyFile as copyFile2, mkdir as
|
|
12969
|
-
import
|
|
13152
|
+
import { createHash as createHash2, randomUUID as randomUUID9 } from "node:crypto";
|
|
13153
|
+
import { copyFile as copyFile2, mkdir as mkdir14, readdir as readdir6, stat as stat7 } from "node:fs/promises";
|
|
13154
|
+
import path42 from "node:path";
|
|
12970
13155
|
import micromatch3 from "micromatch";
|
|
12971
13156
|
|
|
12972
13157
|
// ../../node_modules/.bun/yocto-queue@1.2.2/node_modules/yocto-queue/index.js
|
|
@@ -13180,7 +13365,7 @@ var InlineAssertEvaluator = class {
|
|
|
13180
13365
|
};
|
|
13181
13366
|
|
|
13182
13367
|
// src/evaluation/evaluators/prompt-resolution.ts
|
|
13183
|
-
import
|
|
13368
|
+
import path34 from "node:path";
|
|
13184
13369
|
async function resolveCustomPrompt(promptConfig, context, timeoutMs) {
|
|
13185
13370
|
if (promptConfig.resolvedPromptScript && promptConfig.resolvedPromptScript.length > 0) {
|
|
13186
13371
|
if (!context) {
|
|
@@ -13226,7 +13411,7 @@ async function executePromptTemplate(script, context, config, timeoutMs) {
|
|
|
13226
13411
|
};
|
|
13227
13412
|
const inputJson = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
|
|
13228
13413
|
const scriptPath = script[script.length - 1];
|
|
13229
|
-
const cwd =
|
|
13414
|
+
const cwd = path34.dirname(scriptPath);
|
|
13230
13415
|
try {
|
|
13231
13416
|
const stdout = await executeScript(script, inputJson, timeoutMs, cwd);
|
|
13232
13417
|
const prompt = stdout.trim();
|
|
@@ -13498,16 +13683,16 @@ function createBuiltinRegistry() {
|
|
|
13498
13683
|
}
|
|
13499
13684
|
|
|
13500
13685
|
// src/evaluation/registry/assertion-discovery.ts
|
|
13501
|
-
import
|
|
13686
|
+
import path35 from "node:path";
|
|
13502
13687
|
import fg2 from "fast-glob";
|
|
13503
13688
|
async function discoverAssertions(registry, baseDir) {
|
|
13504
13689
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
13505
13690
|
const candidateDirs = [];
|
|
13506
|
-
let dir =
|
|
13507
|
-
const root =
|
|
13691
|
+
let dir = path35.resolve(baseDir);
|
|
13692
|
+
const root = path35.parse(dir).root;
|
|
13508
13693
|
while (dir !== root) {
|
|
13509
|
-
candidateDirs.push(
|
|
13510
|
-
dir =
|
|
13694
|
+
candidateDirs.push(path35.join(dir, ".agentv", "assertions"));
|
|
13695
|
+
dir = path35.dirname(dir);
|
|
13511
13696
|
}
|
|
13512
13697
|
let files = [];
|
|
13513
13698
|
for (const assertionsDir of candidateDirs) {
|
|
@@ -13523,7 +13708,7 @@ async function discoverAssertions(registry, baseDir) {
|
|
|
13523
13708
|
}
|
|
13524
13709
|
const discoveredTypes = [];
|
|
13525
13710
|
for (const filePath of files) {
|
|
13526
|
-
const basename =
|
|
13711
|
+
const basename = path35.basename(filePath);
|
|
13527
13712
|
const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
13528
13713
|
if (registry.has(typeName)) {
|
|
13529
13714
|
continue;
|
|
@@ -13541,17 +13726,17 @@ async function discoverAssertions(registry, baseDir) {
|
|
|
13541
13726
|
}
|
|
13542
13727
|
|
|
13543
13728
|
// src/evaluation/registry/grader-discovery.ts
|
|
13544
|
-
import
|
|
13729
|
+
import path36 from "node:path";
|
|
13545
13730
|
import fg3 from "fast-glob";
|
|
13546
13731
|
async function discoverGraders(registry, baseDir) {
|
|
13547
13732
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
13548
13733
|
const candidateDirs = [];
|
|
13549
|
-
let dir =
|
|
13550
|
-
const root =
|
|
13734
|
+
let dir = path36.resolve(baseDir);
|
|
13735
|
+
const root = path36.parse(dir).root;
|
|
13551
13736
|
while (dir !== root) {
|
|
13552
|
-
candidateDirs.push(
|
|
13553
|
-
candidateDirs.push(
|
|
13554
|
-
dir =
|
|
13737
|
+
candidateDirs.push(path36.join(dir, ".agentv", "graders"));
|
|
13738
|
+
candidateDirs.push(path36.join(dir, ".agentv", "judges"));
|
|
13739
|
+
dir = path36.dirname(dir);
|
|
13555
13740
|
}
|
|
13556
13741
|
let files = [];
|
|
13557
13742
|
for (const gradersDir of candidateDirs) {
|
|
@@ -13567,7 +13752,7 @@ async function discoverGraders(registry, baseDir) {
|
|
|
13567
13752
|
}
|
|
13568
13753
|
const discoveredTypes = [];
|
|
13569
13754
|
for (const filePath of files) {
|
|
13570
|
-
const basename =
|
|
13755
|
+
const basename = path36.basename(filePath);
|
|
13571
13756
|
const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
13572
13757
|
if (registry.has(typeName)) {
|
|
13573
13758
|
continue;
|
|
@@ -13727,7 +13912,7 @@ function getTCritical(df) {
|
|
|
13727
13912
|
// src/evaluation/workspace/file-changes.ts
|
|
13728
13913
|
import { exec as execCallback } from "node:child_process";
|
|
13729
13914
|
import { readdirSync as readdirSync2, statSync } from "node:fs";
|
|
13730
|
-
import
|
|
13915
|
+
import path37 from "node:path";
|
|
13731
13916
|
import { promisify as promisify4 } from "node:util";
|
|
13732
13917
|
var execAsync4 = promisify4(execCallback);
|
|
13733
13918
|
function gitExecOpts(workspacePath) {
|
|
@@ -13761,10 +13946,10 @@ async function stageNestedRepoChanges(workspacePath) {
|
|
|
13761
13946
|
}
|
|
13762
13947
|
for (const entry of entries) {
|
|
13763
13948
|
if (entry === ".git" || entry === "node_modules") continue;
|
|
13764
|
-
const childPath =
|
|
13949
|
+
const childPath = path37.join(workspacePath, entry);
|
|
13765
13950
|
try {
|
|
13766
13951
|
if (!statSync(childPath).isDirectory()) continue;
|
|
13767
|
-
if (!statSync(
|
|
13952
|
+
if (!statSync(path37.join(childPath, ".git")).isDirectory()) continue;
|
|
13768
13953
|
} catch {
|
|
13769
13954
|
continue;
|
|
13770
13955
|
}
|
|
@@ -13774,8 +13959,8 @@ async function stageNestedRepoChanges(workspacePath) {
|
|
|
13774
13959
|
}
|
|
13775
13960
|
|
|
13776
13961
|
// src/evaluation/workspace/manager.ts
|
|
13777
|
-
import { cp, mkdir as
|
|
13778
|
-
import
|
|
13962
|
+
import { cp, mkdir as mkdir12, readdir as readdir3, rm as rm4, stat as stat5 } from "node:fs/promises";
|
|
13963
|
+
import path38 from "node:path";
|
|
13779
13964
|
var TemplateNotFoundError = class extends Error {
|
|
13780
13965
|
constructor(templatePath) {
|
|
13781
13966
|
super(`Workspace template not found: ${templatePath}`);
|
|
@@ -13805,14 +13990,14 @@ async function isDirectory(filePath) {
|
|
|
13805
13990
|
}
|
|
13806
13991
|
function getWorkspacePath(evalRunId, caseId, workspaceRoot) {
|
|
13807
13992
|
const root = workspaceRoot ?? getWorkspacesRoot();
|
|
13808
|
-
return
|
|
13993
|
+
return path38.join(root, evalRunId, caseId);
|
|
13809
13994
|
}
|
|
13810
13995
|
async function copyDirectoryRecursive(src, dest) {
|
|
13811
|
-
await
|
|
13996
|
+
await mkdir12(dest, { recursive: true });
|
|
13812
13997
|
const entries = await readdir3(src, { withFileTypes: true });
|
|
13813
13998
|
for (const entry of entries) {
|
|
13814
|
-
const srcPath =
|
|
13815
|
-
const destPath =
|
|
13999
|
+
const srcPath = path38.join(src, entry.name);
|
|
14000
|
+
const destPath = path38.join(dest, entry.name);
|
|
13816
14001
|
if (entry.name === ".git") {
|
|
13817
14002
|
continue;
|
|
13818
14003
|
}
|
|
@@ -13824,7 +14009,7 @@ async function copyDirectoryRecursive(src, dest) {
|
|
|
13824
14009
|
}
|
|
13825
14010
|
}
|
|
13826
14011
|
async function createTempWorkspace(templatePath, evalRunId, caseId, workspaceRoot) {
|
|
13827
|
-
const resolvedTemplatePath =
|
|
14012
|
+
const resolvedTemplatePath = path38.resolve(templatePath);
|
|
13828
14013
|
if (!await fileExists(resolvedTemplatePath)) {
|
|
13829
14014
|
throw new TemplateNotFoundError(resolvedTemplatePath);
|
|
13830
14015
|
}
|
|
@@ -13873,7 +14058,7 @@ async function cleanupWorkspace(workspacePath) {
|
|
|
13873
14058
|
}
|
|
13874
14059
|
async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
|
|
13875
14060
|
const root = workspaceRoot ?? getWorkspacesRoot();
|
|
13876
|
-
const evalDir =
|
|
14061
|
+
const evalDir = path38.join(root, evalRunId);
|
|
13877
14062
|
if (await fileExists(evalDir)) {
|
|
13878
14063
|
await rm4(evalDir, { recursive: true, force: true });
|
|
13879
14064
|
}
|
|
@@ -13883,8 +14068,8 @@ async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
|
|
|
13883
14068
|
import { execFile } from "node:child_process";
|
|
13884
14069
|
import { createHash } from "node:crypto";
|
|
13885
14070
|
import { existsSync as existsSync2 } from "node:fs";
|
|
13886
|
-
import { cp as cp2, mkdir as
|
|
13887
|
-
import
|
|
14071
|
+
import { cp as cp2, mkdir as mkdir13, readFile as readFile10, readdir as readdir4, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
|
|
14072
|
+
import path39 from "node:path";
|
|
13888
14073
|
import { promisify as promisify5 } from "node:util";
|
|
13889
14074
|
var execFileAsync = promisify5(execFile);
|
|
13890
14075
|
function gitEnv() {
|
|
@@ -13935,11 +14120,11 @@ function computeWorkspaceFingerprint(repos) {
|
|
|
13935
14120
|
return createHash("sha256").update(JSON.stringify(canonical)).digest("hex");
|
|
13936
14121
|
}
|
|
13937
14122
|
async function copyDirectoryRecursive2(src, dest, skipDirs) {
|
|
13938
|
-
await
|
|
14123
|
+
await mkdir13(dest, { recursive: true });
|
|
13939
14124
|
const entries = await readdir4(src, { withFileTypes: true });
|
|
13940
14125
|
for (const entry of entries) {
|
|
13941
|
-
const srcPath =
|
|
13942
|
-
const destPath =
|
|
14126
|
+
const srcPath = path39.join(src, entry.name);
|
|
14127
|
+
const destPath = path39.join(dest, entry.name);
|
|
13943
14128
|
if (entry.name === ".git") {
|
|
13944
14129
|
continue;
|
|
13945
14130
|
}
|
|
@@ -13972,8 +14157,8 @@ var WorkspacePoolManager = class {
|
|
|
13972
14157
|
async acquireWorkspace(options) {
|
|
13973
14158
|
const { templatePath, repos, maxSlots, repoManager, poolReset } = options;
|
|
13974
14159
|
const fingerprint = computeWorkspaceFingerprint(repos);
|
|
13975
|
-
const poolDir =
|
|
13976
|
-
await
|
|
14160
|
+
const poolDir = path39.join(this.poolRoot, fingerprint);
|
|
14161
|
+
await mkdir13(poolDir, { recursive: true });
|
|
13977
14162
|
const drifted = await this.checkDrift(poolDir, fingerprint);
|
|
13978
14163
|
if (drifted) {
|
|
13979
14164
|
console.warn(
|
|
@@ -13982,7 +14167,7 @@ var WorkspacePoolManager = class {
|
|
|
13982
14167
|
await this.removeAllSlots(poolDir);
|
|
13983
14168
|
}
|
|
13984
14169
|
for (let i = 0; i < maxSlots; i++) {
|
|
13985
|
-
const slotPath =
|
|
14170
|
+
const slotPath = path39.join(poolDir, `slot-${i}`);
|
|
13986
14171
|
const lockPath = `${slotPath}.lock`;
|
|
13987
14172
|
const locked = await this.tryLock(lockPath);
|
|
13988
14173
|
if (!locked) {
|
|
@@ -14000,7 +14185,7 @@ var WorkspacePoolManager = class {
|
|
|
14000
14185
|
poolDir
|
|
14001
14186
|
};
|
|
14002
14187
|
}
|
|
14003
|
-
await
|
|
14188
|
+
await mkdir13(slotPath, { recursive: true });
|
|
14004
14189
|
if (templatePath) {
|
|
14005
14190
|
await copyDirectoryRecursive2(templatePath, slotPath);
|
|
14006
14191
|
}
|
|
@@ -14069,7 +14254,7 @@ var WorkspacePoolManager = class {
|
|
|
14069
14254
|
* Returns false (no drift) if metadata.json doesn't exist (first use).
|
|
14070
14255
|
*/
|
|
14071
14256
|
async checkDrift(poolDir, fingerprint) {
|
|
14072
|
-
const metadataPath =
|
|
14257
|
+
const metadataPath = path39.join(poolDir, "metadata.json");
|
|
14073
14258
|
try {
|
|
14074
14259
|
const raw = await readFile10(metadataPath, "utf-8");
|
|
14075
14260
|
const metadata = JSON.parse(raw);
|
|
@@ -14086,14 +14271,14 @@ var WorkspacePoolManager = class {
|
|
|
14086
14271
|
repos,
|
|
14087
14272
|
createdAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
14088
14273
|
};
|
|
14089
|
-
await writeFile7(
|
|
14274
|
+
await writeFile7(path39.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
|
|
14090
14275
|
}
|
|
14091
14276
|
/** Remove all slot directories and their lock files from a pool directory. */
|
|
14092
14277
|
async removeAllSlots(poolDir) {
|
|
14093
14278
|
const entries = await readdir4(poolDir);
|
|
14094
14279
|
for (const entry of entries) {
|
|
14095
14280
|
if (entry.startsWith("slot-") && !entry.endsWith(".lock")) {
|
|
14096
|
-
const lockPath =
|
|
14281
|
+
const lockPath = path39.join(poolDir, `${entry}.lock`);
|
|
14097
14282
|
if (existsSync2(lockPath)) {
|
|
14098
14283
|
try {
|
|
14099
14284
|
const pidStr = await readFile10(lockPath, "utf-8");
|
|
@@ -14109,12 +14294,12 @@ var WorkspacePoolManager = class {
|
|
|
14109
14294
|
} catch {
|
|
14110
14295
|
}
|
|
14111
14296
|
}
|
|
14112
|
-
await rm5(
|
|
14297
|
+
await rm5(path39.join(poolDir, entry), { recursive: true, force: true });
|
|
14113
14298
|
await rm5(lockPath, { force: true }).catch(() => {
|
|
14114
14299
|
});
|
|
14115
14300
|
}
|
|
14116
14301
|
}
|
|
14117
|
-
await rm5(
|
|
14302
|
+
await rm5(path39.join(poolDir, "metadata.json"), { force: true }).catch(() => {
|
|
14118
14303
|
});
|
|
14119
14304
|
}
|
|
14120
14305
|
/**
|
|
@@ -14124,7 +14309,7 @@ var WorkspacePoolManager = class {
|
|
|
14124
14309
|
*/
|
|
14125
14310
|
async resetSlot(slotPath, templatePath, repos, poolReset = "fast") {
|
|
14126
14311
|
for (const repo of repos) {
|
|
14127
|
-
const repoDir =
|
|
14312
|
+
const repoDir = path39.join(slotPath, repo.path);
|
|
14128
14313
|
if (!existsSync2(repoDir)) {
|
|
14129
14314
|
continue;
|
|
14130
14315
|
}
|
|
@@ -14151,7 +14336,7 @@ var WorkspacePoolManager = class {
|
|
|
14151
14336
|
// src/evaluation/workspace/repo-manager.ts
|
|
14152
14337
|
import { execFile as execFile2 } from "node:child_process";
|
|
14153
14338
|
import { existsSync as existsSync3 } from "node:fs";
|
|
14154
|
-
import
|
|
14339
|
+
import path40 from "node:path";
|
|
14155
14340
|
import { promisify as promisify6 } from "node:util";
|
|
14156
14341
|
var execFileAsync2 = promisify6(execFile2);
|
|
14157
14342
|
var DEFAULT_TIMEOUT_MS2 = 3e5;
|
|
@@ -14251,7 +14436,7 @@ ${lines.join("\n")}`;
|
|
|
14251
14436
|
* Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
|
|
14252
14437
|
*/
|
|
14253
14438
|
async materialize(repo, workspacePath) {
|
|
14254
|
-
const targetDir =
|
|
14439
|
+
const targetDir = path40.join(workspacePath, repo.path);
|
|
14255
14440
|
const sourceUrl = getSourceUrl(repo.source);
|
|
14256
14441
|
const startedAt = Date.now();
|
|
14257
14442
|
if (this.verbose) {
|
|
@@ -14342,7 +14527,7 @@ ${lines.join("\n")}`;
|
|
|
14342
14527
|
async reset(repos, workspacePath, reset) {
|
|
14343
14528
|
const cleanFlag = reset === "strict" ? "-fdx" : "-fd";
|
|
14344
14529
|
for (const repo of repos) {
|
|
14345
|
-
const targetDir =
|
|
14530
|
+
const targetDir = path40.join(workspacePath, repo.path);
|
|
14346
14531
|
await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
|
|
14347
14532
|
await this.runGit(["clean", cleanFlag], { cwd: targetDir });
|
|
14348
14533
|
}
|
|
@@ -14351,16 +14536,16 @@ ${lines.join("\n")}`;
|
|
|
14351
14536
|
|
|
14352
14537
|
// src/evaluation/workspace/resolve.ts
|
|
14353
14538
|
import { readdir as readdir5, stat as stat6 } from "node:fs/promises";
|
|
14354
|
-
import
|
|
14539
|
+
import path41 from "node:path";
|
|
14355
14540
|
async function resolveWorkspaceTemplate(templatePath) {
|
|
14356
14541
|
if (!templatePath) {
|
|
14357
14542
|
return void 0;
|
|
14358
14543
|
}
|
|
14359
|
-
const resolved =
|
|
14544
|
+
const resolved = path41.resolve(templatePath);
|
|
14360
14545
|
const stats = await stat6(resolved);
|
|
14361
14546
|
if (stats.isFile()) {
|
|
14362
14547
|
return {
|
|
14363
|
-
dir:
|
|
14548
|
+
dir: path41.dirname(resolved),
|
|
14364
14549
|
workspaceFile: resolved
|
|
14365
14550
|
};
|
|
14366
14551
|
}
|
|
@@ -14372,14 +14557,14 @@ async function resolveWorkspaceTemplate(templatePath) {
|
|
|
14372
14557
|
if (workspaceFiles.length === 1) {
|
|
14373
14558
|
return {
|
|
14374
14559
|
dir: resolved,
|
|
14375
|
-
workspaceFile:
|
|
14560
|
+
workspaceFile: path41.join(resolved, workspaceFiles[0])
|
|
14376
14561
|
};
|
|
14377
14562
|
}
|
|
14378
14563
|
if (workspaceFiles.length > 1) {
|
|
14379
14564
|
const conventionFile = workspaceFiles.find((f) => f === "template.code-workspace");
|
|
14380
14565
|
return {
|
|
14381
14566
|
dir: resolved,
|
|
14382
|
-
workspaceFile: conventionFile ?
|
|
14567
|
+
workspaceFile: conventionFile ? path41.join(resolved, conventionFile) : void 0
|
|
14383
14568
|
};
|
|
14384
14569
|
}
|
|
14385
14570
|
return { dir: resolved };
|
|
@@ -14516,7 +14701,7 @@ async function runEvaluation(options) {
|
|
|
14516
14701
|
);
|
|
14517
14702
|
useCache = false;
|
|
14518
14703
|
}
|
|
14519
|
-
const evalRunId =
|
|
14704
|
+
const evalRunId = randomUUID9();
|
|
14520
14705
|
const evalCases = preloadedEvalCases ?? await loadTests(evalFilePath, repoRoot, { verbose, filter });
|
|
14521
14706
|
const filteredEvalCases = filterEvalCases(evalCases, filter);
|
|
14522
14707
|
if (filteredEvalCases.length === 0) {
|
|
@@ -14595,7 +14780,7 @@ async function runEvaluation(options) {
|
|
|
14595
14780
|
];
|
|
14596
14781
|
const evaluatorRegistry = buildEvaluatorRegistry(evaluators, resolveGraderProvider);
|
|
14597
14782
|
const typeRegistry = createBuiltinRegistry();
|
|
14598
|
-
const discoveryBaseDir = evalFilePath ?
|
|
14783
|
+
const discoveryBaseDir = evalFilePath ? path42.dirname(path42.resolve(evalFilePath)) : process.cwd();
|
|
14599
14784
|
const evalDir = discoveryBaseDir;
|
|
14600
14785
|
await discoverAssertions(typeRegistry, discoveryBaseDir);
|
|
14601
14786
|
await discoverGraders(typeRegistry, discoveryBaseDir);
|
|
@@ -14742,7 +14927,7 @@ async function runEvaluation(options) {
|
|
|
14742
14927
|
const isEmpty = dirExists ? (await readdir6(configuredStaticPath)).length === 0 : false;
|
|
14743
14928
|
if (isYamlConfiguredPath && (!dirExists || isEmpty)) {
|
|
14744
14929
|
if (!dirExists) {
|
|
14745
|
-
await
|
|
14930
|
+
await mkdir14(configuredStaticPath, { recursive: true });
|
|
14746
14931
|
}
|
|
14747
14932
|
if (workspaceTemplate) {
|
|
14748
14933
|
await copyDirectoryRecursive(workspaceTemplate, configuredStaticPath);
|
|
@@ -14787,12 +14972,12 @@ async function runEvaluation(options) {
|
|
|
14787
14972
|
}
|
|
14788
14973
|
} else if (suiteWorkspace?.hooks || suiteWorkspace?.repos?.length && !isPerTestIsolation) {
|
|
14789
14974
|
sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
|
|
14790
|
-
await
|
|
14975
|
+
await mkdir14(sharedWorkspacePath, { recursive: true });
|
|
14791
14976
|
setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
|
|
14792
14977
|
}
|
|
14793
14978
|
try {
|
|
14794
14979
|
if (suiteWorkspaceFile && sharedWorkspacePath) {
|
|
14795
|
-
const copiedWorkspaceFile =
|
|
14980
|
+
const copiedWorkspaceFile = path42.join(sharedWorkspacePath, path42.basename(suiteWorkspaceFile));
|
|
14796
14981
|
try {
|
|
14797
14982
|
await stat7(copiedWorkspaceFile);
|
|
14798
14983
|
suiteWorkspaceFile = copiedWorkspaceFile;
|
|
@@ -15374,7 +15559,7 @@ async function runEvalCase(options) {
|
|
|
15374
15559
|
);
|
|
15375
15560
|
}
|
|
15376
15561
|
if (caseWorkspaceFile && workspacePath) {
|
|
15377
|
-
const copiedFile =
|
|
15562
|
+
const copiedFile = path42.join(workspacePath, path42.basename(caseWorkspaceFile));
|
|
15378
15563
|
try {
|
|
15379
15564
|
await stat7(copiedFile);
|
|
15380
15565
|
caseWorkspaceFile = copiedFile;
|
|
@@ -15384,7 +15569,7 @@ async function runEvalCase(options) {
|
|
|
15384
15569
|
}
|
|
15385
15570
|
if (!workspacePath && (evalCase.workspace?.hooks || evalCase.workspace?.repos?.length) && evalRunId) {
|
|
15386
15571
|
workspacePath = getWorkspacePath(evalRunId, evalCase.id);
|
|
15387
|
-
await
|
|
15572
|
+
await mkdir14(workspacePath, { recursive: true });
|
|
15388
15573
|
}
|
|
15389
15574
|
if (evalCase.workspace?.repos?.length && workspacePath) {
|
|
15390
15575
|
const localPathErrors = RepoManager.validateLocalPaths(evalCase.workspace.repos);
|
|
@@ -15436,10 +15621,10 @@ async function runEvalCase(options) {
|
|
|
15436
15621
|
const files = evalCase.metadata.agent_skills_files;
|
|
15437
15622
|
if (baseDir && files.length > 0) {
|
|
15438
15623
|
for (const relPath of files) {
|
|
15439
|
-
const srcPath =
|
|
15440
|
-
const destPath =
|
|
15624
|
+
const srcPath = path42.resolve(baseDir, relPath);
|
|
15625
|
+
const destPath = path42.resolve(workspacePath, relPath);
|
|
15441
15626
|
try {
|
|
15442
|
-
await
|
|
15627
|
+
await mkdir14(path42.dirname(destPath), { recursive: true });
|
|
15443
15628
|
await copyFile2(srcPath, destPath);
|
|
15444
15629
|
} catch (error) {
|
|
15445
15630
|
const message = error instanceof Error ? error.message : String(error);
|
|
@@ -16085,7 +16270,7 @@ async function runEvaluatorList(options) {
|
|
|
16085
16270
|
fileChanges,
|
|
16086
16271
|
workspacePath
|
|
16087
16272
|
};
|
|
16088
|
-
const evalFileDir = evalCase.file_paths[0] ?
|
|
16273
|
+
const evalFileDir = evalCase.file_paths[0] ? path42.dirname(evalCase.file_paths[0]) : process.cwd();
|
|
16089
16274
|
const dispatchContext = {
|
|
16090
16275
|
graderProvider,
|
|
16091
16276
|
targetResolver,
|
|
@@ -16419,7 +16604,7 @@ function computeWeightedMean(entries) {
|
|
|
16419
16604
|
|
|
16420
16605
|
// src/evaluation/evaluate.ts
|
|
16421
16606
|
import { existsSync as existsSync4 } from "node:fs";
|
|
16422
|
-
import
|
|
16607
|
+
import path43 from "node:path";
|
|
16423
16608
|
|
|
16424
16609
|
// src/evaluation/providers/function-provider.ts
|
|
16425
16610
|
function createFunctionProvider(taskFn) {
|
|
@@ -16456,7 +16641,7 @@ async function evaluate(config) {
|
|
|
16456
16641
|
}
|
|
16457
16642
|
const gitRoot = await findGitRoot(process.cwd());
|
|
16458
16643
|
const repoRoot = gitRoot ?? process.cwd();
|
|
16459
|
-
const testFilePath = config.specFile ?
|
|
16644
|
+
const testFilePath = config.specFile ? path43.resolve(config.specFile) : path43.join(process.cwd(), "__programmatic__.yaml");
|
|
16460
16645
|
await loadEnvHierarchy(repoRoot, testFilePath);
|
|
16461
16646
|
let resolvedTarget;
|
|
16462
16647
|
let taskProvider;
|
|
@@ -16577,10 +16762,10 @@ function computeSummary(results, durationMs) {
|
|
|
16577
16762
|
var TARGET_FILE_CANDIDATES = [".agentv/targets.yaml", ".agentv/targets.yml"];
|
|
16578
16763
|
async function discoverDefaultTarget(repoRoot) {
|
|
16579
16764
|
const cwd = process.cwd();
|
|
16580
|
-
const chain = buildDirectoryChain(
|
|
16765
|
+
const chain = buildDirectoryChain(path43.join(cwd, "_placeholder"), repoRoot);
|
|
16581
16766
|
for (const dir of chain) {
|
|
16582
16767
|
for (const candidate of TARGET_FILE_CANDIDATES) {
|
|
16583
|
-
const targetsPath =
|
|
16768
|
+
const targetsPath = path43.join(dir, candidate);
|
|
16584
16769
|
if (!existsSync4(targetsPath)) continue;
|
|
16585
16770
|
try {
|
|
16586
16771
|
const definitions = await readTargetDefinitions(targetsPath);
|
|
@@ -16597,7 +16782,7 @@ async function loadEnvHierarchy(repoRoot, startPath) {
|
|
|
16597
16782
|
const chain = buildDirectoryChain(startPath, repoRoot);
|
|
16598
16783
|
const envFiles = [];
|
|
16599
16784
|
for (const dir of chain) {
|
|
16600
|
-
const envPath =
|
|
16785
|
+
const envPath = path43.join(dir, ".env");
|
|
16601
16786
|
if (existsSync4(envPath)) envFiles.push(envPath);
|
|
16602
16787
|
}
|
|
16603
16788
|
for (let i = 0; i < envFiles.length; i++) {
|
|
@@ -16778,8 +16963,8 @@ function buildPrompt(criteria, question, referenceAnswer) {
|
|
|
16778
16963
|
}
|
|
16779
16964
|
|
|
16780
16965
|
// src/evaluation/cache/response-cache.ts
|
|
16781
|
-
import { mkdir as
|
|
16782
|
-
import
|
|
16966
|
+
import { mkdir as mkdir15, readFile as readFile11, writeFile as writeFile8 } from "node:fs/promises";
|
|
16967
|
+
import path44 from "node:path";
|
|
16783
16968
|
var DEFAULT_CACHE_PATH = ".agentv/cache";
|
|
16784
16969
|
var ResponseCache = class {
|
|
16785
16970
|
cachePath;
|
|
@@ -16797,13 +16982,13 @@ var ResponseCache = class {
|
|
|
16797
16982
|
}
|
|
16798
16983
|
async set(key, value) {
|
|
16799
16984
|
const filePath = this.keyToPath(key);
|
|
16800
|
-
const dir =
|
|
16801
|
-
await
|
|
16985
|
+
const dir = path44.dirname(filePath);
|
|
16986
|
+
await mkdir15(dir, { recursive: true });
|
|
16802
16987
|
await writeFile8(filePath, JSON.stringify(value, null, 2), "utf8");
|
|
16803
16988
|
}
|
|
16804
16989
|
keyToPath(key) {
|
|
16805
16990
|
const prefix = key.slice(0, 2);
|
|
16806
|
-
return
|
|
16991
|
+
return path44.join(this.cachePath, prefix, `${key}.json`);
|
|
16807
16992
|
}
|
|
16808
16993
|
};
|
|
16809
16994
|
function shouldEnableCache(params) {
|
|
@@ -16937,7 +17122,7 @@ var OtelTraceExporter = class {
|
|
|
16937
17122
|
);
|
|
16938
17123
|
}
|
|
16939
17124
|
if (this.options.traceFilePath) {
|
|
16940
|
-
const { SimpleTraceFileExporter: SimpleTraceFileExporter2 } = await import("./simple-trace-file-exporter-
|
|
17125
|
+
const { SimpleTraceFileExporter: SimpleTraceFileExporter2 } = await import("./simple-trace-file-exporter-CRIO5HDZ.js");
|
|
16941
17126
|
processors.push(
|
|
16942
17127
|
new SimpleSpanProcessor(new SimpleTraceFileExporter2(this.options.traceFilePath))
|
|
16943
17128
|
);
|
|
@@ -17001,6 +17186,17 @@ var OtelTraceExporter = class {
|
|
|
17001
17186
|
if (result.durationMs != null)
|
|
17002
17187
|
rootSpan.setAttribute("agentv.trace.duration_ms", result.durationMs);
|
|
17003
17188
|
if (result.costUsd != null) rootSpan.setAttribute("agentv.trace.cost_usd", result.costUsd);
|
|
17189
|
+
if (result.tokenUsage) {
|
|
17190
|
+
if (result.tokenUsage.input != null) {
|
|
17191
|
+
rootSpan.setAttribute("agentv.trace.token_input", result.tokenUsage.input);
|
|
17192
|
+
}
|
|
17193
|
+
if (result.tokenUsage.output != null) {
|
|
17194
|
+
rootSpan.setAttribute("agentv.trace.token_output", result.tokenUsage.output);
|
|
17195
|
+
}
|
|
17196
|
+
if (result.tokenUsage.cached != null) {
|
|
17197
|
+
rootSpan.setAttribute("agentv.trace.token_cached", result.tokenUsage.cached);
|
|
17198
|
+
}
|
|
17199
|
+
}
|
|
17004
17200
|
if (result.trace) {
|
|
17005
17201
|
const t = result.trace;
|
|
17006
17202
|
rootSpan.setAttribute("agentv.trace.event_count", t.eventCount);
|
|
@@ -17103,6 +17299,7 @@ var OtelTraceExporter = class {
|
|
|
17103
17299
|
tracer.startActiveSpan(
|
|
17104
17300
|
spanName,
|
|
17105
17301
|
{ startTime: startHr },
|
|
17302
|
+
parentCtx,
|
|
17106
17303
|
(span) => {
|
|
17107
17304
|
if (isAssistant) {
|
|
17108
17305
|
span.setAttribute("gen_ai.operation.name", "chat");
|
|
@@ -17135,6 +17332,7 @@ var OtelTraceExporter = class {
|
|
|
17135
17332
|
tracer.startActiveSpan(
|
|
17136
17333
|
`execute_tool ${tc.tool}`,
|
|
17137
17334
|
{},
|
|
17335
|
+
msgCtx,
|
|
17138
17336
|
(toolSpan) => {
|
|
17139
17337
|
toolSpan.setAttribute("gen_ai.tool.name", tc.tool);
|
|
17140
17338
|
if (tc.id) toolSpan.setAttribute("gen_ai.tool.call.id", tc.id);
|
|
@@ -17175,8 +17373,12 @@ var OtelStreamingObserver = class {
|
|
|
17175
17373
|
rootSpan = null;
|
|
17176
17374
|
// biome-ignore lint/suspicious/noExplicitAny: OTel context loaded dynamically
|
|
17177
17375
|
rootCtx = null;
|
|
17376
|
+
observedChildSpans = false;
|
|
17377
|
+
pendingMetrics = null;
|
|
17178
17378
|
/** Create root eval span immediately (visible in backend right away) */
|
|
17179
17379
|
startEvalCase(testId, target, evalSet) {
|
|
17380
|
+
this.pendingMetrics = null;
|
|
17381
|
+
this.observedChildSpans = false;
|
|
17180
17382
|
const ctx = this.parentCtx ?? this.api.context.active();
|
|
17181
17383
|
this.rootSpan = this.tracer.startSpan("agentv.eval", void 0, ctx);
|
|
17182
17384
|
this.rootSpan.setAttribute("gen_ai.operation.name", "evaluate");
|
|
@@ -17189,8 +17391,9 @@ var OtelStreamingObserver = class {
|
|
|
17189
17391
|
/** Create and immediately export a tool span */
|
|
17190
17392
|
onToolCall(name, input, output, _durationMs, toolCallId) {
|
|
17191
17393
|
if (!this.rootCtx) return;
|
|
17394
|
+
this.observedChildSpans = true;
|
|
17192
17395
|
this.api.context.with(this.rootCtx, () => {
|
|
17193
|
-
const span = this.tracer.startSpan(`execute_tool ${name}
|
|
17396
|
+
const span = this.tracer.startSpan(`execute_tool ${name}`, void 0, this.rootCtx);
|
|
17194
17397
|
span.setAttribute("gen_ai.tool.name", name);
|
|
17195
17398
|
if (toolCallId) span.setAttribute("gen_ai.tool.call.id", toolCallId);
|
|
17196
17399
|
if (this.captureContent) {
|
|
@@ -17211,8 +17414,9 @@ var OtelStreamingObserver = class {
|
|
|
17211
17414
|
/** Create and immediately export an LLM span */
|
|
17212
17415
|
onLlmCall(model, tokenUsage) {
|
|
17213
17416
|
if (!this.rootCtx) return;
|
|
17417
|
+
this.observedChildSpans = true;
|
|
17214
17418
|
this.api.context.with(this.rootCtx, () => {
|
|
17215
|
-
const span = this.tracer.startSpan(`chat ${model}
|
|
17419
|
+
const span = this.tracer.startSpan(`chat ${model}`, void 0, this.rootCtx);
|
|
17216
17420
|
span.setAttribute("gen_ai.operation.name", "chat");
|
|
17217
17421
|
span.setAttribute("gen_ai.request.model", model);
|
|
17218
17422
|
span.setAttribute("gen_ai.response.model", model);
|
|
@@ -17227,10 +17431,53 @@ var OtelStreamingObserver = class {
|
|
|
17227
17431
|
span.end();
|
|
17228
17432
|
});
|
|
17229
17433
|
}
|
|
17434
|
+
/** Record final execution metrics before the root span is finalized. */
|
|
17435
|
+
recordEvalMetrics(result) {
|
|
17436
|
+
this.pendingMetrics = result;
|
|
17437
|
+
}
|
|
17230
17438
|
/** Finalize root span with score/verdict after evaluation completes */
|
|
17231
17439
|
finalizeEvalCase(score, error) {
|
|
17232
17440
|
if (!this.rootSpan) return;
|
|
17233
17441
|
this.rootSpan.setAttribute("agentv.score", score);
|
|
17442
|
+
if (this.pendingMetrics?.durationMs != null) {
|
|
17443
|
+
this.rootSpan.setAttribute("agentv.trace.duration_ms", this.pendingMetrics.durationMs);
|
|
17444
|
+
}
|
|
17445
|
+
if (this.pendingMetrics?.costUsd != null) {
|
|
17446
|
+
this.rootSpan.setAttribute("agentv.trace.cost_usd", this.pendingMetrics.costUsd);
|
|
17447
|
+
}
|
|
17448
|
+
if (this.pendingMetrics?.tokenUsage) {
|
|
17449
|
+
if (this.pendingMetrics.tokenUsage.input != null) {
|
|
17450
|
+
this.rootSpan.setAttribute(
|
|
17451
|
+
"agentv.trace.token_input",
|
|
17452
|
+
this.pendingMetrics.tokenUsage.input
|
|
17453
|
+
);
|
|
17454
|
+
}
|
|
17455
|
+
if (this.pendingMetrics.tokenUsage.output != null) {
|
|
17456
|
+
this.rootSpan.setAttribute(
|
|
17457
|
+
"agentv.trace.token_output",
|
|
17458
|
+
this.pendingMetrics.tokenUsage.output
|
|
17459
|
+
);
|
|
17460
|
+
}
|
|
17461
|
+
if (this.pendingMetrics.tokenUsage.cached != null) {
|
|
17462
|
+
this.rootSpan.setAttribute(
|
|
17463
|
+
"agentv.trace.token_cached",
|
|
17464
|
+
this.pendingMetrics.tokenUsage.cached
|
|
17465
|
+
);
|
|
17466
|
+
}
|
|
17467
|
+
}
|
|
17468
|
+
if (this.pendingMetrics?.trace) {
|
|
17469
|
+
this.rootSpan.setAttribute("agentv.trace.event_count", this.pendingMetrics.trace.eventCount);
|
|
17470
|
+
this.rootSpan.setAttribute(
|
|
17471
|
+
"agentv.trace.tool_names",
|
|
17472
|
+
Object.keys(this.pendingMetrics.trace.toolCalls).sort().join(",")
|
|
17473
|
+
);
|
|
17474
|
+
if (this.pendingMetrics.trace.llmCallCount != null) {
|
|
17475
|
+
this.rootSpan.setAttribute(
|
|
17476
|
+
"agentv.trace.llm_call_count",
|
|
17477
|
+
this.pendingMetrics.trace.llmCallCount
|
|
17478
|
+
);
|
|
17479
|
+
}
|
|
17480
|
+
}
|
|
17234
17481
|
if (error) {
|
|
17235
17482
|
this.rootSpan.setStatus({ code: this.api.SpanStatusCode.ERROR, message: error });
|
|
17236
17483
|
} else {
|
|
@@ -17239,6 +17486,33 @@ var OtelStreamingObserver = class {
|
|
|
17239
17486
|
this.rootSpan.end();
|
|
17240
17487
|
this.rootSpan = null;
|
|
17241
17488
|
this.rootCtx = null;
|
|
17489
|
+
this.observedChildSpans = false;
|
|
17490
|
+
this.pendingMetrics = null;
|
|
17491
|
+
}
|
|
17492
|
+
/** Backfill child spans from the completed result when the provider emitted no live callbacks. */
|
|
17493
|
+
completeFromResult(result) {
|
|
17494
|
+
this.recordEvalMetrics({
|
|
17495
|
+
durationMs: result.durationMs,
|
|
17496
|
+
costUsd: result.costUsd,
|
|
17497
|
+
tokenUsage: result.tokenUsage,
|
|
17498
|
+
trace: result.trace
|
|
17499
|
+
});
|
|
17500
|
+
if (this.observedChildSpans || !this.rootCtx) {
|
|
17501
|
+
return;
|
|
17502
|
+
}
|
|
17503
|
+
const model = result.output.find((msg) => msg.role === "assistant")?.metadata?.model ?? result.target ?? "unknown";
|
|
17504
|
+
this.onLlmCall(String(model), result.tokenUsage);
|
|
17505
|
+
for (const message of result.output) {
|
|
17506
|
+
for (const toolCall of message.toolCalls ?? []) {
|
|
17507
|
+
this.onToolCall(
|
|
17508
|
+
toolCall.tool,
|
|
17509
|
+
toolCall.input,
|
|
17510
|
+
toolCall.output,
|
|
17511
|
+
toolCall.durationMs ?? 0,
|
|
17512
|
+
toolCall.id
|
|
17513
|
+
);
|
|
17514
|
+
}
|
|
17515
|
+
}
|
|
17242
17516
|
}
|
|
17243
17517
|
/** Return the active eval span's trace ID and span ID for Braintrust trace bridging */
|
|
17244
17518
|
getActiveSpanIds() {
|