@agentv/core 3.11.0 → 3.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-HMXZ2AX4.js → chunk-3G2KXH7N.js} +31 -23
- package/dist/chunk-3G2KXH7N.js.map +1 -0
- package/dist/{chunk-AVTN5AB7.js → chunk-4XWPXNQM.js} +62 -24
- package/dist/chunk-4XWPXNQM.js.map +1 -0
- package/dist/evaluation/validation/index.cjs +1 -1
- package/dist/evaluation/validation/index.cjs.map +1 -1
- package/dist/evaluation/validation/index.js +1 -1
- package/dist/index.cjs +1120 -800
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +29 -8
- package/dist/index.d.ts +29 -8
- package/dist/index.js +956 -682
- package/dist/index.js.map +1 -1
- package/dist/simple-trace-file-exporter-CRIO5HDZ.js +7 -0
- package/package.json +9 -3
- package/dist/chunk-AVTN5AB7.js.map +0 -1
- package/dist/chunk-HMXZ2AX4.js.map +0 -1
- package/dist/simple-trace-file-exporter-S76DMABU.js +0 -7
- /package/dist/{simple-trace-file-exporter-S76DMABU.js.map → simple-trace-file-exporter-CRIO5HDZ.js.map} +0 -0
package/dist/index.cjs
CHANGED
|
@@ -1315,12 +1315,12 @@ function serializeAttributeValue(value) {
|
|
|
1315
1315
|
if (Array.isArray(value)) return { arrayValue: { values: value.map(serializeAttributeValue) } };
|
|
1316
1316
|
return { stringValue: String(value) };
|
|
1317
1317
|
}
|
|
1318
|
-
var
|
|
1318
|
+
var import_promises33, import_node_path49, OtlpJsonFileExporter;
|
|
1319
1319
|
var init_otlp_json_file_exporter = __esm({
|
|
1320
1320
|
"src/observability/otlp-json-file-exporter.ts"() {
|
|
1321
1321
|
"use strict";
|
|
1322
|
-
|
|
1323
|
-
|
|
1322
|
+
import_promises33 = require("fs/promises");
|
|
1323
|
+
import_node_path49 = require("path");
|
|
1324
1324
|
OtlpJsonFileExporter = class {
|
|
1325
1325
|
// biome-ignore lint/suspicious/noExplicitAny: serialized span data
|
|
1326
1326
|
spans = [];
|
|
@@ -1359,7 +1359,7 @@ var init_otlp_json_file_exporter = __esm({
|
|
|
1359
1359
|
}
|
|
1360
1360
|
async flush() {
|
|
1361
1361
|
if (this.spans.length === 0) return;
|
|
1362
|
-
await (0,
|
|
1362
|
+
await (0, import_promises33.mkdir)((0, import_node_path49.dirname)(this.filePath), { recursive: true });
|
|
1363
1363
|
const otlpJson = {
|
|
1364
1364
|
resourceSpans: [
|
|
1365
1365
|
{
|
|
@@ -1390,27 +1390,28 @@ function hrTimeDiffMs(start, end) {
|
|
|
1390
1390
|
const diffNano = end[1] - start[1];
|
|
1391
1391
|
return Math.round(diffSec * 1e3 + diffNano / 1e6);
|
|
1392
1392
|
}
|
|
1393
|
-
var
|
|
1393
|
+
var import_node_fs16, import_promises34, import_node_path50, SimpleTraceFileExporter;
|
|
1394
1394
|
var init_simple_trace_file_exporter = __esm({
|
|
1395
1395
|
"src/observability/simple-trace-file-exporter.ts"() {
|
|
1396
1396
|
"use strict";
|
|
1397
|
-
|
|
1398
|
-
|
|
1399
|
-
|
|
1397
|
+
import_node_fs16 = require("fs");
|
|
1398
|
+
import_promises34 = require("fs/promises");
|
|
1399
|
+
import_node_path50 = require("path");
|
|
1400
1400
|
SimpleTraceFileExporter = class {
|
|
1401
1401
|
stream = null;
|
|
1402
1402
|
filePath;
|
|
1403
1403
|
streamReady = null;
|
|
1404
1404
|
pendingWrites = [];
|
|
1405
1405
|
_shuttingDown = false;
|
|
1406
|
+
spansByTraceId = /* @__PURE__ */ new Map();
|
|
1406
1407
|
constructor(filePath) {
|
|
1407
1408
|
this.filePath = filePath;
|
|
1408
1409
|
}
|
|
1409
1410
|
async ensureStream() {
|
|
1410
1411
|
if (!this.streamReady) {
|
|
1411
1412
|
this.streamReady = (async () => {
|
|
1412
|
-
await (0,
|
|
1413
|
-
this.stream = (0,
|
|
1413
|
+
await (0, import_promises34.mkdir)((0, import_node_path50.dirname)(this.filePath), { recursive: true });
|
|
1414
|
+
this.stream = (0, import_node_fs16.createWriteStream)(this.filePath, { flags: "w" });
|
|
1414
1415
|
return this.stream;
|
|
1415
1416
|
})();
|
|
1416
1417
|
}
|
|
@@ -1421,25 +1422,27 @@ var init_simple_trace_file_exporter = __esm({
|
|
|
1421
1422
|
resultCallback({ code: 0 });
|
|
1422
1423
|
return;
|
|
1423
1424
|
}
|
|
1424
|
-
const
|
|
1425
|
-
const childMap = /* @__PURE__ */ new Map();
|
|
1425
|
+
const rootSpans = [];
|
|
1426
1426
|
for (const span of spans) {
|
|
1427
|
-
|
|
1428
|
-
const
|
|
1429
|
-
|
|
1430
|
-
|
|
1431
|
-
|
|
1427
|
+
const traceId = span.spanContext().traceId;
|
|
1428
|
+
const existing = this.spansByTraceId.get(traceId) ?? [];
|
|
1429
|
+
existing.push(span);
|
|
1430
|
+
this.spansByTraceId.set(traceId, existing);
|
|
1431
|
+
if (span.name === "agentv.eval") {
|
|
1432
|
+
rootSpans.push(span);
|
|
1432
1433
|
}
|
|
1433
1434
|
}
|
|
1434
|
-
const rootSpans = spans.filter(
|
|
1435
|
-
(s) => !s.parentSpanId || !spanMap.has(s.parentSpanId)
|
|
1436
|
-
);
|
|
1437
1435
|
const writePromise = this.ensureStream().then((stream) => {
|
|
1438
1436
|
for (const root of rootSpans) {
|
|
1439
|
-
const
|
|
1437
|
+
const traceId = root.spanContext().traceId;
|
|
1438
|
+
const traceSpans = this.spansByTraceId.get(traceId) ?? [root];
|
|
1439
|
+
const children = traceSpans.filter(
|
|
1440
|
+
(span) => span.spanContext().spanId !== root.spanContext().spanId
|
|
1441
|
+
);
|
|
1440
1442
|
const record = this.buildSimpleRecord(root, children);
|
|
1441
1443
|
stream.write(`${JSON.stringify(record)}
|
|
1442
1444
|
`);
|
|
1445
|
+
this.spansByTraceId.delete(traceId);
|
|
1443
1446
|
}
|
|
1444
1447
|
});
|
|
1445
1448
|
this.pendingWrites.push(writePromise);
|
|
@@ -1449,6 +1452,7 @@ var init_simple_trace_file_exporter = __esm({
|
|
|
1449
1452
|
this._shuttingDown = true;
|
|
1450
1453
|
await Promise.all(this.pendingWrites);
|
|
1451
1454
|
this.pendingWrites = [];
|
|
1455
|
+
this.spansByTraceId.clear();
|
|
1452
1456
|
return new Promise((resolve) => {
|
|
1453
1457
|
if (this.stream) {
|
|
1454
1458
|
this.stream.end(() => resolve());
|
|
@@ -1461,17 +1465,9 @@ var init_simple_trace_file_exporter = __esm({
|
|
|
1461
1465
|
await Promise.all(this.pendingWrites);
|
|
1462
1466
|
this.pendingWrites = [];
|
|
1463
1467
|
}
|
|
1464
|
-
collectChildren(spanId, childMap) {
|
|
1465
|
-
const direct = childMap.get(spanId) || [];
|
|
1466
|
-
const all = [...direct];
|
|
1467
|
-
for (const child of direct) {
|
|
1468
|
-
all.push(...this.collectChildren(child.spanContext().spanId, childMap));
|
|
1469
|
-
}
|
|
1470
|
-
return all;
|
|
1471
|
-
}
|
|
1472
1468
|
buildSimpleRecord(root, children) {
|
|
1473
1469
|
const attrs = root.attributes || {};
|
|
1474
|
-
const durationMs = hrTimeDiffMs(root.startTime, root.endTime);
|
|
1470
|
+
const durationMs = typeof attrs["agentv.trace.duration_ms"] === "number" ? attrs["agentv.trace.duration_ms"] : hrTimeDiffMs(root.startTime, root.endTime);
|
|
1475
1471
|
let inputTokens = 0;
|
|
1476
1472
|
let outputTokens = 0;
|
|
1477
1473
|
for (const child of children) {
|
|
@@ -1479,6 +1475,14 @@ var init_simple_trace_file_exporter = __esm({
|
|
|
1479
1475
|
if (ca["gen_ai.usage.input_tokens"]) inputTokens += ca["gen_ai.usage.input_tokens"];
|
|
1480
1476
|
if (ca["gen_ai.usage.output_tokens"]) outputTokens += ca["gen_ai.usage.output_tokens"];
|
|
1481
1477
|
}
|
|
1478
|
+
const rootInputTokens = typeof attrs["agentv.trace.token_input"] === "number" ? attrs["agentv.trace.token_input"] : 0;
|
|
1479
|
+
const rootOutputTokens = typeof attrs["agentv.trace.token_output"] === "number" ? attrs["agentv.trace.token_output"] : 0;
|
|
1480
|
+
const rootCachedTokens = typeof attrs["agentv.trace.token_cached"] === "number" ? attrs["agentv.trace.token_cached"] : void 0;
|
|
1481
|
+
const llmSpans = children.filter((s) => s.attributes?.["gen_ai.operation.name"] === "chat").map((s) => ({
|
|
1482
|
+
type: "llm",
|
|
1483
|
+
name: s.name,
|
|
1484
|
+
duration_ms: hrTimeDiffMs(s.startTime, s.endTime)
|
|
1485
|
+
}));
|
|
1482
1486
|
const toolSpans = children.filter((s) => s.attributes?.["gen_ai.tool.name"]).map((s) => ({
|
|
1483
1487
|
type: "tool",
|
|
1484
1488
|
name: s.attributes["gen_ai.tool.name"],
|
|
@@ -1490,8 +1494,12 @@ var init_simple_trace_file_exporter = __esm({
|
|
|
1490
1494
|
score: attrs["agentv.score"],
|
|
1491
1495
|
duration_ms: durationMs,
|
|
1492
1496
|
cost_usd: attrs["agentv.trace.cost_usd"],
|
|
1493
|
-
token_usage: inputTokens || outputTokens
|
|
1494
|
-
|
|
1497
|
+
token_usage: inputTokens || outputTokens || rootInputTokens || rootOutputTokens || rootCachedTokens ? {
|
|
1498
|
+
input: inputTokens || rootInputTokens,
|
|
1499
|
+
output: outputTokens || rootOutputTokens,
|
|
1500
|
+
...rootCachedTokens ? { cached: rootCachedTokens } : {}
|
|
1501
|
+
} : void 0,
|
|
1502
|
+
spans: [...llmSpans, ...toolSpans].length > 0 ? [...llmSpans, ...toolSpans] : void 0
|
|
1495
1503
|
};
|
|
1496
1504
|
}
|
|
1497
1505
|
};
|
|
@@ -8749,265 +8757,7 @@ var MockProvider = class {
|
|
|
8749
8757
|
}
|
|
8750
8758
|
};
|
|
8751
8759
|
|
|
8752
|
-
// src/evaluation/providers/pi-
|
|
8753
|
-
function extractPiTextContent(content) {
|
|
8754
|
-
if (typeof content === "string") {
|
|
8755
|
-
return content;
|
|
8756
|
-
}
|
|
8757
|
-
if (!Array.isArray(content)) {
|
|
8758
|
-
return void 0;
|
|
8759
|
-
}
|
|
8760
|
-
const textParts = [];
|
|
8761
|
-
for (const part of content) {
|
|
8762
|
-
if (!part || typeof part !== "object") {
|
|
8763
|
-
continue;
|
|
8764
|
-
}
|
|
8765
|
-
const p = part;
|
|
8766
|
-
if (p.type === "text" && typeof p.text === "string") {
|
|
8767
|
-
textParts.push(p.text);
|
|
8768
|
-
}
|
|
8769
|
-
}
|
|
8770
|
-
return textParts.length > 0 ? textParts.join("\n") : void 0;
|
|
8771
|
-
}
|
|
8772
|
-
function toFiniteNumber(value) {
|
|
8773
|
-
if (typeof value === "number" && Number.isFinite(value)) return value;
|
|
8774
|
-
return void 0;
|
|
8775
|
-
}
|
|
8776
|
-
|
|
8777
|
-
// src/evaluation/providers/pi-agent-sdk.ts
|
|
8778
|
-
var piAgentModule = null;
|
|
8779
|
-
var piAiModule = null;
|
|
8780
|
-
async function loadPiModules() {
|
|
8781
|
-
if (!piAgentModule || !piAiModule) {
|
|
8782
|
-
try {
|
|
8783
|
-
[piAgentModule, piAiModule] = await Promise.all([
|
|
8784
|
-
import("@mariozechner/pi-agent-core"),
|
|
8785
|
-
import("@mariozechner/pi-ai")
|
|
8786
|
-
]);
|
|
8787
|
-
} catch (error) {
|
|
8788
|
-
throw new Error(
|
|
8789
|
-
`Failed to load pi-agent-sdk dependencies. Please install them:
|
|
8790
|
-
npm install @mariozechner/pi-agent-core @mariozechner/pi-ai
|
|
8791
|
-
|
|
8792
|
-
Original error: ${error instanceof Error ? error.message : String(error)}`
|
|
8793
|
-
);
|
|
8794
|
-
}
|
|
8795
|
-
}
|
|
8796
|
-
return {
|
|
8797
|
-
Agent: piAgentModule.Agent,
|
|
8798
|
-
getModel: piAiModule.getModel,
|
|
8799
|
-
getEnvApiKey: piAiModule.getEnvApiKey
|
|
8800
|
-
};
|
|
8801
|
-
}
|
|
8802
|
-
var PiAgentSdkProvider = class {
|
|
8803
|
-
id;
|
|
8804
|
-
kind = "pi-agent-sdk";
|
|
8805
|
-
targetName;
|
|
8806
|
-
supportsBatch = false;
|
|
8807
|
-
config;
|
|
8808
|
-
constructor(targetName, config) {
|
|
8809
|
-
this.id = `pi-agent-sdk:${targetName}`;
|
|
8810
|
-
this.targetName = targetName;
|
|
8811
|
-
this.config = config;
|
|
8812
|
-
}
|
|
8813
|
-
async invoke(request) {
|
|
8814
|
-
if (request.signal?.aborted) {
|
|
8815
|
-
throw new Error("Pi agent SDK request was aborted before execution");
|
|
8816
|
-
}
|
|
8817
|
-
const { Agent, getModel, getEnvApiKey } = await loadPiModules();
|
|
8818
|
-
const startTimeIso = (/* @__PURE__ */ new Date()).toISOString();
|
|
8819
|
-
const startMs = Date.now();
|
|
8820
|
-
const providerName = this.config.subprovider ?? "anthropic";
|
|
8821
|
-
const modelId = this.config.model ?? "claude-sonnet-4-20250514";
|
|
8822
|
-
const model = getModel(providerName, modelId);
|
|
8823
|
-
const systemPrompt = this.config.systemPrompt ?? "Answer directly and concisely.";
|
|
8824
|
-
const agent = new Agent({
|
|
8825
|
-
initialState: {
|
|
8826
|
-
systemPrompt,
|
|
8827
|
-
model,
|
|
8828
|
-
tools: [],
|
|
8829
|
-
// No tools for simple Q&A
|
|
8830
|
-
messages: []
|
|
8831
|
-
},
|
|
8832
|
-
getApiKey: async (provider) => {
|
|
8833
|
-
return this.config.apiKey ?? getEnvApiKey(provider) ?? void 0;
|
|
8834
|
-
}
|
|
8835
|
-
});
|
|
8836
|
-
let tokenUsage;
|
|
8837
|
-
let costUsd;
|
|
8838
|
-
const toolTrackers = /* @__PURE__ */ new Map();
|
|
8839
|
-
const completedToolResults = /* @__PURE__ */ new Map();
|
|
8840
|
-
const unsubscribe = agent.subscribe((event) => {
|
|
8841
|
-
switch (event.type) {
|
|
8842
|
-
case "message_end": {
|
|
8843
|
-
const msg = event.message;
|
|
8844
|
-
if (msg && typeof msg === "object" && "role" in msg && msg.role === "assistant" && "usage" in msg) {
|
|
8845
|
-
const usage = msg.usage;
|
|
8846
|
-
if (usage && typeof usage === "object") {
|
|
8847
|
-
const u = usage;
|
|
8848
|
-
const input = toFiniteNumber(u.input);
|
|
8849
|
-
const output = toFiniteNumber(u.output);
|
|
8850
|
-
const cached = toFiniteNumber(u.cacheRead);
|
|
8851
|
-
let callDelta;
|
|
8852
|
-
if (input !== void 0 || output !== void 0) {
|
|
8853
|
-
callDelta = {
|
|
8854
|
-
input: input ?? 0,
|
|
8855
|
-
output: output ?? 0,
|
|
8856
|
-
...cached !== void 0 ? { cached } : {}
|
|
8857
|
-
};
|
|
8858
|
-
tokenUsage = {
|
|
8859
|
-
input: (tokenUsage?.input ?? 0) + callDelta.input,
|
|
8860
|
-
output: (tokenUsage?.output ?? 0) + callDelta.output,
|
|
8861
|
-
...cached !== void 0 ? { cached: (tokenUsage?.cached ?? 0) + cached } : tokenUsage?.cached !== void 0 ? { cached: tokenUsage.cached } : {}
|
|
8862
|
-
};
|
|
8863
|
-
}
|
|
8864
|
-
const cost = u.cost;
|
|
8865
|
-
if (cost && typeof cost === "object") {
|
|
8866
|
-
const total = toFiniteNumber(cost.total);
|
|
8867
|
-
if (total !== void 0) {
|
|
8868
|
-
costUsd = (costUsd ?? 0) + total;
|
|
8869
|
-
}
|
|
8870
|
-
}
|
|
8871
|
-
request.streamCallbacks?.onLlmCallEnd?.(modelId, callDelta);
|
|
8872
|
-
}
|
|
8873
|
-
}
|
|
8874
|
-
break;
|
|
8875
|
-
}
|
|
8876
|
-
case "tool_execution_start": {
|
|
8877
|
-
toolTrackers.set(event.toolCallId, {
|
|
8878
|
-
toolCallId: event.toolCallId,
|
|
8879
|
-
toolName: event.toolName,
|
|
8880
|
-
args: event.args,
|
|
8881
|
-
startMs: Date.now(),
|
|
8882
|
-
startTime: (/* @__PURE__ */ new Date()).toISOString()
|
|
8883
|
-
});
|
|
8884
|
-
request.streamCallbacks?.onToolCallStart?.(event.toolName, event.toolCallId);
|
|
8885
|
-
break;
|
|
8886
|
-
}
|
|
8887
|
-
case "tool_execution_end": {
|
|
8888
|
-
const tracker = toolTrackers.get(event.toolCallId);
|
|
8889
|
-
const durationMs = tracker ? Date.now() - tracker.startMs : 0;
|
|
8890
|
-
completedToolResults.set(event.toolCallId, {
|
|
8891
|
-
output: event.result,
|
|
8892
|
-
durationMs
|
|
8893
|
-
});
|
|
8894
|
-
request.streamCallbacks?.onToolCallEnd?.(
|
|
8895
|
-
event.toolName,
|
|
8896
|
-
tracker?.args,
|
|
8897
|
-
event.result,
|
|
8898
|
-
durationMs,
|
|
8899
|
-
event.toolCallId
|
|
8900
|
-
);
|
|
8901
|
-
toolTrackers.delete(event.toolCallId);
|
|
8902
|
-
break;
|
|
8903
|
-
}
|
|
8904
|
-
}
|
|
8905
|
-
});
|
|
8906
|
-
try {
|
|
8907
|
-
if (this.config.timeoutMs) {
|
|
8908
|
-
const timeoutMs = this.config.timeoutMs;
|
|
8909
|
-
const timeoutPromise = new Promise((_, reject) => {
|
|
8910
|
-
setTimeout(
|
|
8911
|
-
() => reject(new Error(`Pi agent SDK timed out after ${timeoutMs}ms`)),
|
|
8912
|
-
timeoutMs
|
|
8913
|
-
);
|
|
8914
|
-
});
|
|
8915
|
-
await Promise.race([agent.prompt(request.question), timeoutPromise]);
|
|
8916
|
-
} else {
|
|
8917
|
-
await agent.prompt(request.question);
|
|
8918
|
-
}
|
|
8919
|
-
await agent.waitForIdle();
|
|
8920
|
-
const agentMessages = agent.state.messages;
|
|
8921
|
-
const output = [];
|
|
8922
|
-
for (const msg of agentMessages) {
|
|
8923
|
-
output.push(convertAgentMessage(msg, toolTrackers, completedToolResults));
|
|
8924
|
-
}
|
|
8925
|
-
const endTimeIso = (/* @__PURE__ */ new Date()).toISOString();
|
|
8926
|
-
const durationMs = Date.now() - startMs;
|
|
8927
|
-
return {
|
|
8928
|
-
raw: {
|
|
8929
|
-
messages: agentMessages,
|
|
8930
|
-
systemPrompt,
|
|
8931
|
-
model: this.config.model,
|
|
8932
|
-
subprovider: this.config.subprovider
|
|
8933
|
-
},
|
|
8934
|
-
output,
|
|
8935
|
-
tokenUsage,
|
|
8936
|
-
costUsd,
|
|
8937
|
-
durationMs,
|
|
8938
|
-
startTime: startTimeIso,
|
|
8939
|
-
endTime: endTimeIso
|
|
8940
|
-
};
|
|
8941
|
-
} finally {
|
|
8942
|
-
unsubscribe();
|
|
8943
|
-
}
|
|
8944
|
-
}
|
|
8945
|
-
};
|
|
8946
|
-
function convertAgentMessage(message, toolTrackers, completedToolResults) {
|
|
8947
|
-
if (!message || typeof message !== "object") {
|
|
8948
|
-
return { role: "unknown", content: String(message) };
|
|
8949
|
-
}
|
|
8950
|
-
const msg = message;
|
|
8951
|
-
const role = typeof msg.role === "string" ? msg.role : "unknown";
|
|
8952
|
-
const content = extractPiTextContent(msg.content);
|
|
8953
|
-
const toolCalls = extractToolCalls3(msg.content, toolTrackers, completedToolResults);
|
|
8954
|
-
const startTime = typeof msg.timestamp === "number" ? new Date(msg.timestamp).toISOString() : typeof msg.timestamp === "string" ? msg.timestamp : void 0;
|
|
8955
|
-
let msgTokenUsage;
|
|
8956
|
-
if (msg.usage && typeof msg.usage === "object") {
|
|
8957
|
-
const u = msg.usage;
|
|
8958
|
-
const input = toFiniteNumber(u.input);
|
|
8959
|
-
const output = toFiniteNumber(u.output);
|
|
8960
|
-
if (input !== void 0 || output !== void 0) {
|
|
8961
|
-
msgTokenUsage = {
|
|
8962
|
-
input: input ?? 0,
|
|
8963
|
-
output: output ?? 0,
|
|
8964
|
-
...toFiniteNumber(u.cacheRead) !== void 0 ? { cached: toFiniteNumber(u.cacheRead) } : {}
|
|
8965
|
-
};
|
|
8966
|
-
}
|
|
8967
|
-
}
|
|
8968
|
-
const metadata = {};
|
|
8969
|
-
if (msg.api) metadata.api = msg.api;
|
|
8970
|
-
if (msg.provider) metadata.provider = msg.provider;
|
|
8971
|
-
if (msg.model) metadata.model = msg.model;
|
|
8972
|
-
if (msg.stopReason) metadata.stopReason = msg.stopReason;
|
|
8973
|
-
return {
|
|
8974
|
-
role,
|
|
8975
|
-
content,
|
|
8976
|
-
toolCalls: toolCalls.length > 0 ? toolCalls : void 0,
|
|
8977
|
-
startTime,
|
|
8978
|
-
metadata: Object.keys(metadata).length > 0 ? metadata : void 0,
|
|
8979
|
-
tokenUsage: msgTokenUsage
|
|
8980
|
-
};
|
|
8981
|
-
}
|
|
8982
|
-
function extractToolCalls3(content, toolTrackers, completedToolResults) {
|
|
8983
|
-
if (!Array.isArray(content)) {
|
|
8984
|
-
return [];
|
|
8985
|
-
}
|
|
8986
|
-
const toolCalls = [];
|
|
8987
|
-
for (const part of content) {
|
|
8988
|
-
if (!part || typeof part !== "object") {
|
|
8989
|
-
continue;
|
|
8990
|
-
}
|
|
8991
|
-
const p = part;
|
|
8992
|
-
if (p.type === "toolCall" && typeof p.name === "string") {
|
|
8993
|
-
const id = typeof p.id === "string" ? p.id : void 0;
|
|
8994
|
-
const tracker = id ? toolTrackers.get(id) : void 0;
|
|
8995
|
-
const completed = id ? completedToolResults.get(id) : void 0;
|
|
8996
|
-
toolCalls.push({
|
|
8997
|
-
tool: p.name,
|
|
8998
|
-
input: p.arguments,
|
|
8999
|
-
id,
|
|
9000
|
-
output: completed?.output,
|
|
9001
|
-
durationMs: completed?.durationMs,
|
|
9002
|
-
startTime: tracker?.startTime,
|
|
9003
|
-
endTime: tracker?.startTime && completed?.durationMs !== void 0 ? new Date(new Date(tracker.startTime).getTime() + completed.durationMs).toISOString() : void 0
|
|
9004
|
-
});
|
|
9005
|
-
}
|
|
9006
|
-
}
|
|
9007
|
-
return toolCalls;
|
|
9008
|
-
}
|
|
9009
|
-
|
|
9010
|
-
// src/evaluation/providers/pi-coding-agent.ts
|
|
8760
|
+
// src/evaluation/providers/pi-cli.ts
|
|
9011
8761
|
var import_node_child_process4 = require("child_process");
|
|
9012
8762
|
var import_node_crypto7 = require("crypto");
|
|
9013
8763
|
var import_node_fs8 = require("fs");
|
|
@@ -9068,25 +8818,50 @@ function subscribeToPiLogEntries(listener) {
|
|
|
9068
8818
|
};
|
|
9069
8819
|
}
|
|
9070
8820
|
|
|
9071
|
-
// src/evaluation/providers/pi-
|
|
8821
|
+
// src/evaluation/providers/pi-utils.ts
|
|
8822
|
+
function extractPiTextContent(content) {
|
|
8823
|
+
if (typeof content === "string") {
|
|
8824
|
+
return content;
|
|
8825
|
+
}
|
|
8826
|
+
if (!Array.isArray(content)) {
|
|
8827
|
+
return void 0;
|
|
8828
|
+
}
|
|
8829
|
+
const textParts = [];
|
|
8830
|
+
for (const part of content) {
|
|
8831
|
+
if (!part || typeof part !== "object") {
|
|
8832
|
+
continue;
|
|
8833
|
+
}
|
|
8834
|
+
const p = part;
|
|
8835
|
+
if (p.type === "text" && typeof p.text === "string") {
|
|
8836
|
+
textParts.push(p.text);
|
|
8837
|
+
}
|
|
8838
|
+
}
|
|
8839
|
+
return textParts.length > 0 ? textParts.join("\n") : void 0;
|
|
8840
|
+
}
|
|
8841
|
+
function toFiniteNumber(value) {
|
|
8842
|
+
if (typeof value === "number" && Number.isFinite(value)) return value;
|
|
8843
|
+
return void 0;
|
|
8844
|
+
}
|
|
8845
|
+
|
|
8846
|
+
// src/evaluation/providers/pi-cli.ts
|
|
9072
8847
|
var WORKSPACE_PREFIX = "agentv-pi-";
|
|
9073
8848
|
var PROMPT_FILENAME = "prompt.md";
|
|
9074
|
-
var
|
|
8849
|
+
var PiCliProvider = class {
|
|
9075
8850
|
id;
|
|
9076
|
-
kind = "pi-
|
|
8851
|
+
kind = "pi-cli";
|
|
9077
8852
|
targetName;
|
|
9078
8853
|
supportsBatch = false;
|
|
9079
8854
|
config;
|
|
9080
8855
|
runPi;
|
|
9081
8856
|
constructor(targetName, config, runner = defaultPiRunner) {
|
|
9082
|
-
this.id = `pi-
|
|
8857
|
+
this.id = `pi-cli:${targetName}`;
|
|
9083
8858
|
this.targetName = targetName;
|
|
9084
8859
|
this.config = config;
|
|
9085
8860
|
this.runPi = runner;
|
|
9086
8861
|
}
|
|
9087
8862
|
async invoke(request) {
|
|
9088
8863
|
if (request.signal?.aborted) {
|
|
9089
|
-
throw new Error("Pi
|
|
8864
|
+
throw new Error("Pi CLI request was aborted before execution");
|
|
9090
8865
|
}
|
|
9091
8866
|
const inputFiles = normalizeInputFiles(request.inputFiles);
|
|
9092
8867
|
const startTime = (/* @__PURE__ */ new Date()).toISOString();
|
|
@@ -9096,17 +8871,17 @@ var PiCodingAgentProvider = class {
|
|
|
9096
8871
|
try {
|
|
9097
8872
|
const promptFile = import_node_path19.default.join(workspaceRoot, PROMPT_FILENAME);
|
|
9098
8873
|
await (0, import_promises16.writeFile)(promptFile, request.question, "utf8");
|
|
9099
|
-
const args = this.buildPiArgs(request.question, inputFiles
|
|
8874
|
+
const args = this.buildPiArgs(request.question, inputFiles);
|
|
9100
8875
|
const cwd = this.resolveCwd(workspaceRoot, request.cwd);
|
|
9101
8876
|
const result = await this.executePi(args, cwd, request.signal, logger);
|
|
9102
8877
|
if (result.timedOut) {
|
|
9103
8878
|
throw new Error(
|
|
9104
|
-
`Pi
|
|
8879
|
+
`Pi CLI timed out${formatTimeoutSuffix3(this.config.timeoutMs ?? void 0)}`
|
|
9105
8880
|
);
|
|
9106
8881
|
}
|
|
9107
8882
|
if (result.exitCode !== 0) {
|
|
9108
8883
|
const detail = pickDetail(result.stderr, result.stdout);
|
|
9109
|
-
const prefix = `Pi
|
|
8884
|
+
const prefix = `Pi CLI exited with code ${result.exitCode}`;
|
|
9110
8885
|
throw new Error(detail ? `${prefix}: ${detail}` : prefix);
|
|
9111
8886
|
}
|
|
9112
8887
|
const parsed = parsePiJsonl(result.stdout);
|
|
@@ -9163,7 +8938,7 @@ var PiCodingAgentProvider = class {
|
|
|
9163
8938
|
}
|
|
9164
8939
|
return import_node_path19.default.resolve(this.config.cwd);
|
|
9165
8940
|
}
|
|
9166
|
-
buildPiArgs(prompt, inputFiles
|
|
8941
|
+
buildPiArgs(prompt, inputFiles) {
|
|
9167
8942
|
const args = [];
|
|
9168
8943
|
if (this.config.subprovider) {
|
|
9169
8944
|
args.push("--provider", this.config.subprovider);
|
|
@@ -9215,7 +8990,7 @@ ${prompt}` : prompt;
|
|
|
9215
8990
|
const err = error;
|
|
9216
8991
|
if (err.code === "ENOENT") {
|
|
9217
8992
|
throw new Error(
|
|
9218
|
-
`Pi
|
|
8993
|
+
`Pi CLI executable '${this.config.executable}' was not found. Update the target executable or add it to PATH.`
|
|
9219
8994
|
);
|
|
9220
8995
|
}
|
|
9221
8996
|
throw error;
|
|
@@ -9225,26 +9000,18 @@ ${prompt}` : prompt;
|
|
|
9225
9000
|
const env = { ...process.env };
|
|
9226
9001
|
if (this.config.apiKey) {
|
|
9227
9002
|
const provider = this.config.subprovider?.toLowerCase() ?? "google";
|
|
9228
|
-
|
|
9229
|
-
|
|
9230
|
-
|
|
9231
|
-
|
|
9232
|
-
|
|
9233
|
-
|
|
9234
|
-
|
|
9235
|
-
|
|
9236
|
-
|
|
9237
|
-
|
|
9238
|
-
|
|
9239
|
-
|
|
9240
|
-
env.GROQ_API_KEY = this.config.apiKey;
|
|
9241
|
-
break;
|
|
9242
|
-
case "xai":
|
|
9243
|
-
env.XAI_API_KEY = this.config.apiKey;
|
|
9244
|
-
break;
|
|
9245
|
-
case "openrouter":
|
|
9246
|
-
env.OPENROUTER_API_KEY = this.config.apiKey;
|
|
9247
|
-
break;
|
|
9003
|
+
const ENV_KEY_MAP = {
|
|
9004
|
+
google: "GEMINI_API_KEY",
|
|
9005
|
+
gemini: "GEMINI_API_KEY",
|
|
9006
|
+
anthropic: "ANTHROPIC_API_KEY",
|
|
9007
|
+
openai: "OPENAI_API_KEY",
|
|
9008
|
+
groq: "GROQ_API_KEY",
|
|
9009
|
+
xai: "XAI_API_KEY",
|
|
9010
|
+
openrouter: "OPENROUTER_API_KEY"
|
|
9011
|
+
};
|
|
9012
|
+
const envKey = ENV_KEY_MAP[provider];
|
|
9013
|
+
if (envKey) {
|
|
9014
|
+
env[envKey] = this.config.apiKey;
|
|
9248
9015
|
}
|
|
9249
9016
|
}
|
|
9250
9017
|
return env;
|
|
@@ -9262,7 +9029,7 @@ ${prompt}` : prompt;
|
|
|
9262
9029
|
if (this.config.logDir) {
|
|
9263
9030
|
return import_node_path19.default.resolve(this.config.logDir);
|
|
9264
9031
|
}
|
|
9265
|
-
return import_node_path19.default.join(process.cwd(), ".agentv", "logs", "pi-
|
|
9032
|
+
return import_node_path19.default.join(process.cwd(), ".agentv", "logs", "pi-cli");
|
|
9266
9033
|
}
|
|
9267
9034
|
async createStreamLogger(request) {
|
|
9268
9035
|
const logDir = this.resolveLogDirectory();
|
|
@@ -9314,7 +9081,7 @@ var PiStreamLogger = class _PiStreamLogger {
|
|
|
9314
9081
|
static async create(options) {
|
|
9315
9082
|
const logger = new _PiStreamLogger(options.filePath, options.format);
|
|
9316
9083
|
const header = [
|
|
9317
|
-
"# Pi
|
|
9084
|
+
"# Pi CLI stream log",
|
|
9318
9085
|
`# target: ${options.targetName}`,
|
|
9319
9086
|
options.evalCaseId ? `# eval: ${options.evalCaseId}` : void 0,
|
|
9320
9087
|
options.attempt !== void 0 ? `# attempt: ${options.attempt + 1}` : void 0,
|
|
@@ -9463,10 +9230,10 @@ function summarizePiEvent(event) {
|
|
|
9463
9230
|
return `${type}: ${role}`;
|
|
9464
9231
|
}
|
|
9465
9232
|
case "message_update": {
|
|
9466
|
-
const
|
|
9467
|
-
const eventType =
|
|
9233
|
+
const evt = record.assistantMessageEvent;
|
|
9234
|
+
const eventType = evt?.type;
|
|
9468
9235
|
if (eventType === "text_delta") {
|
|
9469
|
-
const delta =
|
|
9236
|
+
const delta = evt?.delta;
|
|
9470
9237
|
if (typeof delta === "string") {
|
|
9471
9238
|
const preview = delta.length > 50 ? `${delta.slice(0, 50)}...` : delta;
|
|
9472
9239
|
return `text_delta: ${preview}`;
|
|
@@ -9488,7 +9255,7 @@ function tryParseJsonValue(rawLine) {
|
|
|
9488
9255
|
function parsePiJsonl(output) {
|
|
9489
9256
|
const trimmed = output.trim();
|
|
9490
9257
|
if (trimmed.length === 0) {
|
|
9491
|
-
throw new Error("Pi
|
|
9258
|
+
throw new Error("Pi CLI produced no output");
|
|
9492
9259
|
}
|
|
9493
9260
|
const lines = trimmed.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.length > 0);
|
|
9494
9261
|
const parsed = [];
|
|
@@ -9499,38 +9266,27 @@ function parsePiJsonl(output) {
|
|
|
9499
9266
|
}
|
|
9500
9267
|
}
|
|
9501
9268
|
if (parsed.length === 0) {
|
|
9502
|
-
throw new Error("Pi
|
|
9269
|
+
throw new Error("Pi CLI produced no valid JSON output");
|
|
9503
9270
|
}
|
|
9504
9271
|
return parsed;
|
|
9505
9272
|
}
|
|
9506
9273
|
function extractMessages(events) {
|
|
9507
9274
|
for (let i = events.length - 1; i >= 0; i--) {
|
|
9508
9275
|
const event = events[i];
|
|
9509
|
-
if (!event || typeof event !== "object")
|
|
9510
|
-
continue;
|
|
9511
|
-
}
|
|
9276
|
+
if (!event || typeof event !== "object") continue;
|
|
9512
9277
|
const record = event;
|
|
9513
|
-
if (record.type !== "agent_end")
|
|
9514
|
-
continue;
|
|
9515
|
-
}
|
|
9278
|
+
if (record.type !== "agent_end") continue;
|
|
9516
9279
|
const messages = record.messages;
|
|
9517
|
-
if (!Array.isArray(messages))
|
|
9518
|
-
continue;
|
|
9519
|
-
}
|
|
9280
|
+
if (!Array.isArray(messages)) continue;
|
|
9520
9281
|
return messages.map(convertPiMessage).filter((m) => m !== void 0);
|
|
9521
9282
|
}
|
|
9522
9283
|
const output = [];
|
|
9523
9284
|
for (const event of events) {
|
|
9524
|
-
if (!event || typeof event !== "object")
|
|
9525
|
-
continue;
|
|
9526
|
-
}
|
|
9285
|
+
if (!event || typeof event !== "object") continue;
|
|
9527
9286
|
const record = event;
|
|
9528
9287
|
if (record.type === "turn_end") {
|
|
9529
|
-
const
|
|
9530
|
-
|
|
9531
|
-
if (converted) {
|
|
9532
|
-
output.push(converted);
|
|
9533
|
-
}
|
|
9288
|
+
const converted = convertPiMessage(record.message);
|
|
9289
|
+
if (converted) output.push(converted);
|
|
9534
9290
|
}
|
|
9535
9291
|
}
|
|
9536
9292
|
return output;
|
|
@@ -9547,10 +9303,7 @@ function extractTokenUsage(events) {
|
|
|
9547
9303
|
const input = toFiniteNumber(u.input_tokens ?? u.inputTokens ?? u.input);
|
|
9548
9304
|
const output = toFiniteNumber(u.output_tokens ?? u.outputTokens ?? u.output);
|
|
9549
9305
|
if (input !== void 0 || output !== void 0) {
|
|
9550
|
-
const result = {
|
|
9551
|
-
input: input ?? 0,
|
|
9552
|
-
output: output ?? 0
|
|
9553
|
-
};
|
|
9306
|
+
const result = { input: input ?? 0, output: output ?? 0 };
|
|
9554
9307
|
const cached = toFiniteNumber(u.cache_read_input_tokens ?? u.cached ?? u.cachedTokens);
|
|
9555
9308
|
const reasoning = toFiniteNumber(u.reasoning_tokens ?? u.reasoningTokens ?? u.reasoning);
|
|
9556
9309
|
return {
|
|
@@ -9598,16 +9351,12 @@ function aggregateUsageFromMessages(messages) {
|
|
|
9598
9351
|
return result;
|
|
9599
9352
|
}
|
|
9600
9353
|
function convertPiMessage(message) {
|
|
9601
|
-
if (!message || typeof message !== "object")
|
|
9602
|
-
return void 0;
|
|
9603
|
-
}
|
|
9354
|
+
if (!message || typeof message !== "object") return void 0;
|
|
9604
9355
|
const msg = message;
|
|
9605
9356
|
const role = msg.role;
|
|
9606
|
-
if (typeof role !== "string")
|
|
9607
|
-
return void 0;
|
|
9608
|
-
}
|
|
9357
|
+
if (typeof role !== "string") return void 0;
|
|
9609
9358
|
const content = extractPiTextContent(msg.content);
|
|
9610
|
-
const toolCalls =
|
|
9359
|
+
const toolCalls = extractToolCalls3(msg.content);
|
|
9611
9360
|
const startTime = typeof msg.timestamp === "number" ? new Date(msg.timestamp).toISOString() : typeof msg.timestamp === "string" ? msg.timestamp : void 0;
|
|
9612
9361
|
const metadata = {};
|
|
9613
9362
|
if (msg.api) metadata.api = msg.api;
|
|
@@ -9623,15 +9372,11 @@ function convertPiMessage(message) {
|
|
|
9623
9372
|
metadata: Object.keys(metadata).length > 0 ? metadata : void 0
|
|
9624
9373
|
};
|
|
9625
9374
|
}
|
|
9626
|
-
function
|
|
9627
|
-
if (!Array.isArray(content))
|
|
9628
|
-
return [];
|
|
9629
|
-
}
|
|
9375
|
+
function extractToolCalls3(content) {
|
|
9376
|
+
if (!Array.isArray(content)) return [];
|
|
9630
9377
|
const toolCalls = [];
|
|
9631
9378
|
for (const part of content) {
|
|
9632
|
-
if (!part || typeof part !== "object")
|
|
9633
|
-
continue;
|
|
9634
|
-
}
|
|
9379
|
+
if (!part || typeof part !== "object") continue;
|
|
9635
9380
|
const p = part;
|
|
9636
9381
|
if (p.type === "tool_use" && typeof p.name === "string") {
|
|
9637
9382
|
toolCalls.push({
|
|
@@ -9651,10 +9396,7 @@ function extractToolCalls4(content) {
|
|
|
9651
9396
|
const existing = toolCalls.find((tc) => tc.id === p.tool_use_id);
|
|
9652
9397
|
if (existing) {
|
|
9653
9398
|
const idx = toolCalls.indexOf(existing);
|
|
9654
|
-
toolCalls[idx] = {
|
|
9655
|
-
...existing,
|
|
9656
|
-
output: p.content
|
|
9657
|
-
};
|
|
9399
|
+
toolCalls[idx] = { ...existing, output: p.content };
|
|
9658
9400
|
}
|
|
9659
9401
|
}
|
|
9660
9402
|
}
|
|
@@ -9665,18 +9407,13 @@ function escapeAtSymbols(prompt) {
|
|
|
9665
9407
|
}
|
|
9666
9408
|
function pickDetail(stderr, stdout) {
|
|
9667
9409
|
const errorText = stderr.trim();
|
|
9668
|
-
if (errorText.length > 0)
|
|
9669
|
-
return errorText;
|
|
9670
|
-
}
|
|
9410
|
+
if (errorText.length > 0) return errorText;
|
|
9671
9411
|
const stdoutText = stdout.trim();
|
|
9672
9412
|
return stdoutText.length > 0 ? stdoutText : void 0;
|
|
9673
9413
|
}
|
|
9674
9414
|
function formatTimeoutSuffix3(timeoutMs) {
|
|
9675
|
-
if (!timeoutMs || timeoutMs <= 0)
|
|
9676
|
-
|
|
9677
|
-
}
|
|
9678
|
-
const seconds = Math.ceil(timeoutMs / 1e3);
|
|
9679
|
-
return ` after ${seconds}s`;
|
|
9415
|
+
if (!timeoutMs || timeoutMs <= 0) return "";
|
|
9416
|
+
return ` after ${Math.ceil(timeoutMs / 1e3)}s`;
|
|
9680
9417
|
}
|
|
9681
9418
|
async function defaultPiRunner(options) {
|
|
9682
9419
|
return await new Promise((resolve, reject) => {
|
|
@@ -9711,39 +9448,497 @@ async function defaultPiRunner(options) {
|
|
|
9711
9448
|
}, options.timeoutMs);
|
|
9712
9449
|
timeoutHandle.unref?.();
|
|
9713
9450
|
}
|
|
9714
|
-
child.stdout.setEncoding("utf8");
|
|
9715
|
-
child.stdout.on("data", (chunk) => {
|
|
9716
|
-
stdout += chunk;
|
|
9717
|
-
options.onStdoutChunk?.(chunk);
|
|
9718
|
-
});
|
|
9719
|
-
child.stderr.setEncoding("utf8");
|
|
9720
|
-
child.stderr.on("data", (chunk) => {
|
|
9721
|
-
stderr += chunk;
|
|
9722
|
-
options.onStderrChunk?.(chunk);
|
|
9723
|
-
});
|
|
9724
|
-
child.stdin.end();
|
|
9725
|
-
const cleanup = () => {
|
|
9726
|
-
if (timeoutHandle)
|
|
9727
|
-
|
|
9728
|
-
|
|
9729
|
-
|
|
9730
|
-
|
|
9731
|
-
|
|
9732
|
-
};
|
|
9733
|
-
child.on("
|
|
9734
|
-
cleanup();
|
|
9735
|
-
|
|
9736
|
-
|
|
9737
|
-
|
|
9738
|
-
|
|
9739
|
-
|
|
9740
|
-
|
|
9741
|
-
|
|
9742
|
-
|
|
9743
|
-
|
|
9451
|
+
child.stdout.setEncoding("utf8");
|
|
9452
|
+
child.stdout.on("data", (chunk) => {
|
|
9453
|
+
stdout += chunk;
|
|
9454
|
+
options.onStdoutChunk?.(chunk);
|
|
9455
|
+
});
|
|
9456
|
+
child.stderr.setEncoding("utf8");
|
|
9457
|
+
child.stderr.on("data", (chunk) => {
|
|
9458
|
+
stderr += chunk;
|
|
9459
|
+
options.onStderrChunk?.(chunk);
|
|
9460
|
+
});
|
|
9461
|
+
child.stdin.end();
|
|
9462
|
+
const cleanup = () => {
|
|
9463
|
+
if (timeoutHandle) clearTimeout(timeoutHandle);
|
|
9464
|
+
if (options.signal) options.signal.removeEventListener("abort", onAbort);
|
|
9465
|
+
};
|
|
9466
|
+
child.on("error", (error) => {
|
|
9467
|
+
cleanup();
|
|
9468
|
+
reject(error);
|
|
9469
|
+
});
|
|
9470
|
+
child.on("close", (code) => {
|
|
9471
|
+
cleanup();
|
|
9472
|
+
resolve({
|
|
9473
|
+
stdout,
|
|
9474
|
+
stderr,
|
|
9475
|
+
exitCode: typeof code === "number" ? code : -1,
|
|
9476
|
+
timedOut
|
|
9477
|
+
});
|
|
9478
|
+
});
|
|
9479
|
+
});
|
|
9480
|
+
}
|
|
9481
|
+
|
|
9482
|
+
// src/evaluation/providers/pi-coding-agent.ts
|
|
9483
|
+
var import_node_child_process5 = require("child_process");
|
|
9484
|
+
var import_node_crypto8 = require("crypto");
|
|
9485
|
+
var import_node_fs9 = require("fs");
|
|
9486
|
+
var import_promises17 = require("fs/promises");
|
|
9487
|
+
var import_node_path20 = __toESM(require("path"), 1);
|
|
9488
|
+
var import_node_readline = require("readline");
|
|
9489
|
+
var piCodingAgentModule = null;
|
|
9490
|
+
var piAiModule = null;
|
|
9491
|
+
async function promptInstall() {
|
|
9492
|
+
if (!process.stdout.isTTY) return false;
|
|
9493
|
+
const rl = (0, import_node_readline.createInterface)({ input: process.stdin, output: process.stderr });
|
|
9494
|
+
try {
|
|
9495
|
+
return await new Promise((resolve) => {
|
|
9496
|
+
rl.question(
|
|
9497
|
+
"@mariozechner/pi-coding-agent is not installed. Install it now? (y/N) ",
|
|
9498
|
+
(answer) => resolve(answer.trim().toLowerCase() === "y")
|
|
9499
|
+
);
|
|
9500
|
+
});
|
|
9501
|
+
} finally {
|
|
9502
|
+
rl.close();
|
|
9503
|
+
}
|
|
9504
|
+
}
|
|
9505
|
+
async function loadSdkModules() {
|
|
9506
|
+
if (!piCodingAgentModule || !piAiModule) {
|
|
9507
|
+
try {
|
|
9508
|
+
[piCodingAgentModule, piAiModule] = await Promise.all([
|
|
9509
|
+
import("@mariozechner/pi-coding-agent"),
|
|
9510
|
+
import("@mariozechner/pi-ai")
|
|
9511
|
+
]);
|
|
9512
|
+
} catch {
|
|
9513
|
+
if (await promptInstall()) {
|
|
9514
|
+
console.error("Installing @mariozechner/pi-coding-agent...");
|
|
9515
|
+
(0, import_node_child_process5.execSync)("bun add @mariozechner/pi-coding-agent", { stdio: "inherit" });
|
|
9516
|
+
[piCodingAgentModule, piAiModule] = await Promise.all([
|
|
9517
|
+
import("@mariozechner/pi-coding-agent"),
|
|
9518
|
+
import("@mariozechner/pi-ai")
|
|
9519
|
+
]);
|
|
9520
|
+
} else {
|
|
9521
|
+
throw new Error(
|
|
9522
|
+
"pi-coding-agent SDK is not installed. Install it with:\n bun add @mariozechner/pi-coding-agent"
|
|
9523
|
+
);
|
|
9524
|
+
}
|
|
9525
|
+
}
|
|
9526
|
+
}
|
|
9527
|
+
const toolMap = {
|
|
9528
|
+
read: piCodingAgentModule.readTool,
|
|
9529
|
+
bash: piCodingAgentModule.bashTool,
|
|
9530
|
+
edit: piCodingAgentModule.editTool,
|
|
9531
|
+
write: piCodingAgentModule.writeTool,
|
|
9532
|
+
grep: piCodingAgentModule.grepTool,
|
|
9533
|
+
find: piCodingAgentModule.findTool,
|
|
9534
|
+
ls: piCodingAgentModule.lsTool
|
|
9535
|
+
};
|
|
9536
|
+
return {
|
|
9537
|
+
createAgentSession: piCodingAgentModule.createAgentSession,
|
|
9538
|
+
codingTools: piCodingAgentModule.codingTools,
|
|
9539
|
+
toolMap,
|
|
9540
|
+
SessionManager: piCodingAgentModule.SessionManager,
|
|
9541
|
+
getModel: piAiModule.getModel
|
|
9542
|
+
};
|
|
9543
|
+
}
|
|
9544
|
+
var PiCodingAgentProvider = class {
|
|
9545
|
+
id;
|
|
9546
|
+
kind = "pi-coding-agent";
|
|
9547
|
+
targetName;
|
|
9548
|
+
supportsBatch = false;
|
|
9549
|
+
config;
|
|
9550
|
+
constructor(targetName, config) {
|
|
9551
|
+
this.id = `pi-coding-agent:${targetName}`;
|
|
9552
|
+
this.targetName = targetName;
|
|
9553
|
+
this.config = config;
|
|
9554
|
+
}
|
|
9555
|
+
async invoke(request) {
|
|
9556
|
+
if (request.signal?.aborted) {
|
|
9557
|
+
throw new Error("Pi coding agent request was aborted before execution");
|
|
9558
|
+
}
|
|
9559
|
+
const inputFiles = normalizeInputFiles(request.inputFiles);
|
|
9560
|
+
const startTime = (/* @__PURE__ */ new Date()).toISOString();
|
|
9561
|
+
const startMs = Date.now();
|
|
9562
|
+
const sdk = await loadSdkModules();
|
|
9563
|
+
const logger = await this.createStreamLogger(request).catch(() => void 0);
|
|
9564
|
+
try {
|
|
9565
|
+
const cwd = this.resolveCwd(request.cwd);
|
|
9566
|
+
const providerName = this.config.subprovider ?? "google";
|
|
9567
|
+
const modelId = this.config.model ?? "gemini-2.5-flash";
|
|
9568
|
+
this.setApiKeyEnv(providerName);
|
|
9569
|
+
const model = sdk.getModel(providerName, modelId);
|
|
9570
|
+
const tools = this.resolveTools(sdk);
|
|
9571
|
+
const { session } = await sdk.createAgentSession({
|
|
9572
|
+
cwd,
|
|
9573
|
+
model,
|
|
9574
|
+
tools,
|
|
9575
|
+
thinkingLevel: this.config.thinking,
|
|
9576
|
+
sessionManager: sdk.SessionManager.inMemory(cwd)
|
|
9577
|
+
});
|
|
9578
|
+
let tokenUsage;
|
|
9579
|
+
let costUsd;
|
|
9580
|
+
const toolTrackers = /* @__PURE__ */ new Map();
|
|
9581
|
+
const completedToolResults = /* @__PURE__ */ new Map();
|
|
9582
|
+
const unsubscribe = session.subscribe((event) => {
|
|
9583
|
+
logger?.handleEvent(event);
|
|
9584
|
+
switch (event.type) {
|
|
9585
|
+
case "message_end": {
|
|
9586
|
+
const msg = event.message;
|
|
9587
|
+
if (msg && typeof msg === "object" && "role" in msg && msg.role === "assistant" && "usage" in msg) {
|
|
9588
|
+
const usage = msg.usage;
|
|
9589
|
+
if (usage && typeof usage === "object") {
|
|
9590
|
+
const u = usage;
|
|
9591
|
+
const input = toFiniteNumber(u.input);
|
|
9592
|
+
const output = toFiniteNumber(u.output);
|
|
9593
|
+
const cached = toFiniteNumber(u.cacheRead);
|
|
9594
|
+
let callDelta;
|
|
9595
|
+
if (input !== void 0 || output !== void 0) {
|
|
9596
|
+
callDelta = {
|
|
9597
|
+
input: input ?? 0,
|
|
9598
|
+
output: output ?? 0,
|
|
9599
|
+
...cached !== void 0 ? { cached } : {}
|
|
9600
|
+
};
|
|
9601
|
+
tokenUsage = {
|
|
9602
|
+
input: (tokenUsage?.input ?? 0) + callDelta.input,
|
|
9603
|
+
output: (tokenUsage?.output ?? 0) + callDelta.output,
|
|
9604
|
+
...cached !== void 0 ? { cached: (tokenUsage?.cached ?? 0) + cached } : tokenUsage?.cached !== void 0 ? { cached: tokenUsage.cached } : {}
|
|
9605
|
+
};
|
|
9606
|
+
}
|
|
9607
|
+
const cost = u.cost;
|
|
9608
|
+
if (cost && typeof cost === "object") {
|
|
9609
|
+
const total = toFiniteNumber(cost.total);
|
|
9610
|
+
if (total !== void 0) {
|
|
9611
|
+
costUsd = (costUsd ?? 0) + total;
|
|
9612
|
+
}
|
|
9613
|
+
}
|
|
9614
|
+
request.streamCallbacks?.onLlmCallEnd?.(modelId, callDelta);
|
|
9615
|
+
}
|
|
9616
|
+
}
|
|
9617
|
+
break;
|
|
9618
|
+
}
|
|
9619
|
+
case "tool_execution_start": {
|
|
9620
|
+
toolTrackers.set(event.toolCallId, {
|
|
9621
|
+
toolCallId: event.toolCallId,
|
|
9622
|
+
toolName: event.toolName,
|
|
9623
|
+
args: event.args,
|
|
9624
|
+
startMs: Date.now(),
|
|
9625
|
+
startTime: (/* @__PURE__ */ new Date()).toISOString()
|
|
9626
|
+
});
|
|
9627
|
+
request.streamCallbacks?.onToolCallStart?.(event.toolName, event.toolCallId);
|
|
9628
|
+
break;
|
|
9629
|
+
}
|
|
9630
|
+
case "tool_execution_end": {
|
|
9631
|
+
const tracker = toolTrackers.get(event.toolCallId);
|
|
9632
|
+
const durationMs = tracker ? Date.now() - tracker.startMs : 0;
|
|
9633
|
+
completedToolResults.set(event.toolCallId, {
|
|
9634
|
+
output: event.result,
|
|
9635
|
+
durationMs
|
|
9636
|
+
});
|
|
9637
|
+
request.streamCallbacks?.onToolCallEnd?.(
|
|
9638
|
+
event.toolName,
|
|
9639
|
+
tracker?.args,
|
|
9640
|
+
event.result,
|
|
9641
|
+
durationMs,
|
|
9642
|
+
event.toolCallId
|
|
9643
|
+
);
|
|
9644
|
+
toolTrackers.delete(event.toolCallId);
|
|
9645
|
+
break;
|
|
9646
|
+
}
|
|
9647
|
+
}
|
|
9648
|
+
});
|
|
9649
|
+
try {
|
|
9650
|
+
const systemPrompt = this.config.systemPrompt;
|
|
9651
|
+
let prompt = request.question;
|
|
9652
|
+
if (systemPrompt) {
|
|
9653
|
+
prompt = `${systemPrompt}
|
|
9654
|
+
|
|
9655
|
+
${prompt}`;
|
|
9656
|
+
}
|
|
9657
|
+
if (inputFiles && inputFiles.length > 0) {
|
|
9658
|
+
const fileList = inputFiles.map((f) => `@${f}`).join("\n");
|
|
9659
|
+
prompt = `${prompt}
|
|
9660
|
+
|
|
9661
|
+
Files:
|
|
9662
|
+
${fileList}`;
|
|
9663
|
+
}
|
|
9664
|
+
if (this.config.timeoutMs) {
|
|
9665
|
+
const timeoutMs = this.config.timeoutMs;
|
|
9666
|
+
let timeoutId;
|
|
9667
|
+
const timeoutPromise = new Promise((_, reject) => {
|
|
9668
|
+
timeoutId = setTimeout(
|
|
9669
|
+
() => reject(
|
|
9670
|
+
new Error(`Pi coding agent timed out after ${Math.ceil(timeoutMs / 1e3)}s`)
|
|
9671
|
+
),
|
|
9672
|
+
timeoutMs
|
|
9673
|
+
);
|
|
9674
|
+
});
|
|
9675
|
+
try {
|
|
9676
|
+
await Promise.race([session.prompt(prompt), timeoutPromise]);
|
|
9677
|
+
} finally {
|
|
9678
|
+
if (timeoutId !== void 0) clearTimeout(timeoutId);
|
|
9679
|
+
}
|
|
9680
|
+
} else {
|
|
9681
|
+
await session.prompt(prompt);
|
|
9682
|
+
}
|
|
9683
|
+
const agentMessages = session.agent.state.messages;
|
|
9684
|
+
const output = [];
|
|
9685
|
+
for (const msg of agentMessages) {
|
|
9686
|
+
output.push(convertAgentMessage(msg, toolTrackers, completedToolResults));
|
|
9687
|
+
}
|
|
9688
|
+
const endTime = (/* @__PURE__ */ new Date()).toISOString();
|
|
9689
|
+
const durationMs = Date.now() - startMs;
|
|
9690
|
+
return {
|
|
9691
|
+
raw: {
|
|
9692
|
+
messages: agentMessages,
|
|
9693
|
+
model: this.config.model,
|
|
9694
|
+
provider: this.config.subprovider
|
|
9695
|
+
},
|
|
9696
|
+
output,
|
|
9697
|
+
tokenUsage,
|
|
9698
|
+
costUsd,
|
|
9699
|
+
durationMs,
|
|
9700
|
+
startTime,
|
|
9701
|
+
endTime
|
|
9702
|
+
};
|
|
9703
|
+
} finally {
|
|
9704
|
+
unsubscribe();
|
|
9705
|
+
session.dispose();
|
|
9706
|
+
}
|
|
9707
|
+
} finally {
|
|
9708
|
+
await logger?.close();
|
|
9709
|
+
}
|
|
9710
|
+
}
|
|
9711
|
+
/** Maps config apiKey to the provider-specific env var the SDK reads. */
|
|
9712
|
+
setApiKeyEnv(providerName) {
|
|
9713
|
+
if (!this.config.apiKey) return;
|
|
9714
|
+
const ENV_KEY_MAP = {
|
|
9715
|
+
google: "GEMINI_API_KEY",
|
|
9716
|
+
gemini: "GEMINI_API_KEY",
|
|
9717
|
+
anthropic: "ANTHROPIC_API_KEY",
|
|
9718
|
+
openai: "OPENAI_API_KEY",
|
|
9719
|
+
groq: "GROQ_API_KEY",
|
|
9720
|
+
xai: "XAI_API_KEY",
|
|
9721
|
+
openrouter: "OPENROUTER_API_KEY"
|
|
9722
|
+
};
|
|
9723
|
+
const envKey = ENV_KEY_MAP[providerName.toLowerCase()];
|
|
9724
|
+
if (envKey) {
|
|
9725
|
+
process.env[envKey] = this.config.apiKey;
|
|
9726
|
+
}
|
|
9727
|
+
}
|
|
9728
|
+
resolveCwd(cwdOverride) {
|
|
9729
|
+
if (cwdOverride) {
|
|
9730
|
+
return import_node_path20.default.resolve(cwdOverride);
|
|
9731
|
+
}
|
|
9732
|
+
if (this.config.cwd) {
|
|
9733
|
+
return import_node_path20.default.resolve(this.config.cwd);
|
|
9734
|
+
}
|
|
9735
|
+
return process.cwd();
|
|
9736
|
+
}
|
|
9737
|
+
resolveTools(sdk) {
|
|
9738
|
+
if (!this.config.tools) {
|
|
9739
|
+
return sdk.codingTools;
|
|
9740
|
+
}
|
|
9741
|
+
const toolNames = this.config.tools.split(",").map((t) => t.trim().toLowerCase());
|
|
9742
|
+
const selected = [];
|
|
9743
|
+
for (const name of toolNames) {
|
|
9744
|
+
if (name in sdk.toolMap) {
|
|
9745
|
+
selected.push(sdk.toolMap[name]);
|
|
9746
|
+
}
|
|
9747
|
+
}
|
|
9748
|
+
return selected.length > 0 ? selected : sdk.codingTools;
|
|
9749
|
+
}
|
|
9750
|
+
resolveLogDirectory() {
|
|
9751
|
+
if (this.config.logDir) {
|
|
9752
|
+
return import_node_path20.default.resolve(this.config.logDir);
|
|
9753
|
+
}
|
|
9754
|
+
return import_node_path20.default.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
|
|
9755
|
+
}
|
|
9756
|
+
async createStreamLogger(request) {
|
|
9757
|
+
const logDir = this.resolveLogDirectory();
|
|
9758
|
+
if (!logDir) {
|
|
9759
|
+
return void 0;
|
|
9760
|
+
}
|
|
9761
|
+
try {
|
|
9762
|
+
await (0, import_promises17.mkdir)(logDir, { recursive: true });
|
|
9763
|
+
} catch (error) {
|
|
9764
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
9765
|
+
console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
|
|
9766
|
+
return void 0;
|
|
9767
|
+
}
|
|
9768
|
+
const filePath = import_node_path20.default.join(logDir, buildLogFilename6(request, this.targetName));
|
|
9769
|
+
try {
|
|
9770
|
+
const logger = await PiStreamLogger2.create({
|
|
9771
|
+
filePath,
|
|
9772
|
+
targetName: this.targetName,
|
|
9773
|
+
evalCaseId: request.evalCaseId,
|
|
9774
|
+
attempt: request.attempt,
|
|
9775
|
+
format: this.config.logFormat ?? "summary"
|
|
9776
|
+
});
|
|
9777
|
+
recordPiLogEntry({
|
|
9778
|
+
filePath,
|
|
9779
|
+
targetName: this.targetName,
|
|
9780
|
+
evalCaseId: request.evalCaseId,
|
|
9781
|
+
attempt: request.attempt
|
|
9782
|
+
});
|
|
9783
|
+
return logger;
|
|
9784
|
+
} catch (error) {
|
|
9785
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
9786
|
+
console.warn(`Skipping Pi stream logging for ${filePath}: ${message}`);
|
|
9787
|
+
return void 0;
|
|
9788
|
+
}
|
|
9789
|
+
}
|
|
9790
|
+
};
|
|
9791
|
+
var PiStreamLogger2 = class _PiStreamLogger {
|
|
9792
|
+
filePath;
|
|
9793
|
+
stream;
|
|
9794
|
+
startedAt = Date.now();
|
|
9795
|
+
format;
|
|
9796
|
+
constructor(filePath, format) {
|
|
9797
|
+
this.filePath = filePath;
|
|
9798
|
+
this.format = format;
|
|
9799
|
+
this.stream = (0, import_node_fs9.createWriteStream)(filePath, { flags: "a" });
|
|
9800
|
+
}
|
|
9801
|
+
static async create(options) {
|
|
9802
|
+
const logger = new _PiStreamLogger(options.filePath, options.format);
|
|
9803
|
+
const header = [
|
|
9804
|
+
"# Pi Coding Agent stream log",
|
|
9805
|
+
`# target: ${options.targetName}`,
|
|
9806
|
+
options.evalCaseId ? `# eval: ${options.evalCaseId}` : void 0,
|
|
9807
|
+
options.attempt !== void 0 ? `# attempt: ${options.attempt + 1}` : void 0,
|
|
9808
|
+
`# started: ${(/* @__PURE__ */ new Date()).toISOString()}`,
|
|
9809
|
+
""
|
|
9810
|
+
].filter((line) => Boolean(line));
|
|
9811
|
+
for (const line of header) {
|
|
9812
|
+
logger.stream.write(`${line}
|
|
9813
|
+
`);
|
|
9814
|
+
}
|
|
9815
|
+
return logger;
|
|
9816
|
+
}
|
|
9817
|
+
handleEvent(event) {
|
|
9818
|
+
if (!event || typeof event !== "object") return;
|
|
9819
|
+
const record = event;
|
|
9820
|
+
const type = typeof record.type === "string" ? record.type : void 0;
|
|
9821
|
+
if (!type) return;
|
|
9822
|
+
const message = this.format === "json" ? JSON.stringify(event, null, 2) : summarizeSdkEvent2(event);
|
|
9823
|
+
if (message) {
|
|
9824
|
+
this.stream.write(`[+${formatElapsed6(this.startedAt)}] ${message}
|
|
9825
|
+
`);
|
|
9826
|
+
}
|
|
9827
|
+
}
|
|
9828
|
+
async close() {
|
|
9829
|
+
await new Promise((resolve, reject) => {
|
|
9830
|
+
this.stream.once("error", reject);
|
|
9831
|
+
this.stream.end(() => resolve());
|
|
9832
|
+
});
|
|
9833
|
+
}
|
|
9834
|
+
};
|
|
9835
|
+
function summarizeSdkEvent2(event) {
|
|
9836
|
+
if (!event || typeof event !== "object") return void 0;
|
|
9837
|
+
const record = event;
|
|
9838
|
+
const type = typeof record.type === "string" ? record.type : void 0;
|
|
9839
|
+
if (!type) return void 0;
|
|
9840
|
+
switch (type) {
|
|
9841
|
+
case "agent_start":
|
|
9842
|
+
case "agent_end":
|
|
9843
|
+
case "turn_start":
|
|
9844
|
+
case "turn_end":
|
|
9845
|
+
return type;
|
|
9846
|
+
case "message_start":
|
|
9847
|
+
case "message_end": {
|
|
9848
|
+
const msg = record.message;
|
|
9849
|
+
return `${type}: ${msg?.role ?? "unknown"}`;
|
|
9850
|
+
}
|
|
9851
|
+
case "tool_execution_start":
|
|
9852
|
+
return `tool_start: ${record.toolName}`;
|
|
9853
|
+
case "tool_execution_end":
|
|
9854
|
+
return `tool_end: ${record.toolName}`;
|
|
9855
|
+
default:
|
|
9856
|
+
return type;
|
|
9857
|
+
}
|
|
9858
|
+
}
|
|
9859
|
+
function buildLogFilename6(request, targetName) {
|
|
9860
|
+
const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
|
|
9861
|
+
const evalId = sanitizeForFilename6(request.evalCaseId ?? "pi");
|
|
9862
|
+
const attemptSuffix = request.attempt !== void 0 ? `_attempt-${request.attempt + 1}` : "";
|
|
9863
|
+
const target = sanitizeForFilename6(targetName);
|
|
9864
|
+
return `${timestamp}_${target}_${evalId}${attemptSuffix}_${(0, import_node_crypto8.randomUUID)().slice(0, 8)}.log`;
|
|
9865
|
+
}
|
|
9866
|
+
function sanitizeForFilename6(value) {
|
|
9867
|
+
const sanitized = value.replace(/[^A-Za-z0-9._-]+/g, "_");
|
|
9868
|
+
return sanitized.length > 0 ? sanitized : "pi";
|
|
9869
|
+
}
|
|
9870
|
+
function formatElapsed6(startedAt) {
|
|
9871
|
+
const elapsedSeconds = Math.floor((Date.now() - startedAt) / 1e3);
|
|
9872
|
+
const hours = Math.floor(elapsedSeconds / 3600);
|
|
9873
|
+
const minutes = Math.floor(elapsedSeconds % 3600 / 60);
|
|
9874
|
+
const seconds = elapsedSeconds % 60;
|
|
9875
|
+
if (hours > 0) {
|
|
9876
|
+
return `${hours.toString().padStart(2, "0")}:${minutes.toString().padStart(2, "0")}:${seconds.toString().padStart(2, "0")}`;
|
|
9877
|
+
}
|
|
9878
|
+
return `${minutes.toString().padStart(2, "0")}:${seconds.toString().padStart(2, "0")}`;
|
|
9879
|
+
}
|
|
9880
|
+
function convertAgentMessage(message, toolTrackers, completedToolResults) {
|
|
9881
|
+
if (!message || typeof message !== "object") {
|
|
9882
|
+
return { role: "unknown", content: String(message) };
|
|
9883
|
+
}
|
|
9884
|
+
const msg = message;
|
|
9885
|
+
const role = typeof msg.role === "string" ? msg.role : "unknown";
|
|
9886
|
+
const content = extractPiTextContent(msg.content);
|
|
9887
|
+
const toolCalls = extractToolCalls4(msg.content, toolTrackers, completedToolResults);
|
|
9888
|
+
const startTimeVal = typeof msg.timestamp === "number" ? new Date(msg.timestamp).toISOString() : typeof msg.timestamp === "string" ? msg.timestamp : void 0;
|
|
9889
|
+
let msgTokenUsage;
|
|
9890
|
+
if (msg.usage && typeof msg.usage === "object") {
|
|
9891
|
+
const u = msg.usage;
|
|
9892
|
+
const input = toFiniteNumber(u.input);
|
|
9893
|
+
const output = toFiniteNumber(u.output);
|
|
9894
|
+
if (input !== void 0 || output !== void 0) {
|
|
9895
|
+
msgTokenUsage = {
|
|
9896
|
+
input: input ?? 0,
|
|
9897
|
+
output: output ?? 0,
|
|
9898
|
+
...toFiniteNumber(u.cacheRead) !== void 0 ? { cached: toFiniteNumber(u.cacheRead) } : {}
|
|
9899
|
+
};
|
|
9900
|
+
}
|
|
9901
|
+
}
|
|
9902
|
+
const metadata = {};
|
|
9903
|
+
if (msg.api) metadata.api = msg.api;
|
|
9904
|
+
if (msg.provider) metadata.provider = msg.provider;
|
|
9905
|
+
if (msg.model) metadata.model = msg.model;
|
|
9906
|
+
if (msg.stopReason) metadata.stopReason = msg.stopReason;
|
|
9907
|
+
return {
|
|
9908
|
+
role,
|
|
9909
|
+
content,
|
|
9910
|
+
toolCalls: toolCalls.length > 0 ? toolCalls : void 0,
|
|
9911
|
+
startTime: startTimeVal,
|
|
9912
|
+
metadata: Object.keys(metadata).length > 0 ? metadata : void 0,
|
|
9913
|
+
tokenUsage: msgTokenUsage
|
|
9914
|
+
};
|
|
9915
|
+
}
|
|
9916
|
+
function extractToolCalls4(content, toolTrackers, completedToolResults) {
|
|
9917
|
+
if (!Array.isArray(content)) {
|
|
9918
|
+
return [];
|
|
9919
|
+
}
|
|
9920
|
+
const toolCalls = [];
|
|
9921
|
+
for (const part of content) {
|
|
9922
|
+
if (!part || typeof part !== "object") {
|
|
9923
|
+
continue;
|
|
9924
|
+
}
|
|
9925
|
+
const p = part;
|
|
9926
|
+
if (p.type === "toolCall" && typeof p.name === "string") {
|
|
9927
|
+
const id = typeof p.id === "string" ? p.id : void 0;
|
|
9928
|
+
const tracker = id ? toolTrackers.get(id) : void 0;
|
|
9929
|
+
const completed = id ? completedToolResults.get(id) : void 0;
|
|
9930
|
+
toolCalls.push({
|
|
9931
|
+
tool: p.name,
|
|
9932
|
+
input: p.arguments,
|
|
9933
|
+
id,
|
|
9934
|
+
output: completed?.output,
|
|
9935
|
+
durationMs: completed?.durationMs,
|
|
9936
|
+
startTime: tracker?.startTime,
|
|
9937
|
+
endTime: tracker?.startTime && completed?.durationMs !== void 0 ? new Date(new Date(tracker.startTime).getTime() + completed.durationMs).toISOString() : void 0
|
|
9744
9938
|
});
|
|
9745
|
-
}
|
|
9746
|
-
}
|
|
9939
|
+
}
|
|
9940
|
+
}
|
|
9941
|
+
return toolCalls;
|
|
9747
9942
|
}
|
|
9748
9943
|
|
|
9749
9944
|
// src/evaluation/providers/provider-registry.ts
|
|
@@ -9782,7 +9977,7 @@ var ProviderRegistry = class {
|
|
|
9782
9977
|
};
|
|
9783
9978
|
|
|
9784
9979
|
// src/evaluation/providers/targets.ts
|
|
9785
|
-
var
|
|
9980
|
+
var import_node_path21 = __toESM(require("path"), 1);
|
|
9786
9981
|
var import_zod3 = require("zod");
|
|
9787
9982
|
var CliHealthcheckHttpInputSchema = import_zod3.z.object({
|
|
9788
9983
|
url: import_zod3.z.string().min(1, "healthcheck URL is required"),
|
|
@@ -9879,11 +10074,11 @@ function normalizeCliHealthcheck(input, env, targetName, evalFilePath) {
|
|
|
9879
10074
|
allowLiteral: true,
|
|
9880
10075
|
optionalEnv: true
|
|
9881
10076
|
});
|
|
9882
|
-
if (cwd && evalFilePath && !
|
|
9883
|
-
cwd =
|
|
10077
|
+
if (cwd && evalFilePath && !import_node_path21.default.isAbsolute(cwd)) {
|
|
10078
|
+
cwd = import_node_path21.default.resolve(import_node_path21.default.dirname(import_node_path21.default.resolve(evalFilePath)), cwd);
|
|
9884
10079
|
}
|
|
9885
10080
|
if (!cwd && evalFilePath) {
|
|
9886
|
-
cwd =
|
|
10081
|
+
cwd = import_node_path21.default.dirname(import_node_path21.default.resolve(evalFilePath));
|
|
9887
10082
|
}
|
|
9888
10083
|
return {
|
|
9889
10084
|
command,
|
|
@@ -9906,15 +10101,15 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
|
|
|
9906
10101
|
optionalEnv: true
|
|
9907
10102
|
}
|
|
9908
10103
|
);
|
|
9909
|
-
if (workspaceTemplate && evalFilePath && !
|
|
9910
|
-
workspaceTemplate =
|
|
10104
|
+
if (workspaceTemplate && evalFilePath && !import_node_path21.default.isAbsolute(workspaceTemplate)) {
|
|
10105
|
+
workspaceTemplate = import_node_path21.default.resolve(import_node_path21.default.dirname(import_node_path21.default.resolve(evalFilePath)), workspaceTemplate);
|
|
9911
10106
|
}
|
|
9912
10107
|
let cwd = resolveOptionalString(input.cwd, env, `${targetName} working directory`, {
|
|
9913
10108
|
allowLiteral: true,
|
|
9914
10109
|
optionalEnv: true
|
|
9915
10110
|
});
|
|
9916
|
-
if (cwd && evalFilePath && !
|
|
9917
|
-
cwd =
|
|
10111
|
+
if (cwd && evalFilePath && !import_node_path21.default.isAbsolute(cwd)) {
|
|
10112
|
+
cwd = import_node_path21.default.resolve(import_node_path21.default.dirname(import_node_path21.default.resolve(evalFilePath)), cwd);
|
|
9918
10113
|
}
|
|
9919
10114
|
if (cwd && workspaceTemplate) {
|
|
9920
10115
|
throw new Error(
|
|
@@ -9922,7 +10117,7 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
|
|
|
9922
10117
|
);
|
|
9923
10118
|
}
|
|
9924
10119
|
if (!cwd && !workspaceTemplate && evalFilePath) {
|
|
9925
|
-
cwd =
|
|
10120
|
+
cwd = import_node_path21.default.dirname(import_node_path21.default.resolve(evalFilePath));
|
|
9926
10121
|
}
|
|
9927
10122
|
const timeoutSeconds = input.timeout_seconds ?? input.timeoutSeconds;
|
|
9928
10123
|
const timeoutMs = timeoutSeconds !== void 0 ? Math.floor(timeoutSeconds * 1e3) : void 0;
|
|
@@ -10110,14 +10305,14 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
|
|
|
10110
10305
|
providerBatching,
|
|
10111
10306
|
config: resolvePiCodingAgentConfig(parsed, env, evalFilePath)
|
|
10112
10307
|
};
|
|
10113
|
-
case "pi-
|
|
10308
|
+
case "pi-cli":
|
|
10114
10309
|
return {
|
|
10115
|
-
kind: "pi-
|
|
10310
|
+
kind: "pi-cli",
|
|
10116
10311
|
name: parsed.name,
|
|
10117
10312
|
graderTarget: parsed.grader_target ?? parsed.judge_target,
|
|
10118
10313
|
workers: parsed.workers,
|
|
10119
10314
|
providerBatching,
|
|
10120
|
-
config:
|
|
10315
|
+
config: resolvePiCliConfig(parsed, env, evalFilePath)
|
|
10121
10316
|
};
|
|
10122
10317
|
case "claude":
|
|
10123
10318
|
case "claude-code":
|
|
@@ -10344,8 +10539,8 @@ function resolveCodexConfig(target, env, evalFilePath) {
|
|
|
10344
10539
|
optionalEnv: true
|
|
10345
10540
|
}
|
|
10346
10541
|
);
|
|
10347
|
-
if (workspaceTemplate && evalFilePath && !
|
|
10348
|
-
workspaceTemplate =
|
|
10542
|
+
if (workspaceTemplate && evalFilePath && !import_node_path21.default.isAbsolute(workspaceTemplate)) {
|
|
10543
|
+
workspaceTemplate = import_node_path21.default.resolve(import_node_path21.default.dirname(import_node_path21.default.resolve(evalFilePath)), workspaceTemplate);
|
|
10349
10544
|
}
|
|
10350
10545
|
if (cwd && workspaceTemplate) {
|
|
10351
10546
|
throw new Error(
|
|
@@ -10429,8 +10624,8 @@ function resolveCopilotSdkConfig(target, env, evalFilePath) {
|
|
|
10429
10624
|
optionalEnv: true
|
|
10430
10625
|
}
|
|
10431
10626
|
);
|
|
10432
|
-
if (workspaceTemplate && evalFilePath && !
|
|
10433
|
-
workspaceTemplate =
|
|
10627
|
+
if (workspaceTemplate && evalFilePath && !import_node_path21.default.isAbsolute(workspaceTemplate)) {
|
|
10628
|
+
workspaceTemplate = import_node_path21.default.resolve(import_node_path21.default.dirname(import_node_path21.default.resolve(evalFilePath)), workspaceTemplate);
|
|
10434
10629
|
}
|
|
10435
10630
|
if (cwd && workspaceTemplate) {
|
|
10436
10631
|
throw new Error(
|
|
@@ -10494,8 +10689,8 @@ function resolveCopilotCliConfig(target, env, evalFilePath) {
|
|
|
10494
10689
|
optionalEnv: true
|
|
10495
10690
|
}
|
|
10496
10691
|
);
|
|
10497
|
-
if (workspaceTemplate && evalFilePath && !
|
|
10498
|
-
workspaceTemplate =
|
|
10692
|
+
if (workspaceTemplate && evalFilePath && !import_node_path21.default.isAbsolute(workspaceTemplate)) {
|
|
10693
|
+
workspaceTemplate = import_node_path21.default.resolve(import_node_path21.default.dirname(import_node_path21.default.resolve(evalFilePath)), workspaceTemplate);
|
|
10499
10694
|
}
|
|
10500
10695
|
if (cwd && workspaceTemplate) {
|
|
10501
10696
|
throw new Error(
|
|
@@ -10534,23 +10729,17 @@ function normalizeCopilotLogFormat(value) {
|
|
|
10534
10729
|
throw new Error("copilot log format must be 'summary' or 'json'");
|
|
10535
10730
|
}
|
|
10536
10731
|
function resolvePiCodingAgentConfig(target, env, evalFilePath) {
|
|
10537
|
-
const executableSource = target.executable ?? target.command ?? target.binary;
|
|
10538
10732
|
const subproviderSource = target.subprovider;
|
|
10539
10733
|
const modelSource = target.model ?? target.pi_model ?? target.piModel;
|
|
10540
10734
|
const apiKeySource = target.api_key ?? target.apiKey;
|
|
10541
10735
|
const toolsSource = target.tools ?? target.pi_tools ?? target.piTools;
|
|
10542
10736
|
const thinkingSource = target.thinking ?? target.pi_thinking ?? target.piThinking;
|
|
10543
|
-
const argsSource = target.args ?? target.arguments;
|
|
10544
10737
|
const cwdSource = target.cwd;
|
|
10545
10738
|
const workspaceTemplateSource = target.workspace_template ?? target.workspaceTemplate;
|
|
10546
10739
|
const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
|
|
10547
10740
|
const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
|
|
10548
10741
|
const logFormatSource = target.log_format ?? target.logFormat;
|
|
10549
10742
|
const systemPromptSource = target.system_prompt ?? target.systemPrompt;
|
|
10550
|
-
const executable = resolveOptionalString(executableSource, env, `${target.name} pi executable`, {
|
|
10551
|
-
allowLiteral: true,
|
|
10552
|
-
optionalEnv: true
|
|
10553
|
-
}) ?? "pi";
|
|
10554
10743
|
const subprovider = resolveOptionalString(
|
|
10555
10744
|
subproviderSource,
|
|
10556
10745
|
env,
|
|
@@ -10576,7 +10765,6 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
|
|
|
10576
10765
|
allowLiteral: true,
|
|
10577
10766
|
optionalEnv: true
|
|
10578
10767
|
});
|
|
10579
|
-
const args = resolveOptionalStringArray(argsSource, env, `${target.name} pi args`);
|
|
10580
10768
|
const cwd = resolveOptionalString(cwdSource, env, `${target.name} pi cwd`, {
|
|
10581
10769
|
allowLiteral: true,
|
|
10582
10770
|
optionalEnv: true
|
|
@@ -10590,8 +10778,8 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
|
|
|
10590
10778
|
optionalEnv: true
|
|
10591
10779
|
}
|
|
10592
10780
|
);
|
|
10593
|
-
if (workspaceTemplate && evalFilePath && !
|
|
10594
|
-
workspaceTemplate =
|
|
10781
|
+
if (workspaceTemplate && evalFilePath && !import_node_path21.default.isAbsolute(workspaceTemplate)) {
|
|
10782
|
+
workspaceTemplate = import_node_path21.default.resolve(import_node_path21.default.dirname(import_node_path21.default.resolve(evalFilePath)), workspaceTemplate);
|
|
10595
10783
|
}
|
|
10596
10784
|
if (cwd && workspaceTemplate) {
|
|
10597
10785
|
throw new Error(
|
|
@@ -10606,13 +10794,11 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
|
|
|
10606
10794
|
const logFormat = logFormatSource === "json" || logFormatSource === "summary" ? logFormatSource : void 0;
|
|
10607
10795
|
const systemPrompt = typeof systemPromptSource === "string" && systemPromptSource.trim().length > 0 ? systemPromptSource.trim() : void 0;
|
|
10608
10796
|
return {
|
|
10609
|
-
executable,
|
|
10610
10797
|
subprovider,
|
|
10611
10798
|
model,
|
|
10612
10799
|
apiKey,
|
|
10613
10800
|
tools,
|
|
10614
10801
|
thinking,
|
|
10615
|
-
args,
|
|
10616
10802
|
cwd,
|
|
10617
10803
|
workspaceTemplate,
|
|
10618
10804
|
timeoutMs,
|
|
@@ -10621,36 +10807,83 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
|
|
|
10621
10807
|
systemPrompt
|
|
10622
10808
|
};
|
|
10623
10809
|
}
|
|
10624
|
-
function
|
|
10810
|
+
function resolvePiCliConfig(target, env, evalFilePath) {
|
|
10811
|
+
const executableSource = target.executable ?? target.command ?? target.binary;
|
|
10625
10812
|
const subproviderSource = target.subprovider;
|
|
10626
10813
|
const modelSource = target.model ?? target.pi_model ?? target.piModel;
|
|
10627
10814
|
const apiKeySource = target.api_key ?? target.apiKey;
|
|
10815
|
+
const toolsSource = target.tools ?? target.pi_tools ?? target.piTools;
|
|
10816
|
+
const thinkingSource = target.thinking ?? target.pi_thinking ?? target.piThinking;
|
|
10817
|
+
const cwdSource = target.cwd;
|
|
10818
|
+
const workspaceTemplateSource = target.workspace_template ?? target.workspaceTemplate;
|
|
10628
10819
|
const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
|
|
10820
|
+
const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
|
|
10821
|
+
const logFormatSource = target.log_format ?? target.logFormat;
|
|
10629
10822
|
const systemPromptSource = target.system_prompt ?? target.systemPrompt;
|
|
10823
|
+
const executable = resolveOptionalString(executableSource, env, `${target.name} pi-cli executable`, {
|
|
10824
|
+
allowLiteral: true,
|
|
10825
|
+
optionalEnv: true
|
|
10826
|
+
}) ?? "pi";
|
|
10630
10827
|
const subprovider = resolveOptionalString(
|
|
10631
10828
|
subproviderSource,
|
|
10632
10829
|
env,
|
|
10633
|
-
`${target.name} pi-
|
|
10634
|
-
{
|
|
10635
|
-
allowLiteral: true,
|
|
10636
|
-
optionalEnv: true
|
|
10637
|
-
}
|
|
10830
|
+
`${target.name} pi-cli subprovider`,
|
|
10831
|
+
{ allowLiteral: true, optionalEnv: true }
|
|
10638
10832
|
);
|
|
10639
|
-
const model = resolveOptionalString(modelSource, env, `${target.name} pi-
|
|
10833
|
+
const model = resolveOptionalString(modelSource, env, `${target.name} pi-cli model`, {
|
|
10640
10834
|
allowLiteral: true,
|
|
10641
10835
|
optionalEnv: true
|
|
10642
10836
|
});
|
|
10643
|
-
const apiKey = resolveOptionalString(apiKeySource, env, `${target.name} pi-
|
|
10837
|
+
const apiKey = resolveOptionalString(apiKeySource, env, `${target.name} pi-cli api key`, {
|
|
10644
10838
|
allowLiteral: false,
|
|
10645
10839
|
optionalEnv: true
|
|
10646
10840
|
});
|
|
10647
|
-
const
|
|
10841
|
+
const tools = resolveOptionalString(toolsSource, env, `${target.name} pi-cli tools`, {
|
|
10842
|
+
allowLiteral: true,
|
|
10843
|
+
optionalEnv: true
|
|
10844
|
+
});
|
|
10845
|
+
const thinking = resolveOptionalString(thinkingSource, env, `${target.name} pi-cli thinking`, {
|
|
10846
|
+
allowLiteral: true,
|
|
10847
|
+
optionalEnv: true
|
|
10848
|
+
});
|
|
10849
|
+
const rawArgs = target.args ?? target.arguments;
|
|
10850
|
+
const args = resolveOptionalStringArray(rawArgs, env, `${target.name} pi-cli args`);
|
|
10851
|
+
const cwd = resolveOptionalString(cwdSource, env, `${target.name} pi-cli cwd`, {
|
|
10852
|
+
allowLiteral: true,
|
|
10853
|
+
optionalEnv: true
|
|
10854
|
+
});
|
|
10855
|
+
let workspaceTemplate = resolveOptionalString(
|
|
10856
|
+
workspaceTemplateSource,
|
|
10857
|
+
env,
|
|
10858
|
+
`${target.name} pi-cli workspace template`,
|
|
10859
|
+
{ allowLiteral: true, optionalEnv: true }
|
|
10860
|
+
);
|
|
10861
|
+
if (workspaceTemplate && evalFilePath && !import_node_path21.default.isAbsolute(workspaceTemplate)) {
|
|
10862
|
+
workspaceTemplate = import_node_path21.default.resolve(import_node_path21.default.dirname(import_node_path21.default.resolve(evalFilePath)), workspaceTemplate);
|
|
10863
|
+
}
|
|
10864
|
+
if (cwd && workspaceTemplate) {
|
|
10865
|
+
throw new Error(`${target.name}: 'cwd' and 'workspace_template' are mutually exclusive.`);
|
|
10866
|
+
}
|
|
10867
|
+
const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} pi-cli timeout`);
|
|
10868
|
+
const logDir = resolveOptionalString(logDirSource, env, `${target.name} pi-cli log directory`, {
|
|
10869
|
+
allowLiteral: true,
|
|
10870
|
+
optionalEnv: true
|
|
10871
|
+
});
|
|
10872
|
+
const logFormat = logFormatSource === "json" || logFormatSource === "summary" ? logFormatSource : void 0;
|
|
10648
10873
|
const systemPrompt = typeof systemPromptSource === "string" && systemPromptSource.trim().length > 0 ? systemPromptSource.trim() : void 0;
|
|
10649
10874
|
return {
|
|
10875
|
+
executable,
|
|
10650
10876
|
subprovider,
|
|
10651
10877
|
model,
|
|
10652
10878
|
apiKey,
|
|
10879
|
+
tools,
|
|
10880
|
+
thinking,
|
|
10881
|
+
args,
|
|
10882
|
+
cwd,
|
|
10883
|
+
workspaceTemplate,
|
|
10653
10884
|
timeoutMs,
|
|
10885
|
+
logDir,
|
|
10886
|
+
logFormat,
|
|
10654
10887
|
systemPrompt
|
|
10655
10888
|
};
|
|
10656
10889
|
}
|
|
@@ -10679,8 +10912,8 @@ function resolveClaudeConfig(target, env, evalFilePath) {
|
|
|
10679
10912
|
optionalEnv: true
|
|
10680
10913
|
}
|
|
10681
10914
|
);
|
|
10682
|
-
if (workspaceTemplate && evalFilePath && !
|
|
10683
|
-
workspaceTemplate =
|
|
10915
|
+
if (workspaceTemplate && evalFilePath && !import_node_path21.default.isAbsolute(workspaceTemplate)) {
|
|
10916
|
+
workspaceTemplate = import_node_path21.default.resolve(import_node_path21.default.dirname(import_node_path21.default.resolve(evalFilePath)), workspaceTemplate);
|
|
10684
10917
|
}
|
|
10685
10918
|
if (cwd && workspaceTemplate) {
|
|
10686
10919
|
throw new Error(
|
|
@@ -10738,8 +10971,8 @@ function resolveVSCodeConfig(target, env, insiders, evalFilePath) {
|
|
|
10738
10971
|
optionalEnv: true
|
|
10739
10972
|
}
|
|
10740
10973
|
) : void 0;
|
|
10741
|
-
if (workspaceTemplate && evalFilePath && !
|
|
10742
|
-
workspaceTemplate =
|
|
10974
|
+
if (workspaceTemplate && evalFilePath && !import_node_path21.default.isAbsolute(workspaceTemplate)) {
|
|
10975
|
+
workspaceTemplate = import_node_path21.default.resolve(import_node_path21.default.dirname(import_node_path21.default.resolve(evalFilePath)), workspaceTemplate);
|
|
10743
10976
|
}
|
|
10744
10977
|
const executableSource = target.executable;
|
|
10745
10978
|
const waitSource = target.wait;
|
|
@@ -10780,8 +11013,8 @@ function resolveCliConfig(target, env, evalFilePath) {
|
|
|
10780
11013
|
const parseResult = CliTargetInputSchema.safeParse(target, { errorMap: cliErrorMap });
|
|
10781
11014
|
if (!parseResult.success) {
|
|
10782
11015
|
const firstError = parseResult.error.errors[0];
|
|
10783
|
-
const
|
|
10784
|
-
const prefix =
|
|
11016
|
+
const path48 = firstError?.path.join(".") || "";
|
|
11017
|
+
const prefix = path48 ? `${target.name} ${path48}: ` : `${target.name}: `;
|
|
10785
11018
|
throw new Error(`${prefix}${firstError?.message}`);
|
|
10786
11019
|
}
|
|
10787
11020
|
const normalized = normalizeCliTargetInput(parseResult.data, env, evalFilePath);
|
|
@@ -10802,11 +11035,11 @@ function resolveDiscoveredProviderConfig(target, providerKind, env, evalFilePath
|
|
|
10802
11035
|
allowLiteral: true,
|
|
10803
11036
|
optionalEnv: true
|
|
10804
11037
|
});
|
|
10805
|
-
if (cwd && evalFilePath && !
|
|
10806
|
-
cwd =
|
|
11038
|
+
if (cwd && evalFilePath && !import_node_path21.default.isAbsolute(cwd)) {
|
|
11039
|
+
cwd = import_node_path21.default.resolve(import_node_path21.default.dirname(import_node_path21.default.resolve(evalFilePath)), cwd);
|
|
10807
11040
|
}
|
|
10808
11041
|
if (!cwd && evalFilePath) {
|
|
10809
|
-
cwd =
|
|
11042
|
+
cwd = import_node_path21.default.dirname(import_node_path21.default.resolve(evalFilePath));
|
|
10810
11043
|
}
|
|
10811
11044
|
return {
|
|
10812
11045
|
command,
|
|
@@ -10989,41 +11222,41 @@ function resolveOptionalNumberArray(source, description) {
|
|
|
10989
11222
|
}
|
|
10990
11223
|
|
|
10991
11224
|
// src/evaluation/providers/vscode-provider.ts
|
|
10992
|
-
var
|
|
10993
|
-
var
|
|
10994
|
-
var
|
|
11225
|
+
var import_node_child_process7 = require("child_process");
|
|
11226
|
+
var import_promises24 = require("fs/promises");
|
|
11227
|
+
var import_node_path33 = __toESM(require("path"), 1);
|
|
10995
11228
|
var import_node_util3 = require("util");
|
|
10996
11229
|
|
|
10997
11230
|
// src/evaluation/providers/vscode/dispatch/agentDispatch.ts
|
|
10998
|
-
var
|
|
10999
|
-
var
|
|
11231
|
+
var import_promises22 = require("fs/promises");
|
|
11232
|
+
var import_node_path31 = __toESM(require("path"), 1);
|
|
11000
11233
|
|
|
11001
11234
|
// src/evaluation/providers/vscode/utils/fs.ts
|
|
11002
|
-
var
|
|
11003
|
-
var
|
|
11004
|
-
var
|
|
11235
|
+
var import_node_fs10 = require("fs");
|
|
11236
|
+
var import_promises18 = require("fs/promises");
|
|
11237
|
+
var import_node_path22 = __toESM(require("path"), 1);
|
|
11005
11238
|
async function pathExists(target) {
|
|
11006
11239
|
try {
|
|
11007
|
-
await (0,
|
|
11240
|
+
await (0, import_promises18.access)(target, import_node_fs10.constants.F_OK);
|
|
11008
11241
|
return true;
|
|
11009
11242
|
} catch {
|
|
11010
11243
|
return false;
|
|
11011
11244
|
}
|
|
11012
11245
|
}
|
|
11013
11246
|
async function ensureDir(target) {
|
|
11014
|
-
await (0,
|
|
11247
|
+
await (0, import_promises18.mkdir)(target, { recursive: true });
|
|
11015
11248
|
}
|
|
11016
11249
|
async function readDirEntries(target) {
|
|
11017
|
-
const entries = await (0,
|
|
11250
|
+
const entries = await (0, import_promises18.readdir)(target, { withFileTypes: true });
|
|
11018
11251
|
return entries.map((entry) => ({
|
|
11019
11252
|
name: entry.name,
|
|
11020
|
-
absolutePath:
|
|
11253
|
+
absolutePath: import_node_path22.default.join(target, entry.name),
|
|
11021
11254
|
isDirectory: entry.isDirectory()
|
|
11022
11255
|
}));
|
|
11023
11256
|
}
|
|
11024
11257
|
async function removeIfExists(target) {
|
|
11025
11258
|
try {
|
|
11026
|
-
await (0,
|
|
11259
|
+
await (0, import_promises18.rm)(target, { force: true, recursive: false });
|
|
11027
11260
|
} catch (error) {
|
|
11028
11261
|
if (error.code !== "ENOENT") {
|
|
11029
11262
|
throw error;
|
|
@@ -11032,9 +11265,9 @@ async function removeIfExists(target) {
|
|
|
11032
11265
|
}
|
|
11033
11266
|
|
|
11034
11267
|
// src/evaluation/providers/vscode/utils/path.ts
|
|
11035
|
-
var
|
|
11268
|
+
var import_node_path23 = __toESM(require("path"), 1);
|
|
11036
11269
|
function pathToFileUri2(filePath) {
|
|
11037
|
-
const absolutePath =
|
|
11270
|
+
const absolutePath = import_node_path23.default.isAbsolute(filePath) ? filePath : import_node_path23.default.resolve(filePath);
|
|
11038
11271
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
11039
11272
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
11040
11273
|
return `file:///${normalizedPath}`;
|
|
@@ -11043,7 +11276,7 @@ function pathToFileUri2(filePath) {
|
|
|
11043
11276
|
}
|
|
11044
11277
|
|
|
11045
11278
|
// src/evaluation/providers/vscode/dispatch/promptBuilder.ts
|
|
11046
|
-
var
|
|
11279
|
+
var import_node_path24 = __toESM(require("path"), 1);
|
|
11047
11280
|
|
|
11048
11281
|
// src/evaluation/providers/vscode/utils/template.ts
|
|
11049
11282
|
function renderTemplate2(content, variables) {
|
|
@@ -11135,8 +11368,8 @@ function createBatchRequestPrompt(userQuery, responseFileTmp, responseFileFinal,
|
|
|
11135
11368
|
});
|
|
11136
11369
|
}
|
|
11137
11370
|
function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateContent) {
|
|
11138
|
-
const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${
|
|
11139
|
-
const responseList = responseFiles.map((file) => `"${
|
|
11371
|
+
const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${import_node_path24.default.basename(file)}`).join("\n");
|
|
11372
|
+
const responseList = responseFiles.map((file) => `"${import_node_path24.default.basename(file)}"`).join(", ");
|
|
11140
11373
|
return renderTemplate2(templateContent, {
|
|
11141
11374
|
requestFiles: requestLines,
|
|
11142
11375
|
responseList
|
|
@@ -11144,8 +11377,8 @@ function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateCont
|
|
|
11144
11377
|
}
|
|
11145
11378
|
|
|
11146
11379
|
// src/evaluation/providers/vscode/dispatch/responseWaiter.ts
|
|
11147
|
-
var
|
|
11148
|
-
var
|
|
11380
|
+
var import_promises19 = require("fs/promises");
|
|
11381
|
+
var import_node_path25 = __toESM(require("path"), 1);
|
|
11149
11382
|
|
|
11150
11383
|
// src/evaluation/providers/vscode/utils/time.ts
|
|
11151
11384
|
function sleep2(ms) {
|
|
@@ -11183,7 +11416,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
|
|
|
11183
11416
|
const maxAttempts = 10;
|
|
11184
11417
|
while (attempts < maxAttempts) {
|
|
11185
11418
|
try {
|
|
11186
|
-
const content = await (0,
|
|
11419
|
+
const content = await (0, import_promises19.readFile)(responseFileFinal, { encoding: "utf8" });
|
|
11187
11420
|
if (!silent) {
|
|
11188
11421
|
process.stdout.write(`${content}
|
|
11189
11422
|
`);
|
|
@@ -11204,7 +11437,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
|
|
|
11204
11437
|
}
|
|
11205
11438
|
async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, silent = false, timeoutMs = DEFAULT_TIMEOUT_MS) {
|
|
11206
11439
|
if (!silent) {
|
|
11207
|
-
const fileList = responseFilesFinal.map((file) =>
|
|
11440
|
+
const fileList = responseFilesFinal.map((file) => import_node_path25.default.basename(file)).join(", ");
|
|
11208
11441
|
console.error(`waiting for ${responseFilesFinal.length} batch response(s): ${fileList}`);
|
|
11209
11442
|
}
|
|
11210
11443
|
const deadline = Date.now() + timeoutMs;
|
|
@@ -11213,7 +11446,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
11213
11446
|
while (pending.size > 0) {
|
|
11214
11447
|
if (Date.now() >= deadline) {
|
|
11215
11448
|
if (!silent) {
|
|
11216
|
-
const remaining = [...pending].map((f) =>
|
|
11449
|
+
const remaining = [...pending].map((f) => import_node_path25.default.basename(f)).join(", ");
|
|
11217
11450
|
console.error(
|
|
11218
11451
|
`error: timed out after ${Math.round(timeoutMs / 1e3)}s waiting for batch responses. Still pending: ${remaining}`
|
|
11219
11452
|
);
|
|
@@ -11240,7 +11473,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
11240
11473
|
const maxAttempts = 10;
|
|
11241
11474
|
while (attempts < maxAttempts) {
|
|
11242
11475
|
try {
|
|
11243
|
-
const content = await (0,
|
|
11476
|
+
const content = await (0, import_promises19.readFile)(file, { encoding: "utf8" });
|
|
11244
11477
|
if (!silent) {
|
|
11245
11478
|
process.stdout.write(`${content}
|
|
11246
11479
|
`);
|
|
@@ -11262,17 +11495,17 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
|
|
|
11262
11495
|
}
|
|
11263
11496
|
|
|
11264
11497
|
// src/evaluation/providers/vscode/dispatch/vscodeProcess.ts
|
|
11265
|
-
var
|
|
11266
|
-
var
|
|
11267
|
-
var
|
|
11498
|
+
var import_node_child_process6 = require("child_process");
|
|
11499
|
+
var import_promises20 = require("fs/promises");
|
|
11500
|
+
var import_node_path28 = __toESM(require("path"), 1);
|
|
11268
11501
|
var import_node_util2 = require("util");
|
|
11269
11502
|
|
|
11270
11503
|
// src/evaluation/providers/vscode/dispatch/constants.ts
|
|
11271
|
-
var
|
|
11504
|
+
var import_node_path27 = __toESM(require("path"), 1);
|
|
11272
11505
|
|
|
11273
11506
|
// src/paths.ts
|
|
11274
11507
|
var import_node_os4 = __toESM(require("os"), 1);
|
|
11275
|
-
var
|
|
11508
|
+
var import_node_path26 = __toESM(require("path"), 1);
|
|
11276
11509
|
var logged = false;
|
|
11277
11510
|
function getAgentvHome() {
|
|
11278
11511
|
const envHome = process.env.AGENTV_HOME;
|
|
@@ -11283,19 +11516,19 @@ function getAgentvHome() {
|
|
|
11283
11516
|
}
|
|
11284
11517
|
return envHome;
|
|
11285
11518
|
}
|
|
11286
|
-
return
|
|
11519
|
+
return import_node_path26.default.join(import_node_os4.default.homedir(), ".agentv");
|
|
11287
11520
|
}
|
|
11288
11521
|
function getWorkspacesRoot() {
|
|
11289
|
-
return
|
|
11522
|
+
return import_node_path26.default.join(getAgentvHome(), "workspaces");
|
|
11290
11523
|
}
|
|
11291
11524
|
function getSubagentsRoot() {
|
|
11292
|
-
return
|
|
11525
|
+
return import_node_path26.default.join(getAgentvHome(), "subagents");
|
|
11293
11526
|
}
|
|
11294
11527
|
function getTraceStateRoot() {
|
|
11295
|
-
return
|
|
11528
|
+
return import_node_path26.default.join(getAgentvHome(), "trace-state");
|
|
11296
11529
|
}
|
|
11297
11530
|
function getWorkspacePoolRoot() {
|
|
11298
|
-
return
|
|
11531
|
+
return import_node_path26.default.join(getAgentvHome(), "workspace-pool");
|
|
11299
11532
|
}
|
|
11300
11533
|
|
|
11301
11534
|
// src/evaluation/providers/vscode/dispatch/constants.ts
|
|
@@ -11303,12 +11536,12 @@ var DEFAULT_LOCK_NAME = "subagent.lock";
|
|
|
11303
11536
|
var DEFAULT_ALIVE_FILENAME = ".alive";
|
|
11304
11537
|
function getDefaultSubagentRoot(vscodeCmd = "code") {
|
|
11305
11538
|
const folder = vscodeCmd === "code-insiders" ? "vscode-insiders-agents" : "vscode-agents";
|
|
11306
|
-
return
|
|
11539
|
+
return import_node_path27.default.join(getSubagentsRoot(), folder);
|
|
11307
11540
|
}
|
|
11308
11541
|
var DEFAULT_SUBAGENT_ROOT = getDefaultSubagentRoot();
|
|
11309
11542
|
|
|
11310
11543
|
// src/evaluation/providers/vscode/dispatch/vscodeProcess.ts
|
|
11311
|
-
var execAsync2 = (0, import_node_util2.promisify)(
|
|
11544
|
+
var execAsync2 = (0, import_node_util2.promisify)(import_node_child_process6.exec);
|
|
11312
11545
|
function shellQuote(cmd) {
|
|
11313
11546
|
return cmd.includes(" ") ? `"${cmd}"` : cmd;
|
|
11314
11547
|
}
|
|
@@ -11319,7 +11552,7 @@ model: Grok Code Fast 1 (copilot)
|
|
|
11319
11552
|
function spawnVsCode(vscodeCmd, args, options) {
|
|
11320
11553
|
const useShell = options?.shell ?? true;
|
|
11321
11554
|
const command = useShell ? shellQuote(vscodeCmd) : vscodeCmd;
|
|
11322
|
-
const child = (0,
|
|
11555
|
+
const child = (0, import_node_child_process6.spawn)(command, args, {
|
|
11323
11556
|
windowsHide: true,
|
|
11324
11557
|
shell: useShell,
|
|
11325
11558
|
detached: false
|
|
@@ -11370,12 +11603,12 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
11370
11603
|
await raceSpawnError(child);
|
|
11371
11604
|
return true;
|
|
11372
11605
|
}
|
|
11373
|
-
const aliveFile =
|
|
11606
|
+
const aliveFile = import_node_path28.default.join(subagentDir, DEFAULT_ALIVE_FILENAME);
|
|
11374
11607
|
await removeIfExists(aliveFile);
|
|
11375
|
-
const githubAgentsDir =
|
|
11376
|
-
await (0,
|
|
11377
|
-
const wakeupDst =
|
|
11378
|
-
await (0,
|
|
11608
|
+
const githubAgentsDir = import_node_path28.default.join(subagentDir, ".github", "agents");
|
|
11609
|
+
await (0, import_promises20.mkdir)(githubAgentsDir, { recursive: true });
|
|
11610
|
+
const wakeupDst = import_node_path28.default.join(githubAgentsDir, "wakeup.md");
|
|
11611
|
+
await (0, import_promises20.writeFile)(wakeupDst, DEFAULT_WAKEUP_CONTENT, "utf8");
|
|
11379
11612
|
const workspaceChild = spawnVsCode(vscodeCmd, [workspacePath], {
|
|
11380
11613
|
label: "open-workspace"
|
|
11381
11614
|
});
|
|
@@ -11387,7 +11620,7 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
11387
11620
|
"chat",
|
|
11388
11621
|
"-m",
|
|
11389
11622
|
wakeupChatId,
|
|
11390
|
-
`create a file named .alive in the ${
|
|
11623
|
+
`create a file named .alive in the ${import_node_path28.default.basename(subagentDir)} folder`
|
|
11391
11624
|
];
|
|
11392
11625
|
const wakeupChild = spawnVsCode(vscodeCmd, chatArgs, { label: "send-wakeup-chat" });
|
|
11393
11626
|
await raceSpawnError(wakeupChild);
|
|
@@ -11402,27 +11635,27 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
|
|
|
11402
11635
|
return true;
|
|
11403
11636
|
}
|
|
11404
11637
|
async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, requestInstructions, timestamp, vscodeCmd) {
|
|
11405
|
-
const workspacePath =
|
|
11406
|
-
const messagesDir =
|
|
11407
|
-
await (0,
|
|
11408
|
-
const reqFile =
|
|
11409
|
-
await (0,
|
|
11638
|
+
const workspacePath = import_node_path28.default.join(subagentDir, `${import_node_path28.default.basename(subagentDir)}.code-workspace`);
|
|
11639
|
+
const messagesDir = import_node_path28.default.join(subagentDir, "messages");
|
|
11640
|
+
await (0, import_promises20.mkdir)(messagesDir, { recursive: true });
|
|
11641
|
+
const reqFile = import_node_path28.default.join(messagesDir, `${timestamp}_req.md`);
|
|
11642
|
+
await (0, import_promises20.writeFile)(reqFile, requestInstructions, { encoding: "utf8" });
|
|
11410
11643
|
const reqUri = pathToFileUri2(reqFile);
|
|
11411
11644
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
11412
11645
|
for (const attachment of attachmentPaths) {
|
|
11413
11646
|
chatArgs.push("-a", attachment);
|
|
11414
11647
|
}
|
|
11415
11648
|
chatArgs.push("-a", reqFile);
|
|
11416
|
-
chatArgs.push(`Follow instructions in [${
|
|
11649
|
+
chatArgs.push(`Follow instructions in [${import_node_path28.default.basename(reqFile)}](${reqUri})`);
|
|
11417
11650
|
const workspaceReady = await ensureWorkspaceFocused(
|
|
11418
11651
|
workspacePath,
|
|
11419
|
-
|
|
11652
|
+
import_node_path28.default.basename(subagentDir),
|
|
11420
11653
|
subagentDir,
|
|
11421
11654
|
vscodeCmd
|
|
11422
11655
|
);
|
|
11423
11656
|
if (!workspaceReady) {
|
|
11424
11657
|
throw new Error(
|
|
11425
|
-
`VS Code workspace '${
|
|
11658
|
+
`VS Code workspace '${import_node_path28.default.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
|
|
11426
11659
|
);
|
|
11427
11660
|
}
|
|
11428
11661
|
await sleep2(500);
|
|
@@ -11430,9 +11663,9 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
|
|
|
11430
11663
|
await raceSpawnError(child);
|
|
11431
11664
|
}
|
|
11432
11665
|
async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, chatInstruction, vscodeCmd) {
|
|
11433
|
-
const workspacePath =
|
|
11434
|
-
const messagesDir =
|
|
11435
|
-
await (0,
|
|
11666
|
+
const workspacePath = import_node_path28.default.join(subagentDir, `${import_node_path28.default.basename(subagentDir)}.code-workspace`);
|
|
11667
|
+
const messagesDir = import_node_path28.default.join(subagentDir, "messages");
|
|
11668
|
+
await (0, import_promises20.mkdir)(messagesDir, { recursive: true });
|
|
11436
11669
|
const chatArgs = ["-r", "chat", "-m", chatId];
|
|
11437
11670
|
for (const attachment of attachmentPaths) {
|
|
11438
11671
|
chatArgs.push("-a", attachment);
|
|
@@ -11440,13 +11673,13 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
|
|
|
11440
11673
|
chatArgs.push(chatInstruction);
|
|
11441
11674
|
const workspaceReady = await ensureWorkspaceFocused(
|
|
11442
11675
|
workspacePath,
|
|
11443
|
-
|
|
11676
|
+
import_node_path28.default.basename(subagentDir),
|
|
11444
11677
|
subagentDir,
|
|
11445
11678
|
vscodeCmd
|
|
11446
11679
|
);
|
|
11447
11680
|
if (!workspaceReady) {
|
|
11448
11681
|
throw new Error(
|
|
11449
|
-
`VS Code workspace '${
|
|
11682
|
+
`VS Code workspace '${import_node_path28.default.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
|
|
11450
11683
|
);
|
|
11451
11684
|
}
|
|
11452
11685
|
await sleep2(500);
|
|
@@ -11455,11 +11688,11 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
|
|
|
11455
11688
|
}
|
|
11456
11689
|
|
|
11457
11690
|
// src/evaluation/providers/vscode/dispatch/workspaceManager.ts
|
|
11458
|
-
var
|
|
11459
|
-
var
|
|
11691
|
+
var import_promises21 = require("fs/promises");
|
|
11692
|
+
var import_node_path30 = __toESM(require("path"), 1);
|
|
11460
11693
|
|
|
11461
11694
|
// src/evaluation/providers/vscode/utils/workspace.ts
|
|
11462
|
-
var
|
|
11695
|
+
var import_node_path29 = __toESM(require("path"), 1);
|
|
11463
11696
|
var import_json5 = __toESM(require("json5"), 1);
|
|
11464
11697
|
function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
11465
11698
|
let workspace;
|
|
@@ -11476,10 +11709,10 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
11476
11709
|
}
|
|
11477
11710
|
const transformedFolders = workspace.folders.map((folder) => {
|
|
11478
11711
|
const folderPath = folder.path;
|
|
11479
|
-
if (
|
|
11712
|
+
if (import_node_path29.default.isAbsolute(folderPath)) {
|
|
11480
11713
|
return folder;
|
|
11481
11714
|
}
|
|
11482
|
-
const absolutePath =
|
|
11715
|
+
const absolutePath = import_node_path29.default.resolve(templateDir, folderPath);
|
|
11483
11716
|
return {
|
|
11484
11717
|
...folder,
|
|
11485
11718
|
path: absolutePath
|
|
@@ -11501,19 +11734,19 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
|
|
|
11501
11734
|
if (locationMap && typeof locationMap === "object") {
|
|
11502
11735
|
const transformedMap = {};
|
|
11503
11736
|
for (const [locationPath, value] of Object.entries(locationMap)) {
|
|
11504
|
-
const isAbsolute =
|
|
11737
|
+
const isAbsolute = import_node_path29.default.isAbsolute(locationPath);
|
|
11505
11738
|
if (isAbsolute) {
|
|
11506
11739
|
transformedMap[locationPath] = value;
|
|
11507
11740
|
} else {
|
|
11508
11741
|
const firstGlobIndex = locationPath.search(/[*]/);
|
|
11509
11742
|
if (firstGlobIndex === -1) {
|
|
11510
|
-
const resolvedPath =
|
|
11743
|
+
const resolvedPath = import_node_path29.default.resolve(templateDir, locationPath).replace(/\\/g, "/");
|
|
11511
11744
|
transformedMap[resolvedPath] = value;
|
|
11512
11745
|
} else {
|
|
11513
11746
|
const basePathEnd = locationPath.lastIndexOf("/", firstGlobIndex);
|
|
11514
11747
|
const basePath = basePathEnd !== -1 ? locationPath.substring(0, basePathEnd) : ".";
|
|
11515
11748
|
const patternPath = locationPath.substring(basePathEnd !== -1 ? basePathEnd : 0);
|
|
11516
|
-
const resolvedPath = (
|
|
11749
|
+
const resolvedPath = (import_node_path29.default.resolve(templateDir, basePath) + patternPath).replace(
|
|
11517
11750
|
/\\/g,
|
|
11518
11751
|
"/"
|
|
11519
11752
|
);
|
|
@@ -11554,7 +11787,7 @@ async function findUnlockedSubagent(subagentRoot) {
|
|
|
11554
11787
|
number: Number.parseInt(entry.name.split("-")[1] ?? "", 10)
|
|
11555
11788
|
})).filter((entry) => Number.isInteger(entry.number)).sort((a, b) => a.number - b.number);
|
|
11556
11789
|
for (const subagent of subagents) {
|
|
11557
|
-
const lockFile =
|
|
11790
|
+
const lockFile = import_node_path30.default.join(subagent.absolutePath, DEFAULT_LOCK_NAME);
|
|
11558
11791
|
if (!await pathExists(lockFile)) {
|
|
11559
11792
|
return subagent.absolutePath;
|
|
11560
11793
|
}
|
|
@@ -11564,26 +11797,26 @@ async function findUnlockedSubagent(subagentRoot) {
|
|
|
11564
11797
|
async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
11565
11798
|
let workspaceContent;
|
|
11566
11799
|
if (workspaceTemplate) {
|
|
11567
|
-
const workspaceSrc =
|
|
11800
|
+
const workspaceSrc = import_node_path30.default.resolve(workspaceTemplate);
|
|
11568
11801
|
if (!await pathExists(workspaceSrc)) {
|
|
11569
11802
|
throw new Error(`workspace template not found: ${workspaceSrc}`);
|
|
11570
11803
|
}
|
|
11571
|
-
const stats = await (0,
|
|
11804
|
+
const stats = await (0, import_promises21.stat)(workspaceSrc);
|
|
11572
11805
|
if (!stats.isFile()) {
|
|
11573
11806
|
throw new Error(`workspace template must be a file, not a directory: ${workspaceSrc}`);
|
|
11574
11807
|
}
|
|
11575
|
-
const templateText = await (0,
|
|
11808
|
+
const templateText = await (0, import_promises21.readFile)(workspaceSrc, "utf8");
|
|
11576
11809
|
workspaceContent = JSON.parse(templateText);
|
|
11577
11810
|
} else {
|
|
11578
11811
|
workspaceContent = DEFAULT_WORKSPACE_TEMPLATE;
|
|
11579
11812
|
}
|
|
11580
|
-
const workspaceName = `${
|
|
11581
|
-
const workspaceDst =
|
|
11582
|
-
const templateDir = workspaceTemplate ?
|
|
11813
|
+
const workspaceName = `${import_node_path30.default.basename(subagentDir)}.code-workspace`;
|
|
11814
|
+
const workspaceDst = import_node_path30.default.join(subagentDir, workspaceName);
|
|
11815
|
+
const templateDir = workspaceTemplate ? import_node_path30.default.dirname(import_node_path30.default.resolve(workspaceTemplate)) : subagentDir;
|
|
11583
11816
|
const workspaceJson = JSON.stringify(workspaceContent, null, 2);
|
|
11584
11817
|
let transformedContent = transformWorkspacePaths(workspaceJson, templateDir);
|
|
11585
11818
|
if (cwd) {
|
|
11586
|
-
const absCwd =
|
|
11819
|
+
const absCwd = import_node_path30.default.resolve(cwd);
|
|
11587
11820
|
const parsed = JSON.parse(transformedContent);
|
|
11588
11821
|
const alreadyPresent = parsed.folders.some((f) => f.path === absCwd);
|
|
11589
11822
|
if (!alreadyPresent) {
|
|
@@ -11591,36 +11824,36 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
|
|
|
11591
11824
|
transformedContent = JSON.stringify(parsed, null, 2);
|
|
11592
11825
|
}
|
|
11593
11826
|
}
|
|
11594
|
-
await (0,
|
|
11595
|
-
const messagesDir =
|
|
11596
|
-
await (0,
|
|
11827
|
+
await (0, import_promises21.writeFile)(workspaceDst, transformedContent, "utf8");
|
|
11828
|
+
const messagesDir = import_node_path30.default.join(subagentDir, "messages");
|
|
11829
|
+
await (0, import_promises21.mkdir)(messagesDir, { recursive: true });
|
|
11597
11830
|
return { workspace: workspaceDst, messagesDir };
|
|
11598
11831
|
}
|
|
11599
11832
|
async function createSubagentLock(subagentDir) {
|
|
11600
|
-
const messagesDir =
|
|
11833
|
+
const messagesDir = import_node_path30.default.join(subagentDir, "messages");
|
|
11601
11834
|
if (await pathExists(messagesDir)) {
|
|
11602
|
-
const files = await (0,
|
|
11835
|
+
const files = await (0, import_promises21.readdir)(messagesDir);
|
|
11603
11836
|
await Promise.all(
|
|
11604
11837
|
files.map(async (file) => {
|
|
11605
|
-
const target =
|
|
11838
|
+
const target = import_node_path30.default.join(messagesDir, file);
|
|
11606
11839
|
await removeIfExists(target);
|
|
11607
11840
|
})
|
|
11608
11841
|
);
|
|
11609
11842
|
}
|
|
11610
|
-
const githubAgentsDir =
|
|
11843
|
+
const githubAgentsDir = import_node_path30.default.join(subagentDir, ".github", "agents");
|
|
11611
11844
|
if (await pathExists(githubAgentsDir)) {
|
|
11612
|
-
const agentFiles = await (0,
|
|
11845
|
+
const agentFiles = await (0, import_promises21.readdir)(githubAgentsDir);
|
|
11613
11846
|
const preservedFiles = /* @__PURE__ */ new Set(["wakeup.md", "subagent.md"]);
|
|
11614
11847
|
await Promise.all(
|
|
11615
|
-
agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(
|
|
11848
|
+
agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(import_node_path30.default.join(githubAgentsDir, file)))
|
|
11616
11849
|
);
|
|
11617
11850
|
}
|
|
11618
|
-
const lockFile =
|
|
11619
|
-
await (0,
|
|
11851
|
+
const lockFile = import_node_path30.default.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
11852
|
+
await (0, import_promises21.writeFile)(lockFile, "", { encoding: "utf8" });
|
|
11620
11853
|
return lockFile;
|
|
11621
11854
|
}
|
|
11622
11855
|
async function removeSubagentLock(subagentDir) {
|
|
11623
|
-
const lockFile =
|
|
11856
|
+
const lockFile = import_node_path30.default.join(subagentDir, DEFAULT_LOCK_NAME);
|
|
11624
11857
|
await removeIfExists(lockFile);
|
|
11625
11858
|
}
|
|
11626
11859
|
async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspaceTemplate, dryRun, cwd) {
|
|
@@ -11640,11 +11873,11 @@ async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspa
|
|
|
11640
11873
|
return 1;
|
|
11641
11874
|
}
|
|
11642
11875
|
if (promptFile) {
|
|
11643
|
-
const githubAgentsDir =
|
|
11644
|
-
await (0,
|
|
11645
|
-
const agentFile =
|
|
11876
|
+
const githubAgentsDir = import_node_path30.default.join(subagentDir, ".github", "agents");
|
|
11877
|
+
await (0, import_promises21.mkdir)(githubAgentsDir, { recursive: true });
|
|
11878
|
+
const agentFile = import_node_path30.default.join(githubAgentsDir, `${chatId}.md`);
|
|
11646
11879
|
try {
|
|
11647
|
-
await (0,
|
|
11880
|
+
await (0, import_promises21.copyFile)(promptFile, agentFile);
|
|
11648
11881
|
} catch (error) {
|
|
11649
11882
|
console.error(`error: Failed to copy prompt file to agent mode: ${error.message}`);
|
|
11650
11883
|
return 1;
|
|
@@ -11661,11 +11894,11 @@ async function resolvePromptFile(promptFile) {
|
|
|
11661
11894
|
if (!promptFile) {
|
|
11662
11895
|
return void 0;
|
|
11663
11896
|
}
|
|
11664
|
-
const resolvedPrompt =
|
|
11897
|
+
const resolvedPrompt = import_node_path31.default.resolve(promptFile);
|
|
11665
11898
|
if (!await pathExists(resolvedPrompt)) {
|
|
11666
11899
|
throw new Error(`Prompt file not found: ${resolvedPrompt}`);
|
|
11667
11900
|
}
|
|
11668
|
-
const promptStats = await (0,
|
|
11901
|
+
const promptStats = await (0, import_promises22.stat)(resolvedPrompt);
|
|
11669
11902
|
if (!promptStats.isFile()) {
|
|
11670
11903
|
throw new Error(`Prompt file must be a file, not a directory: ${resolvedPrompt}`);
|
|
11671
11904
|
}
|
|
@@ -11677,7 +11910,7 @@ async function resolveAttachments(extraAttachments) {
|
|
|
11677
11910
|
}
|
|
11678
11911
|
const resolved = [];
|
|
11679
11912
|
for (const attachment of extraAttachments) {
|
|
11680
|
-
const resolvedPath =
|
|
11913
|
+
const resolvedPath = import_node_path31.default.resolve(attachment);
|
|
11681
11914
|
if (!await pathExists(resolvedPath)) {
|
|
11682
11915
|
throw new Error(`Attachment not found: ${resolvedPath}`);
|
|
11683
11916
|
}
|
|
@@ -11719,7 +11952,7 @@ async function dispatchAgentSession(options) {
|
|
|
11719
11952
|
error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
|
|
11720
11953
|
};
|
|
11721
11954
|
}
|
|
11722
|
-
const subagentName =
|
|
11955
|
+
const subagentName = import_node_path31.default.basename(subagentDir);
|
|
11723
11956
|
const chatId = Math.random().toString(16).slice(2, 10);
|
|
11724
11957
|
const preparationResult = await prepareSubagentDirectory(
|
|
11725
11958
|
subagentDir,
|
|
@@ -11747,9 +11980,9 @@ async function dispatchAgentSession(options) {
|
|
|
11747
11980
|
};
|
|
11748
11981
|
}
|
|
11749
11982
|
const timestamp = generateTimestamp();
|
|
11750
|
-
const messagesDir =
|
|
11751
|
-
const responseFileTmp =
|
|
11752
|
-
const responseFileFinal =
|
|
11983
|
+
const messagesDir = import_node_path31.default.join(subagentDir, "messages");
|
|
11984
|
+
const responseFileTmp = import_node_path31.default.join(messagesDir, `${timestamp}_res.tmp.md`);
|
|
11985
|
+
const responseFileFinal = import_node_path31.default.join(messagesDir, `${timestamp}_res.md`);
|
|
11753
11986
|
const requestInstructions = createRequestPrompt(
|
|
11754
11987
|
userQuery,
|
|
11755
11988
|
responseFileTmp,
|
|
@@ -11854,7 +12087,7 @@ async function dispatchBatchAgent(options) {
|
|
|
11854
12087
|
error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
|
|
11855
12088
|
};
|
|
11856
12089
|
}
|
|
11857
|
-
subagentName =
|
|
12090
|
+
subagentName = import_node_path31.default.basename(subagentDir);
|
|
11858
12091
|
const chatId = Math.random().toString(16).slice(2, 10);
|
|
11859
12092
|
const preparationResult = await prepareSubagentDirectory(
|
|
11860
12093
|
subagentDir,
|
|
@@ -11885,24 +12118,24 @@ async function dispatchBatchAgent(options) {
|
|
|
11885
12118
|
};
|
|
11886
12119
|
}
|
|
11887
12120
|
const timestamp = generateTimestamp();
|
|
11888
|
-
const messagesDir =
|
|
12121
|
+
const messagesDir = import_node_path31.default.join(subagentDir, "messages");
|
|
11889
12122
|
requestFiles = userQueries.map(
|
|
11890
|
-
(_, index) =>
|
|
12123
|
+
(_, index) => import_node_path31.default.join(messagesDir, `${timestamp}_${index}_req.md`)
|
|
11891
12124
|
);
|
|
11892
12125
|
const responseTmpFiles = userQueries.map(
|
|
11893
|
-
(_, index) =>
|
|
12126
|
+
(_, index) => import_node_path31.default.join(messagesDir, `${timestamp}_${index}_res.tmp.md`)
|
|
11894
12127
|
);
|
|
11895
12128
|
responseFilesFinal = userQueries.map(
|
|
11896
|
-
(_, index) =>
|
|
12129
|
+
(_, index) => import_node_path31.default.join(messagesDir, `${timestamp}_${index}_res.md`)
|
|
11897
12130
|
);
|
|
11898
|
-
const orchestratorFile =
|
|
12131
|
+
const orchestratorFile = import_node_path31.default.join(messagesDir, `${timestamp}_orchestrator.md`);
|
|
11899
12132
|
if (!dryRun) {
|
|
11900
12133
|
await Promise.all(
|
|
11901
12134
|
userQueries.map((query, index) => {
|
|
11902
12135
|
const reqFile = requestFiles[index];
|
|
11903
12136
|
const tmpFile = responseTmpFiles[index];
|
|
11904
12137
|
const finalFile = responseFilesFinal[index];
|
|
11905
|
-
return (0,
|
|
12138
|
+
return (0, import_promises22.writeFile)(
|
|
11906
12139
|
reqFile,
|
|
11907
12140
|
createBatchRequestPrompt(query, tmpFile, finalFile, batchRequestTemplateContent),
|
|
11908
12141
|
{ encoding: "utf8" }
|
|
@@ -11914,7 +12147,7 @@ async function dispatchBatchAgent(options) {
|
|
|
11914
12147
|
responseFilesFinal,
|
|
11915
12148
|
orchestratorTemplateContent
|
|
11916
12149
|
);
|
|
11917
|
-
await (0,
|
|
12150
|
+
await (0, import_promises22.writeFile)(orchestratorFile, orchestratorContent, { encoding: "utf8" });
|
|
11918
12151
|
}
|
|
11919
12152
|
const chatAttachments = [orchestratorFile, ...attachments];
|
|
11920
12153
|
const orchestratorUri = pathToFileUri2(orchestratorFile);
|
|
@@ -11980,8 +12213,8 @@ async function dispatchBatchAgent(options) {
|
|
|
11980
12213
|
}
|
|
11981
12214
|
|
|
11982
12215
|
// src/evaluation/providers/vscode/dispatch/provision.ts
|
|
11983
|
-
var
|
|
11984
|
-
var
|
|
12216
|
+
var import_promises23 = require("fs/promises");
|
|
12217
|
+
var import_node_path32 = __toESM(require("path"), 1);
|
|
11985
12218
|
var DEFAULT_WORKSPACE_TEMPLATE2 = {
|
|
11986
12219
|
folders: [
|
|
11987
12220
|
{
|
|
@@ -12012,7 +12245,7 @@ async function provisionSubagents(options) {
|
|
|
12012
12245
|
if (!Number.isInteger(subagents) || subagents < 1) {
|
|
12013
12246
|
throw new Error("subagents must be a positive integer");
|
|
12014
12247
|
}
|
|
12015
|
-
const targetPath =
|
|
12248
|
+
const targetPath = import_node_path32.default.resolve(targetRoot);
|
|
12016
12249
|
if (!dryRun) {
|
|
12017
12250
|
await ensureDir(targetPath);
|
|
12018
12251
|
}
|
|
@@ -12032,7 +12265,7 @@ async function provisionSubagents(options) {
|
|
|
12032
12265
|
continue;
|
|
12033
12266
|
}
|
|
12034
12267
|
highestNumber = Math.max(highestNumber, parsed);
|
|
12035
|
-
const lockFile =
|
|
12268
|
+
const lockFile = import_node_path32.default.join(entry.absolutePath, lockName);
|
|
12036
12269
|
const locked = await pathExists(lockFile);
|
|
12037
12270
|
if (locked) {
|
|
12038
12271
|
lockedSubagents.add(entry.absolutePath);
|
|
@@ -12049,10 +12282,10 @@ async function provisionSubagents(options) {
|
|
|
12049
12282
|
break;
|
|
12050
12283
|
}
|
|
12051
12284
|
const subagentDir = subagent.absolutePath;
|
|
12052
|
-
const githubAgentsDir =
|
|
12053
|
-
const lockFile =
|
|
12054
|
-
const workspaceDst =
|
|
12055
|
-
const wakeupDst =
|
|
12285
|
+
const githubAgentsDir = import_node_path32.default.join(subagentDir, ".github", "agents");
|
|
12286
|
+
const lockFile = import_node_path32.default.join(subagentDir, lockName);
|
|
12287
|
+
const workspaceDst = import_node_path32.default.join(subagentDir, `${import_node_path32.default.basename(subagentDir)}.code-workspace`);
|
|
12288
|
+
const wakeupDst = import_node_path32.default.join(githubAgentsDir, "wakeup.md");
|
|
12056
12289
|
const isLocked = await pathExists(lockFile);
|
|
12057
12290
|
if (isLocked && !force) {
|
|
12058
12291
|
continue;
|
|
@@ -12061,8 +12294,8 @@ async function provisionSubagents(options) {
|
|
|
12061
12294
|
if (!dryRun) {
|
|
12062
12295
|
await removeIfExists(lockFile);
|
|
12063
12296
|
await ensureDir(githubAgentsDir);
|
|
12064
|
-
await (0,
|
|
12065
|
-
await (0,
|
|
12297
|
+
await (0, import_promises23.writeFile)(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
|
|
12298
|
+
await (0, import_promises23.writeFile)(wakeupDst, wakeupContent, "utf8");
|
|
12066
12299
|
}
|
|
12067
12300
|
created.push(subagentDir);
|
|
12068
12301
|
lockedSubagents.delete(subagentDir);
|
|
@@ -12072,8 +12305,8 @@ async function provisionSubagents(options) {
|
|
|
12072
12305
|
if (!isLocked && force) {
|
|
12073
12306
|
if (!dryRun) {
|
|
12074
12307
|
await ensureDir(githubAgentsDir);
|
|
12075
|
-
await (0,
|
|
12076
|
-
await (0,
|
|
12308
|
+
await (0, import_promises23.writeFile)(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
|
|
12309
|
+
await (0, import_promises23.writeFile)(wakeupDst, wakeupContent, "utf8");
|
|
12077
12310
|
}
|
|
12078
12311
|
created.push(subagentDir);
|
|
12079
12312
|
subagentsProvisioned += 1;
|
|
@@ -12081,8 +12314,8 @@ async function provisionSubagents(options) {
|
|
|
12081
12314
|
}
|
|
12082
12315
|
if (!dryRun && !await pathExists(workspaceDst)) {
|
|
12083
12316
|
await ensureDir(githubAgentsDir);
|
|
12084
|
-
await (0,
|
|
12085
|
-
await (0,
|
|
12317
|
+
await (0, import_promises23.writeFile)(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
|
|
12318
|
+
await (0, import_promises23.writeFile)(wakeupDst, wakeupContent, "utf8");
|
|
12086
12319
|
}
|
|
12087
12320
|
skippedExisting.push(subagentDir);
|
|
12088
12321
|
subagentsProvisioned += 1;
|
|
@@ -12090,15 +12323,15 @@ async function provisionSubagents(options) {
|
|
|
12090
12323
|
let nextIndex = highestNumber;
|
|
12091
12324
|
while (subagentsProvisioned < subagents) {
|
|
12092
12325
|
nextIndex += 1;
|
|
12093
|
-
const subagentDir =
|
|
12094
|
-
const githubAgentsDir =
|
|
12095
|
-
const workspaceDst =
|
|
12096
|
-
const wakeupDst =
|
|
12326
|
+
const subagentDir = import_node_path32.default.join(targetPath, `subagent-${nextIndex}`);
|
|
12327
|
+
const githubAgentsDir = import_node_path32.default.join(subagentDir, ".github", "agents");
|
|
12328
|
+
const workspaceDst = import_node_path32.default.join(subagentDir, `${import_node_path32.default.basename(subagentDir)}.code-workspace`);
|
|
12329
|
+
const wakeupDst = import_node_path32.default.join(githubAgentsDir, "wakeup.md");
|
|
12097
12330
|
if (!dryRun) {
|
|
12098
12331
|
await ensureDir(subagentDir);
|
|
12099
12332
|
await ensureDir(githubAgentsDir);
|
|
12100
|
-
await (0,
|
|
12101
|
-
await (0,
|
|
12333
|
+
await (0, import_promises23.writeFile)(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
|
|
12334
|
+
await (0, import_promises23.writeFile)(wakeupDst, wakeupContent, "utf8");
|
|
12102
12335
|
}
|
|
12103
12336
|
created.push(subagentDir);
|
|
12104
12337
|
subagentsProvisioned += 1;
|
|
@@ -12140,7 +12373,7 @@ var AGENTV_BATCH_REQUEST_TEMPLATE = `[[ ## task ## ]]
|
|
|
12140
12373
|
`;
|
|
12141
12374
|
|
|
12142
12375
|
// src/evaluation/providers/vscode-provider.ts
|
|
12143
|
-
var execAsync3 = (0, import_node_util3.promisify)(
|
|
12376
|
+
var execAsync3 = (0, import_node_util3.promisify)(import_node_child_process7.exec);
|
|
12144
12377
|
var VSCodeProvider = class {
|
|
12145
12378
|
id;
|
|
12146
12379
|
kind;
|
|
@@ -12283,9 +12516,9 @@ var VSCodeProvider = class {
|
|
|
12283
12516
|
async function locateVSCodeExecutable(candidate) {
|
|
12284
12517
|
const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
|
|
12285
12518
|
if (includesPathSeparator) {
|
|
12286
|
-
const resolved =
|
|
12519
|
+
const resolved = import_node_path33.default.isAbsolute(candidate) ? candidate : import_node_path33.default.resolve(candidate);
|
|
12287
12520
|
try {
|
|
12288
|
-
await (0,
|
|
12521
|
+
await (0, import_promises24.access)(resolved, import_promises24.constants.F_OK);
|
|
12289
12522
|
return resolved;
|
|
12290
12523
|
} catch {
|
|
12291
12524
|
throw new Error(
|
|
@@ -12298,7 +12531,7 @@ async function locateVSCodeExecutable(candidate) {
|
|
|
12298
12531
|
const { stdout } = await execAsync3(`${locator} ${candidate}`);
|
|
12299
12532
|
const lines = stdout.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.length > 0);
|
|
12300
12533
|
if (lines.length > 0 && lines[0]) {
|
|
12301
|
-
await (0,
|
|
12534
|
+
await (0, import_promises24.access)(lines[0], import_promises24.constants.F_OK);
|
|
12302
12535
|
return lines[0];
|
|
12303
12536
|
}
|
|
12304
12537
|
} catch {
|
|
@@ -12312,7 +12545,7 @@ async function resolveWorkspaceTemplateFile(template) {
|
|
|
12312
12545
|
return void 0;
|
|
12313
12546
|
}
|
|
12314
12547
|
try {
|
|
12315
|
-
const stats = await (0,
|
|
12548
|
+
const stats = await (0, import_promises24.stat)(import_node_path33.default.resolve(template));
|
|
12316
12549
|
return stats.isFile() ? template : void 0;
|
|
12317
12550
|
} catch {
|
|
12318
12551
|
return template;
|
|
@@ -12336,7 +12569,7 @@ function buildMandatoryPrereadBlock2(attachmentFiles) {
|
|
|
12336
12569
|
return "";
|
|
12337
12570
|
}
|
|
12338
12571
|
const buildList = (files) => files.map((absolutePath) => {
|
|
12339
|
-
const fileName =
|
|
12572
|
+
const fileName = import_node_path33.default.basename(absolutePath);
|
|
12340
12573
|
const fileUri = pathToFileUri3(absolutePath);
|
|
12341
12574
|
return `* [${fileName}](${fileUri})`;
|
|
12342
12575
|
});
|
|
@@ -12357,7 +12590,7 @@ function collectAttachmentFiles(attachments) {
|
|
|
12357
12590
|
}
|
|
12358
12591
|
const unique = /* @__PURE__ */ new Map();
|
|
12359
12592
|
for (const attachment of attachments) {
|
|
12360
|
-
const absolutePath =
|
|
12593
|
+
const absolutePath = import_node_path33.default.resolve(attachment);
|
|
12361
12594
|
if (!unique.has(absolutePath)) {
|
|
12362
12595
|
unique.set(absolutePath, absolutePath);
|
|
12363
12596
|
}
|
|
@@ -12365,7 +12598,7 @@ function collectAttachmentFiles(attachments) {
|
|
|
12365
12598
|
return Array.from(unique.values());
|
|
12366
12599
|
}
|
|
12367
12600
|
function pathToFileUri3(filePath) {
|
|
12368
|
-
const absolutePath =
|
|
12601
|
+
const absolutePath = import_node_path33.default.isAbsolute(filePath) ? filePath : import_node_path33.default.resolve(filePath);
|
|
12369
12602
|
const normalizedPath = absolutePath.replace(/\\/g, "/");
|
|
12370
12603
|
if (/^[a-zA-Z]:\//.test(normalizedPath)) {
|
|
12371
12604
|
return `file:///${normalizedPath}`;
|
|
@@ -12378,7 +12611,7 @@ function normalizeAttachments(attachments) {
|
|
|
12378
12611
|
}
|
|
12379
12612
|
const deduped = /* @__PURE__ */ new Set();
|
|
12380
12613
|
for (const attachment of attachments) {
|
|
12381
|
-
deduped.add(
|
|
12614
|
+
deduped.add(import_node_path33.default.resolve(attachment));
|
|
12382
12615
|
}
|
|
12383
12616
|
return Array.from(deduped);
|
|
12384
12617
|
}
|
|
@@ -12387,7 +12620,7 @@ function mergeAttachments(all) {
|
|
|
12387
12620
|
for (const list of all) {
|
|
12388
12621
|
if (!list) continue;
|
|
12389
12622
|
for (const inputFile of list) {
|
|
12390
|
-
deduped.add(
|
|
12623
|
+
deduped.add(import_node_path33.default.resolve(inputFile));
|
|
12391
12624
|
}
|
|
12392
12625
|
}
|
|
12393
12626
|
return deduped.size > 0 ? Array.from(deduped) : void 0;
|
|
@@ -12434,9 +12667,9 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
|
|
|
12434
12667
|
}
|
|
12435
12668
|
|
|
12436
12669
|
// src/evaluation/providers/targets-file.ts
|
|
12437
|
-
var
|
|
12438
|
-
var
|
|
12439
|
-
var
|
|
12670
|
+
var import_node_fs11 = require("fs");
|
|
12671
|
+
var import_promises25 = require("fs/promises");
|
|
12672
|
+
var import_node_path34 = __toESM(require("path"), 1);
|
|
12440
12673
|
var import_yaml6 = require("yaml");
|
|
12441
12674
|
function isRecord(value) {
|
|
12442
12675
|
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
@@ -12466,18 +12699,18 @@ function assertTargetDefinition(value, index, filePath) {
|
|
|
12466
12699
|
}
|
|
12467
12700
|
async function fileExists3(filePath) {
|
|
12468
12701
|
try {
|
|
12469
|
-
await (0,
|
|
12702
|
+
await (0, import_promises25.access)(filePath, import_node_fs11.constants.F_OK);
|
|
12470
12703
|
return true;
|
|
12471
12704
|
} catch {
|
|
12472
12705
|
return false;
|
|
12473
12706
|
}
|
|
12474
12707
|
}
|
|
12475
12708
|
async function readTargetDefinitions(filePath) {
|
|
12476
|
-
const absolutePath =
|
|
12709
|
+
const absolutePath = import_node_path34.default.resolve(filePath);
|
|
12477
12710
|
if (!await fileExists3(absolutePath)) {
|
|
12478
12711
|
throw new Error(`targets.yaml not found at ${absolutePath}`);
|
|
12479
12712
|
}
|
|
12480
|
-
const raw = await (0,
|
|
12713
|
+
const raw = await (0, import_promises25.readFile)(absolutePath, "utf8");
|
|
12481
12714
|
const parsed = (0, import_yaml6.parse)(raw);
|
|
12482
12715
|
if (!isRecord(parsed)) {
|
|
12483
12716
|
throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
|
|
@@ -12493,16 +12726,16 @@ function listTargetNames(definitions) {
|
|
|
12493
12726
|
}
|
|
12494
12727
|
|
|
12495
12728
|
// src/evaluation/providers/provider-discovery.ts
|
|
12496
|
-
var
|
|
12729
|
+
var import_node_path35 = __toESM(require("path"), 1);
|
|
12497
12730
|
var import_fast_glob2 = __toESM(require("fast-glob"), 1);
|
|
12498
12731
|
async function discoverProviders(registry, baseDir) {
|
|
12499
12732
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
12500
12733
|
const candidateDirs = [];
|
|
12501
|
-
let dir =
|
|
12502
|
-
const root =
|
|
12734
|
+
let dir = import_node_path35.default.resolve(baseDir);
|
|
12735
|
+
const root = import_node_path35.default.parse(dir).root;
|
|
12503
12736
|
while (dir !== root) {
|
|
12504
|
-
candidateDirs.push(
|
|
12505
|
-
dir =
|
|
12737
|
+
candidateDirs.push(import_node_path35.default.join(dir, ".agentv", "providers"));
|
|
12738
|
+
dir = import_node_path35.default.dirname(dir);
|
|
12506
12739
|
}
|
|
12507
12740
|
let files = [];
|
|
12508
12741
|
for (const providersDir of candidateDirs) {
|
|
@@ -12518,7 +12751,7 @@ async function discoverProviders(registry, baseDir) {
|
|
|
12518
12751
|
}
|
|
12519
12752
|
const discoveredKinds = [];
|
|
12520
12753
|
for (const filePath of files) {
|
|
12521
|
-
const basename =
|
|
12754
|
+
const basename = import_node_path35.default.basename(filePath);
|
|
12522
12755
|
const kindName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
12523
12756
|
if (registry.has(kindName)) {
|
|
12524
12757
|
continue;
|
|
@@ -12536,7 +12769,7 @@ async function discoverProviders(registry, baseDir) {
|
|
|
12536
12769
|
// src/evaluation/providers/index.ts
|
|
12537
12770
|
function createBuiltinProviderRegistry() {
|
|
12538
12771
|
const registry = new ProviderRegistry();
|
|
12539
|
-
registry.register("openai", (t) => new OpenAIProvider(t.name, t.config)).register("openrouter", (t) => new OpenRouterProvider(t.name, t.config)).register("azure", (t) => new AzureProvider(t.name, t.config)).register("anthropic", (t) => new AnthropicProvider(t.name, t.config)).register("gemini", (t) => new GeminiProvider(t.name, t.config)).register("cli", (t) => new CliProvider(t.name, t.config)).register("codex", (t) => new CodexProvider(t.name, t.config)).register("copilot-sdk", (t) => new CopilotSdkProvider(t.name, t.config)).register("copilot-cli", (t) => new CopilotCliProvider(t.name, t.config)).register("pi-coding-agent", (t) => new PiCodingAgentProvider(t.name, t.config)).register("pi-
|
|
12772
|
+
registry.register("openai", (t) => new OpenAIProvider(t.name, t.config)).register("openrouter", (t) => new OpenRouterProvider(t.name, t.config)).register("azure", (t) => new AzureProvider(t.name, t.config)).register("anthropic", (t) => new AnthropicProvider(t.name, t.config)).register("gemini", (t) => new GeminiProvider(t.name, t.config)).register("cli", (t) => new CliProvider(t.name, t.config)).register("codex", (t) => new CodexProvider(t.name, t.config)).register("copilot-sdk", (t) => new CopilotSdkProvider(t.name, t.config)).register("copilot-cli", (t) => new CopilotCliProvider(t.name, t.config)).register("pi-coding-agent", (t) => new PiCodingAgentProvider(t.name, t.config)).register("pi-cli", (t) => new PiCliProvider(t.name, t.config)).register("claude-cli", (t) => new ClaudeCliProvider(t.name, t.config)).register("claude", (t) => new ClaudeCliProvider(t.name, t.config)).register("claude-sdk", (t) => new ClaudeSdkProvider(t.name, t.config)).register("mock", (t) => new MockProvider(t.name, t.config)).register("agentv", (t) => new AgentvProvider(t.name, t.config)).register("vscode", (t) => new VSCodeProvider(t.name, t.config, "vscode")).register(
|
|
12540
12773
|
"vscode-insiders",
|
|
12541
12774
|
(t) => new VSCodeProvider(t.name, t.config, "vscode-insiders")
|
|
12542
12775
|
);
|
|
@@ -12625,9 +12858,9 @@ function negateScore(score) {
|
|
|
12625
12858
|
}
|
|
12626
12859
|
|
|
12627
12860
|
// src/evaluation/evaluators/code-evaluator.ts
|
|
12628
|
-
var
|
|
12861
|
+
var import_promises26 = require("fs/promises");
|
|
12629
12862
|
var import_node_os5 = require("os");
|
|
12630
|
-
var
|
|
12863
|
+
var import_node_path36 = require("path");
|
|
12631
12864
|
|
|
12632
12865
|
// src/runtime/exec.ts
|
|
12633
12866
|
function shellEscapePath(value) {
|
|
@@ -12727,15 +12960,15 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
|
|
|
12727
12960
|
});
|
|
12728
12961
|
}
|
|
12729
12962
|
async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
12730
|
-
const { mkdir:
|
|
12963
|
+
const { mkdir: mkdir18, readFile: readFile14, rm: rm6, writeFile: writeFile9 } = await import("fs/promises");
|
|
12731
12964
|
const { tmpdir: tmpdir3 } = await import("os");
|
|
12732
|
-
const
|
|
12733
|
-
const { randomUUID:
|
|
12734
|
-
const dir =
|
|
12735
|
-
await
|
|
12736
|
-
const stdinPath =
|
|
12737
|
-
const stdoutPath =
|
|
12738
|
-
const stderrPath =
|
|
12965
|
+
const path48 = await import("path");
|
|
12966
|
+
const { randomUUID: randomUUID10 } = await import("crypto");
|
|
12967
|
+
const dir = path48.join(tmpdir3(), `agentv-exec-${randomUUID10()}`);
|
|
12968
|
+
await mkdir18(dir, { recursive: true });
|
|
12969
|
+
const stdinPath = path48.join(dir, "stdin.txt");
|
|
12970
|
+
const stdoutPath = path48.join(dir, "stdout.txt");
|
|
12971
|
+
const stderrPath = path48.join(dir, "stderr.txt");
|
|
12739
12972
|
await writeFile9(stdinPath, stdinPayload, "utf8");
|
|
12740
12973
|
const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
|
|
12741
12974
|
const { spawn: spawn5 } = await import("child_process");
|
|
@@ -12774,12 +13007,12 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
|
|
|
12774
13007
|
}
|
|
12775
13008
|
|
|
12776
13009
|
// src/runtime/target-proxy.ts
|
|
12777
|
-
var
|
|
13010
|
+
var import_node_crypto9 = require("crypto");
|
|
12778
13011
|
var import_node_http = require("http");
|
|
12779
13012
|
var DEFAULT_MAX_CALLS = 50;
|
|
12780
13013
|
async function createTargetProxy(options) {
|
|
12781
13014
|
const { defaultProvider, targetResolver, availableTargets, maxCalls } = options;
|
|
12782
|
-
const token = (0,
|
|
13015
|
+
const token = (0, import_node_crypto9.randomBytes)(32).toString("hex");
|
|
12783
13016
|
let callCount = 0;
|
|
12784
13017
|
let isShutdown = false;
|
|
12785
13018
|
let totalInputTokens = 0;
|
|
@@ -13071,9 +13304,9 @@ var CodeEvaluator = class {
|
|
|
13071
13304
|
if (outputForPayload) {
|
|
13072
13305
|
const serialized = JSON.stringify(outputForPayload);
|
|
13073
13306
|
if (serialized.length > FILE_BACKED_OUTPUT_THRESHOLD) {
|
|
13074
|
-
const tmpDir = await (0,
|
|
13075
|
-
outputPath = (0,
|
|
13076
|
-
await (0,
|
|
13307
|
+
const tmpDir = await (0, import_promises26.mkdtemp)((0, import_node_path36.join)((0, import_node_os5.tmpdir)(), "agentv-judge-"));
|
|
13308
|
+
outputPath = (0, import_node_path36.join)(tmpDir, "output.json");
|
|
13309
|
+
await (0, import_promises26.writeFile)(outputPath, serialized);
|
|
13077
13310
|
outputForPayload = null;
|
|
13078
13311
|
}
|
|
13079
13312
|
}
|
|
@@ -13182,7 +13415,7 @@ var CodeEvaluator = class {
|
|
|
13182
13415
|
await proxyShutdown();
|
|
13183
13416
|
}
|
|
13184
13417
|
if (outputPath) {
|
|
13185
|
-
await (0,
|
|
13418
|
+
await (0, import_promises26.rm)((0, import_node_path36.dirname)(outputPath), { recursive: true, force: true }).catch(() => {
|
|
13186
13419
|
});
|
|
13187
13420
|
}
|
|
13188
13421
|
}
|
|
@@ -13218,7 +13451,7 @@ var AGENT_PROVIDER_KINDS = [
|
|
|
13218
13451
|
"copilot-sdk",
|
|
13219
13452
|
"copilot-cli",
|
|
13220
13453
|
"pi-coding-agent",
|
|
13221
|
-
"pi-
|
|
13454
|
+
"pi-cli",
|
|
13222
13455
|
"claude",
|
|
13223
13456
|
"claude-cli",
|
|
13224
13457
|
"claude-sdk",
|
|
@@ -13245,8 +13478,8 @@ function isAgentProvider(provider) {
|
|
|
13245
13478
|
}
|
|
13246
13479
|
|
|
13247
13480
|
// src/evaluation/evaluators/llm-grader.ts
|
|
13248
|
-
var
|
|
13249
|
-
var
|
|
13481
|
+
var import_promises27 = __toESM(require("fs/promises"), 1);
|
|
13482
|
+
var import_node_path37 = __toESM(require("path"), 1);
|
|
13250
13483
|
var import_ai2 = require("ai");
|
|
13251
13484
|
var import_zod4 = require("zod");
|
|
13252
13485
|
var DEFAULT_MAX_STEPS = 10;
|
|
@@ -13435,7 +13668,7 @@ ${context2.fileChanges}`;
|
|
|
13435
13668
|
async evaluateWithRubrics(context2, graderProvider, rubrics) {
|
|
13436
13669
|
if (!rubrics || rubrics.length === 0) {
|
|
13437
13670
|
throw new Error(
|
|
13438
|
-
`No rubrics found for evaluator "${context2.evaluator?.name ?? "llm-grader"}".
|
|
13671
|
+
`No rubrics found for evaluator "${context2.evaluator?.name ?? "llm-grader"}". Add rubric criteria under assertions or use the agentv-eval-writer skill for authoring help.`
|
|
13439
13672
|
);
|
|
13440
13673
|
}
|
|
13441
13674
|
const hasScoreRanges = rubrics.some((r) => r.score_ranges && r.score_ranges.length > 0);
|
|
@@ -14101,8 +14334,8 @@ function calculateScoreRangeResult(result, rubrics) {
|
|
|
14101
14334
|
};
|
|
14102
14335
|
}
|
|
14103
14336
|
function resolveSandboxed(basePath, relativePath) {
|
|
14104
|
-
const resolved =
|
|
14105
|
-
if (!resolved.startsWith(basePath +
|
|
14337
|
+
const resolved = import_node_path37.default.resolve(basePath, relativePath);
|
|
14338
|
+
if (!resolved.startsWith(basePath + import_node_path37.default.sep) && resolved !== basePath) {
|
|
14106
14339
|
throw new Error(`Path '${relativePath}' is outside the workspace`);
|
|
14107
14340
|
}
|
|
14108
14341
|
return resolved;
|
|
@@ -14117,7 +14350,7 @@ function createFilesystemTools(workspacePath) {
|
|
|
14117
14350
|
execute: async (input) => {
|
|
14118
14351
|
try {
|
|
14119
14352
|
const resolved = resolveSandboxed(workspacePath, input.path);
|
|
14120
|
-
const entries = await
|
|
14353
|
+
const entries = await import_promises27.default.readdir(resolved, { withFileTypes: true });
|
|
14121
14354
|
return entries.map((e) => ({
|
|
14122
14355
|
name: e.name,
|
|
14123
14356
|
type: e.isDirectory() ? "directory" : "file"
|
|
@@ -14135,12 +14368,12 @@ function createFilesystemTools(workspacePath) {
|
|
|
14135
14368
|
execute: async (input) => {
|
|
14136
14369
|
try {
|
|
14137
14370
|
const resolved = resolveSandboxed(workspacePath, input.path);
|
|
14138
|
-
const stat8 = await
|
|
14371
|
+
const stat8 = await import_promises27.default.stat(resolved);
|
|
14139
14372
|
if (stat8.isDirectory()) {
|
|
14140
14373
|
return { error: `'${input.path}' is a directory, not a file` };
|
|
14141
14374
|
}
|
|
14142
14375
|
const buffer = Buffer.alloc(Math.min(stat8.size, MAX_FILE_SIZE));
|
|
14143
|
-
const fd = await
|
|
14376
|
+
const fd = await import_promises27.default.open(resolved, "r");
|
|
14144
14377
|
try {
|
|
14145
14378
|
await fd.read(buffer, 0, buffer.length, 0);
|
|
14146
14379
|
} finally {
|
|
@@ -14185,30 +14418,30 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
|
|
|
14185
14418
|
if (matches.length >= MAX_SEARCH_MATCHES) return;
|
|
14186
14419
|
let entries;
|
|
14187
14420
|
try {
|
|
14188
|
-
entries = await
|
|
14421
|
+
entries = await import_promises27.default.readdir(dirPath, { withFileTypes: true });
|
|
14189
14422
|
} catch {
|
|
14190
14423
|
return;
|
|
14191
14424
|
}
|
|
14192
14425
|
for (const entry of entries) {
|
|
14193
14426
|
if (matches.length >= MAX_SEARCH_MATCHES) return;
|
|
14194
14427
|
if (SEARCH_SKIP_DIRS.has(entry.name)) continue;
|
|
14195
|
-
const fullPath =
|
|
14428
|
+
const fullPath = import_node_path37.default.join(dirPath, entry.name);
|
|
14196
14429
|
if (entry.isDirectory()) {
|
|
14197
14430
|
await searchDirectory(fullPath, workspacePath, regex, matches);
|
|
14198
14431
|
} else if (entry.isFile()) {
|
|
14199
|
-
const ext =
|
|
14432
|
+
const ext = import_node_path37.default.extname(entry.name).toLowerCase();
|
|
14200
14433
|
if (BINARY_EXTENSIONS.has(ext)) continue;
|
|
14201
14434
|
try {
|
|
14202
|
-
const stat8 = await
|
|
14435
|
+
const stat8 = await import_promises27.default.stat(fullPath);
|
|
14203
14436
|
if (stat8.size > MAX_FILE_SIZE) continue;
|
|
14204
|
-
const content = await
|
|
14437
|
+
const content = await import_promises27.default.readFile(fullPath, "utf-8");
|
|
14205
14438
|
const lines = content.split("\n");
|
|
14206
14439
|
for (let i = 0; i < lines.length; i++) {
|
|
14207
14440
|
if (matches.length >= MAX_SEARCH_MATCHES) return;
|
|
14208
14441
|
regex.lastIndex = 0;
|
|
14209
14442
|
if (regex.test(lines[i])) {
|
|
14210
14443
|
matches.push({
|
|
14211
|
-
file:
|
|
14444
|
+
file: import_node_path37.default.relative(workspacePath, fullPath),
|
|
14212
14445
|
line: i + 1,
|
|
14213
14446
|
text: lines[i].substring(0, 200)
|
|
14214
14447
|
});
|
|
@@ -14843,115 +15076,115 @@ var FieldAccuracyEvaluator = class {
|
|
|
14843
15076
|
* Evaluate a single field against the expected value.
|
|
14844
15077
|
*/
|
|
14845
15078
|
evaluateField(fieldConfig, candidateData, expectedData) {
|
|
14846
|
-
const { path:
|
|
14847
|
-
const candidateValue = resolvePath(candidateData,
|
|
14848
|
-
const expectedValue = resolvePath(expectedData,
|
|
15079
|
+
const { path: path48, match, required = true, weight = 1 } = fieldConfig;
|
|
15080
|
+
const candidateValue = resolvePath(candidateData, path48);
|
|
15081
|
+
const expectedValue = resolvePath(expectedData, path48);
|
|
14849
15082
|
if (expectedValue === void 0) {
|
|
14850
15083
|
return {
|
|
14851
|
-
path:
|
|
15084
|
+
path: path48,
|
|
14852
15085
|
score: 1,
|
|
14853
15086
|
// No expected value means no comparison needed
|
|
14854
15087
|
weight,
|
|
14855
15088
|
hit: true,
|
|
14856
|
-
message: `${
|
|
15089
|
+
message: `${path48}: no expected value`
|
|
14857
15090
|
};
|
|
14858
15091
|
}
|
|
14859
15092
|
if (candidateValue === void 0) {
|
|
14860
15093
|
if (required) {
|
|
14861
15094
|
return {
|
|
14862
|
-
path:
|
|
15095
|
+
path: path48,
|
|
14863
15096
|
score: 0,
|
|
14864
15097
|
weight,
|
|
14865
15098
|
hit: false,
|
|
14866
|
-
message: `${
|
|
15099
|
+
message: `${path48} (required, missing)`
|
|
14867
15100
|
};
|
|
14868
15101
|
}
|
|
14869
15102
|
return {
|
|
14870
|
-
path:
|
|
15103
|
+
path: path48,
|
|
14871
15104
|
score: 1,
|
|
14872
15105
|
// Don't penalize missing optional fields
|
|
14873
15106
|
weight: 0,
|
|
14874
15107
|
// Zero weight means it won't affect the score
|
|
14875
15108
|
hit: true,
|
|
14876
|
-
message: `${
|
|
15109
|
+
message: `${path48}: optional field missing`
|
|
14877
15110
|
};
|
|
14878
15111
|
}
|
|
14879
15112
|
switch (match) {
|
|
14880
15113
|
case "exact":
|
|
14881
|
-
return this.compareExact(
|
|
15114
|
+
return this.compareExact(path48, candidateValue, expectedValue, weight);
|
|
14882
15115
|
case "numeric_tolerance":
|
|
14883
15116
|
return this.compareNumericTolerance(
|
|
14884
|
-
|
|
15117
|
+
path48,
|
|
14885
15118
|
candidateValue,
|
|
14886
15119
|
expectedValue,
|
|
14887
15120
|
fieldConfig,
|
|
14888
15121
|
weight
|
|
14889
15122
|
);
|
|
14890
15123
|
case "date":
|
|
14891
|
-
return this.compareDate(
|
|
15124
|
+
return this.compareDate(path48, candidateValue, expectedValue, fieldConfig, weight);
|
|
14892
15125
|
default:
|
|
14893
15126
|
return {
|
|
14894
|
-
path:
|
|
15127
|
+
path: path48,
|
|
14895
15128
|
score: 0,
|
|
14896
15129
|
weight,
|
|
14897
15130
|
hit: false,
|
|
14898
|
-
message: `${
|
|
15131
|
+
message: `${path48}: unknown match type "${match}"`
|
|
14899
15132
|
};
|
|
14900
15133
|
}
|
|
14901
15134
|
}
|
|
14902
15135
|
/**
|
|
14903
15136
|
* Exact equality comparison.
|
|
14904
15137
|
*/
|
|
14905
|
-
compareExact(
|
|
15138
|
+
compareExact(path48, candidateValue, expectedValue, weight) {
|
|
14906
15139
|
if (deepEqual(candidateValue, expectedValue)) {
|
|
14907
15140
|
return {
|
|
14908
|
-
path:
|
|
15141
|
+
path: path48,
|
|
14909
15142
|
score: 1,
|
|
14910
15143
|
weight,
|
|
14911
15144
|
hit: true,
|
|
14912
|
-
message:
|
|
15145
|
+
message: path48
|
|
14913
15146
|
};
|
|
14914
15147
|
}
|
|
14915
15148
|
if (typeof candidateValue !== typeof expectedValue) {
|
|
14916
15149
|
return {
|
|
14917
|
-
path:
|
|
15150
|
+
path: path48,
|
|
14918
15151
|
score: 0,
|
|
14919
15152
|
weight,
|
|
14920
15153
|
hit: false,
|
|
14921
|
-
message: `${
|
|
15154
|
+
message: `${path48} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
|
|
14922
15155
|
};
|
|
14923
15156
|
}
|
|
14924
15157
|
return {
|
|
14925
|
-
path:
|
|
15158
|
+
path: path48,
|
|
14926
15159
|
score: 0,
|
|
14927
15160
|
weight,
|
|
14928
15161
|
hit: false,
|
|
14929
|
-
message: `${
|
|
15162
|
+
message: `${path48} (value mismatch)`
|
|
14930
15163
|
};
|
|
14931
15164
|
}
|
|
14932
15165
|
/**
|
|
14933
15166
|
* Numeric comparison with absolute or relative tolerance.
|
|
14934
15167
|
*/
|
|
14935
|
-
compareNumericTolerance(
|
|
15168
|
+
compareNumericTolerance(path48, candidateValue, expectedValue, fieldConfig, weight) {
|
|
14936
15169
|
const { tolerance = 0, relative = false } = fieldConfig;
|
|
14937
15170
|
const candidateNum = toNumber(candidateValue);
|
|
14938
15171
|
const expectedNum = toNumber(expectedValue);
|
|
14939
15172
|
if (candidateNum === null || expectedNum === null) {
|
|
14940
15173
|
return {
|
|
14941
|
-
path:
|
|
15174
|
+
path: path48,
|
|
14942
15175
|
score: 0,
|
|
14943
15176
|
weight,
|
|
14944
15177
|
hit: false,
|
|
14945
|
-
message: `${
|
|
15178
|
+
message: `${path48} (non-numeric value)`
|
|
14946
15179
|
};
|
|
14947
15180
|
}
|
|
14948
15181
|
if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
|
|
14949
15182
|
return {
|
|
14950
|
-
path:
|
|
15183
|
+
path: path48,
|
|
14951
15184
|
score: 0,
|
|
14952
15185
|
weight,
|
|
14953
15186
|
hit: false,
|
|
14954
|
-
message: `${
|
|
15187
|
+
message: `${path48} (invalid numeric value)`
|
|
14955
15188
|
};
|
|
14956
15189
|
}
|
|
14957
15190
|
const diff = Math.abs(candidateNum - expectedNum);
|
|
@@ -14964,61 +15197,61 @@ var FieldAccuracyEvaluator = class {
|
|
|
14964
15197
|
}
|
|
14965
15198
|
if (withinTolerance) {
|
|
14966
15199
|
return {
|
|
14967
|
-
path:
|
|
15200
|
+
path: path48,
|
|
14968
15201
|
score: 1,
|
|
14969
15202
|
weight,
|
|
14970
15203
|
hit: true,
|
|
14971
|
-
message: `${
|
|
15204
|
+
message: `${path48} (within tolerance: diff=${diff.toFixed(2)})`
|
|
14972
15205
|
};
|
|
14973
15206
|
}
|
|
14974
15207
|
return {
|
|
14975
|
-
path:
|
|
15208
|
+
path: path48,
|
|
14976
15209
|
score: 0,
|
|
14977
15210
|
weight,
|
|
14978
15211
|
hit: false,
|
|
14979
|
-
message: `${
|
|
15212
|
+
message: `${path48} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
|
|
14980
15213
|
};
|
|
14981
15214
|
}
|
|
14982
15215
|
/**
|
|
14983
15216
|
* Date comparison with format normalization.
|
|
14984
15217
|
*/
|
|
14985
|
-
compareDate(
|
|
15218
|
+
compareDate(path48, candidateValue, expectedValue, fieldConfig, weight) {
|
|
14986
15219
|
const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
|
|
14987
15220
|
const candidateDate = parseDate(String(candidateValue), formats);
|
|
14988
15221
|
const expectedDate = parseDate(String(expectedValue), formats);
|
|
14989
15222
|
if (candidateDate === null) {
|
|
14990
15223
|
return {
|
|
14991
|
-
path:
|
|
15224
|
+
path: path48,
|
|
14992
15225
|
score: 0,
|
|
14993
15226
|
weight,
|
|
14994
15227
|
hit: false,
|
|
14995
|
-
message: `${
|
|
15228
|
+
message: `${path48} (unparseable candidate date)`
|
|
14996
15229
|
};
|
|
14997
15230
|
}
|
|
14998
15231
|
if (expectedDate === null) {
|
|
14999
15232
|
return {
|
|
15000
|
-
path:
|
|
15233
|
+
path: path48,
|
|
15001
15234
|
score: 0,
|
|
15002
15235
|
weight,
|
|
15003
15236
|
hit: false,
|
|
15004
|
-
message: `${
|
|
15237
|
+
message: `${path48} (unparseable expected date)`
|
|
15005
15238
|
};
|
|
15006
15239
|
}
|
|
15007
15240
|
if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
|
|
15008
15241
|
return {
|
|
15009
|
-
path:
|
|
15242
|
+
path: path48,
|
|
15010
15243
|
score: 1,
|
|
15011
15244
|
weight,
|
|
15012
15245
|
hit: true,
|
|
15013
|
-
message:
|
|
15246
|
+
message: path48
|
|
15014
15247
|
};
|
|
15015
15248
|
}
|
|
15016
15249
|
return {
|
|
15017
|
-
path:
|
|
15250
|
+
path: path48,
|
|
15018
15251
|
score: 0,
|
|
15019
15252
|
weight,
|
|
15020
15253
|
hit: false,
|
|
15021
|
-
message: `${
|
|
15254
|
+
message: `${path48} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
|
|
15022
15255
|
};
|
|
15023
15256
|
}
|
|
15024
15257
|
/**
|
|
@@ -15051,11 +15284,11 @@ var FieldAccuracyEvaluator = class {
|
|
|
15051
15284
|
};
|
|
15052
15285
|
}
|
|
15053
15286
|
};
|
|
15054
|
-
function resolvePath(obj,
|
|
15055
|
-
if (!
|
|
15287
|
+
function resolvePath(obj, path48) {
|
|
15288
|
+
if (!path48 || !obj) {
|
|
15056
15289
|
return void 0;
|
|
15057
15290
|
}
|
|
15058
|
-
const parts =
|
|
15291
|
+
const parts = path48.split(/\.|\[|\]/).filter((p) => p.length > 0);
|
|
15059
15292
|
let current = obj;
|
|
15060
15293
|
for (const part of parts) {
|
|
15061
15294
|
if (current === null || current === void 0) {
|
|
@@ -15221,9 +15454,7 @@ var PROVIDER_TOOL_SEMANTICS = {
|
|
|
15221
15454
|
"claude-sdk": CLAUDE_MATCHER,
|
|
15222
15455
|
codex: CODEX_MATCHER,
|
|
15223
15456
|
"pi-coding-agent": PI_CODING_AGENT_MATCHER,
|
|
15224
|
-
|
|
15225
|
-
// TODO: consider removing pi-agent-sdk provider entirely.
|
|
15226
|
-
"pi-agent-sdk": PI_CODING_AGENT_MATCHER,
|
|
15457
|
+
"pi-cli": PI_CODING_AGENT_MATCHER,
|
|
15227
15458
|
"copilot-cli": COPILOT_MATCHER,
|
|
15228
15459
|
"copilot-sdk": COPILOT_MATCHER,
|
|
15229
15460
|
vscode: COPILOT_MATCHER,
|
|
@@ -15538,8 +15769,8 @@ var TokenUsageEvaluator = class {
|
|
|
15538
15769
|
};
|
|
15539
15770
|
|
|
15540
15771
|
// src/evaluation/evaluators/tool-trajectory.ts
|
|
15541
|
-
function getNestedValue(obj,
|
|
15542
|
-
const parts =
|
|
15772
|
+
function getNestedValue(obj, path48) {
|
|
15773
|
+
const parts = path48.split(".");
|
|
15543
15774
|
let current = obj;
|
|
15544
15775
|
for (const part of parts) {
|
|
15545
15776
|
if (current === null || current === void 0 || typeof current !== "object") {
|
|
@@ -16159,9 +16390,9 @@ function runEqualsAssertion(output, value) {
|
|
|
16159
16390
|
}
|
|
16160
16391
|
|
|
16161
16392
|
// src/evaluation/orchestrator.ts
|
|
16162
|
-
var
|
|
16163
|
-
var
|
|
16164
|
-
var
|
|
16393
|
+
var import_node_crypto11 = require("crypto");
|
|
16394
|
+
var import_promises31 = require("fs/promises");
|
|
16395
|
+
var import_node_path46 = __toESM(require("path"), 1);
|
|
16165
16396
|
var import_micromatch3 = __toESM(require("micromatch"), 1);
|
|
16166
16397
|
|
|
16167
16398
|
// ../../node_modules/.bun/yocto-queue@1.2.2/node_modules/yocto-queue/index.js
|
|
@@ -16375,7 +16606,7 @@ var InlineAssertEvaluator = class {
|
|
|
16375
16606
|
};
|
|
16376
16607
|
|
|
16377
16608
|
// src/evaluation/evaluators/prompt-resolution.ts
|
|
16378
|
-
var
|
|
16609
|
+
var import_node_path38 = __toESM(require("path"), 1);
|
|
16379
16610
|
async function resolveCustomPrompt(promptConfig, context2, timeoutMs) {
|
|
16380
16611
|
if (promptConfig.resolvedPromptScript && promptConfig.resolvedPromptScript.length > 0) {
|
|
16381
16612
|
if (!context2) {
|
|
@@ -16421,7 +16652,7 @@ async function executePromptTemplate(script, context2, config, timeoutMs) {
|
|
|
16421
16652
|
};
|
|
16422
16653
|
const inputJson = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
|
|
16423
16654
|
const scriptPath = script[script.length - 1];
|
|
16424
|
-
const cwd =
|
|
16655
|
+
const cwd = import_node_path38.default.dirname(scriptPath);
|
|
16425
16656
|
try {
|
|
16426
16657
|
const stdout = await executeScript(script, inputJson, timeoutMs, cwd);
|
|
16427
16658
|
const prompt = stdout.trim();
|
|
@@ -16693,16 +16924,16 @@ function createBuiltinRegistry() {
|
|
|
16693
16924
|
}
|
|
16694
16925
|
|
|
16695
16926
|
// src/evaluation/registry/assertion-discovery.ts
|
|
16696
|
-
var
|
|
16927
|
+
var import_node_path39 = __toESM(require("path"), 1);
|
|
16697
16928
|
var import_fast_glob3 = __toESM(require("fast-glob"), 1);
|
|
16698
16929
|
async function discoverAssertions(registry, baseDir) {
|
|
16699
16930
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
16700
16931
|
const candidateDirs = [];
|
|
16701
|
-
let dir =
|
|
16702
|
-
const root =
|
|
16932
|
+
let dir = import_node_path39.default.resolve(baseDir);
|
|
16933
|
+
const root = import_node_path39.default.parse(dir).root;
|
|
16703
16934
|
while (dir !== root) {
|
|
16704
|
-
candidateDirs.push(
|
|
16705
|
-
dir =
|
|
16935
|
+
candidateDirs.push(import_node_path39.default.join(dir, ".agentv", "assertions"));
|
|
16936
|
+
dir = import_node_path39.default.dirname(dir);
|
|
16706
16937
|
}
|
|
16707
16938
|
let files = [];
|
|
16708
16939
|
for (const assertionsDir of candidateDirs) {
|
|
@@ -16718,7 +16949,7 @@ async function discoverAssertions(registry, baseDir) {
|
|
|
16718
16949
|
}
|
|
16719
16950
|
const discoveredTypes = [];
|
|
16720
16951
|
for (const filePath of files) {
|
|
16721
|
-
const basename =
|
|
16952
|
+
const basename = import_node_path39.default.basename(filePath);
|
|
16722
16953
|
const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
16723
16954
|
if (registry.has(typeName)) {
|
|
16724
16955
|
continue;
|
|
@@ -16736,17 +16967,17 @@ async function discoverAssertions(registry, baseDir) {
|
|
|
16736
16967
|
}
|
|
16737
16968
|
|
|
16738
16969
|
// src/evaluation/registry/grader-discovery.ts
|
|
16739
|
-
var
|
|
16970
|
+
var import_node_path40 = __toESM(require("path"), 1);
|
|
16740
16971
|
var import_fast_glob4 = __toESM(require("fast-glob"), 1);
|
|
16741
16972
|
async function discoverGraders(registry, baseDir) {
|
|
16742
16973
|
const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
|
|
16743
16974
|
const candidateDirs = [];
|
|
16744
|
-
let dir =
|
|
16745
|
-
const root =
|
|
16975
|
+
let dir = import_node_path40.default.resolve(baseDir);
|
|
16976
|
+
const root = import_node_path40.default.parse(dir).root;
|
|
16746
16977
|
while (dir !== root) {
|
|
16747
|
-
candidateDirs.push(
|
|
16748
|
-
candidateDirs.push(
|
|
16749
|
-
dir =
|
|
16978
|
+
candidateDirs.push(import_node_path40.default.join(dir, ".agentv", "graders"));
|
|
16979
|
+
candidateDirs.push(import_node_path40.default.join(dir, ".agentv", "judges"));
|
|
16980
|
+
dir = import_node_path40.default.dirname(dir);
|
|
16750
16981
|
}
|
|
16751
16982
|
let files = [];
|
|
16752
16983
|
for (const gradersDir of candidateDirs) {
|
|
@@ -16762,7 +16993,7 @@ async function discoverGraders(registry, baseDir) {
|
|
|
16762
16993
|
}
|
|
16763
16994
|
const discoveredTypes = [];
|
|
16764
16995
|
for (const filePath of files) {
|
|
16765
|
-
const basename =
|
|
16996
|
+
const basename = import_node_path40.default.basename(filePath);
|
|
16766
16997
|
const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
|
|
16767
16998
|
if (registry.has(typeName)) {
|
|
16768
16999
|
continue;
|
|
@@ -16920,11 +17151,11 @@ function getTCritical(df) {
|
|
|
16920
17151
|
}
|
|
16921
17152
|
|
|
16922
17153
|
// src/evaluation/workspace/file-changes.ts
|
|
16923
|
-
var
|
|
16924
|
-
var
|
|
16925
|
-
var
|
|
17154
|
+
var import_node_child_process8 = require("child_process");
|
|
17155
|
+
var import_node_fs12 = require("fs");
|
|
17156
|
+
var import_node_path41 = __toESM(require("path"), 1);
|
|
16926
17157
|
var import_node_util4 = require("util");
|
|
16927
|
-
var execAsync4 = (0, import_node_util4.promisify)(
|
|
17158
|
+
var execAsync4 = (0, import_node_util4.promisify)(import_node_child_process8.exec);
|
|
16928
17159
|
function gitExecOpts(workspacePath) {
|
|
16929
17160
|
const { GIT_DIR: _, GIT_WORK_TREE: __, ...env } = process.env;
|
|
16930
17161
|
return { cwd: workspacePath, env };
|
|
@@ -16950,16 +17181,16 @@ async function captureFileChanges(workspacePath, baselineCommit) {
|
|
|
16950
17181
|
async function stageNestedRepoChanges(workspacePath) {
|
|
16951
17182
|
let entries;
|
|
16952
17183
|
try {
|
|
16953
|
-
entries = (0,
|
|
17184
|
+
entries = (0, import_node_fs12.readdirSync)(workspacePath);
|
|
16954
17185
|
} catch {
|
|
16955
17186
|
return;
|
|
16956
17187
|
}
|
|
16957
17188
|
for (const entry of entries) {
|
|
16958
17189
|
if (entry === ".git" || entry === "node_modules") continue;
|
|
16959
|
-
const childPath =
|
|
17190
|
+
const childPath = import_node_path41.default.join(workspacePath, entry);
|
|
16960
17191
|
try {
|
|
16961
|
-
if (!(0,
|
|
16962
|
-
if (!(0,
|
|
17192
|
+
if (!(0, import_node_fs12.statSync)(childPath).isDirectory()) continue;
|
|
17193
|
+
if (!(0, import_node_fs12.statSync)(import_node_path41.default.join(childPath, ".git")).isDirectory()) continue;
|
|
16963
17194
|
} catch {
|
|
16964
17195
|
continue;
|
|
16965
17196
|
}
|
|
@@ -16969,8 +17200,8 @@ async function stageNestedRepoChanges(workspacePath) {
|
|
|
16969
17200
|
}
|
|
16970
17201
|
|
|
16971
17202
|
// src/evaluation/workspace/manager.ts
|
|
16972
|
-
var
|
|
16973
|
-
var
|
|
17203
|
+
var import_promises28 = require("fs/promises");
|
|
17204
|
+
var import_node_path42 = __toESM(require("path"), 1);
|
|
16974
17205
|
var TemplateNotFoundError = class extends Error {
|
|
16975
17206
|
constructor(templatePath) {
|
|
16976
17207
|
super(`Workspace template not found: ${templatePath}`);
|
|
@@ -16992,7 +17223,7 @@ var WorkspaceCreationError = class extends Error {
|
|
|
16992
17223
|
};
|
|
16993
17224
|
async function isDirectory(filePath) {
|
|
16994
17225
|
try {
|
|
16995
|
-
const stats = await (0,
|
|
17226
|
+
const stats = await (0, import_promises28.stat)(filePath);
|
|
16996
17227
|
return stats.isDirectory();
|
|
16997
17228
|
} catch {
|
|
16998
17229
|
return false;
|
|
@@ -17000,26 +17231,26 @@ async function isDirectory(filePath) {
|
|
|
17000
17231
|
}
|
|
17001
17232
|
function getWorkspacePath(evalRunId, caseId, workspaceRoot) {
|
|
17002
17233
|
const root = workspaceRoot ?? getWorkspacesRoot();
|
|
17003
|
-
return
|
|
17234
|
+
return import_node_path42.default.join(root, evalRunId, caseId);
|
|
17004
17235
|
}
|
|
17005
17236
|
async function copyDirectoryRecursive(src, dest) {
|
|
17006
|
-
await (0,
|
|
17007
|
-
const entries = await (0,
|
|
17237
|
+
await (0, import_promises28.mkdir)(dest, { recursive: true });
|
|
17238
|
+
const entries = await (0, import_promises28.readdir)(src, { withFileTypes: true });
|
|
17008
17239
|
for (const entry of entries) {
|
|
17009
|
-
const srcPath =
|
|
17010
|
-
const destPath =
|
|
17240
|
+
const srcPath = import_node_path42.default.join(src, entry.name);
|
|
17241
|
+
const destPath = import_node_path42.default.join(dest, entry.name);
|
|
17011
17242
|
if (entry.name === ".git") {
|
|
17012
17243
|
continue;
|
|
17013
17244
|
}
|
|
17014
17245
|
if (entry.isDirectory()) {
|
|
17015
17246
|
await copyDirectoryRecursive(srcPath, destPath);
|
|
17016
17247
|
} else {
|
|
17017
|
-
await (0,
|
|
17248
|
+
await (0, import_promises28.cp)(srcPath, destPath, { preserveTimestamps: true });
|
|
17018
17249
|
}
|
|
17019
17250
|
}
|
|
17020
17251
|
}
|
|
17021
17252
|
async function createTempWorkspace(templatePath, evalRunId, caseId, workspaceRoot) {
|
|
17022
|
-
const resolvedTemplatePath =
|
|
17253
|
+
const resolvedTemplatePath = import_node_path42.default.resolve(templatePath);
|
|
17023
17254
|
if (!await fileExists2(resolvedTemplatePath)) {
|
|
17024
17255
|
throw new TemplateNotFoundError(resolvedTemplatePath);
|
|
17025
17256
|
}
|
|
@@ -17029,7 +17260,7 @@ async function createTempWorkspace(templatePath, evalRunId, caseId, workspaceRoo
|
|
|
17029
17260
|
const workspacePath = getWorkspacePath(evalRunId, caseId, workspaceRoot);
|
|
17030
17261
|
try {
|
|
17031
17262
|
if (await fileExists2(workspacePath)) {
|
|
17032
|
-
await (0,
|
|
17263
|
+
await (0, import_promises28.rm)(workspacePath, { recursive: true, force: true });
|
|
17033
17264
|
}
|
|
17034
17265
|
await copyDirectoryRecursive(resolvedTemplatePath, workspacePath);
|
|
17035
17266
|
return workspacePath;
|
|
@@ -17063,25 +17294,25 @@ async function createTempWorkspace(templatePath, evalRunId, caseId, workspaceRoo
|
|
|
17063
17294
|
}
|
|
17064
17295
|
async function cleanupWorkspace(workspacePath) {
|
|
17065
17296
|
if (await fileExists2(workspacePath)) {
|
|
17066
|
-
await (0,
|
|
17297
|
+
await (0, import_promises28.rm)(workspacePath, { recursive: true, force: true });
|
|
17067
17298
|
}
|
|
17068
17299
|
}
|
|
17069
17300
|
async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
|
|
17070
17301
|
const root = workspaceRoot ?? getWorkspacesRoot();
|
|
17071
|
-
const evalDir =
|
|
17302
|
+
const evalDir = import_node_path42.default.join(root, evalRunId);
|
|
17072
17303
|
if (await fileExists2(evalDir)) {
|
|
17073
|
-
await (0,
|
|
17304
|
+
await (0, import_promises28.rm)(evalDir, { recursive: true, force: true });
|
|
17074
17305
|
}
|
|
17075
17306
|
}
|
|
17076
17307
|
|
|
17077
17308
|
// src/evaluation/workspace/pool-manager.ts
|
|
17078
|
-
var
|
|
17079
|
-
var
|
|
17080
|
-
var
|
|
17081
|
-
var
|
|
17082
|
-
var
|
|
17309
|
+
var import_node_child_process9 = require("child_process");
|
|
17310
|
+
var import_node_crypto10 = require("crypto");
|
|
17311
|
+
var import_node_fs13 = require("fs");
|
|
17312
|
+
var import_promises29 = require("fs/promises");
|
|
17313
|
+
var import_node_path43 = __toESM(require("path"), 1);
|
|
17083
17314
|
var import_node_util5 = require("util");
|
|
17084
|
-
var execFileAsync = (0, import_node_util5.promisify)(
|
|
17315
|
+
var execFileAsync = (0, import_node_util5.promisify)(import_node_child_process9.execFile);
|
|
17085
17316
|
function gitEnv() {
|
|
17086
17317
|
const env = { ...process.env };
|
|
17087
17318
|
for (const key of Object.keys(env)) {
|
|
@@ -17127,14 +17358,14 @@ function computeWorkspaceFingerprint(repos) {
|
|
|
17127
17358
|
const canonical = {
|
|
17128
17359
|
repos: [...repos].sort((a, b) => a.path.localeCompare(b.path)).map(normalizeRepoForFingerprint)
|
|
17129
17360
|
};
|
|
17130
|
-
return (0,
|
|
17361
|
+
return (0, import_node_crypto10.createHash)("sha256").update(JSON.stringify(canonical)).digest("hex");
|
|
17131
17362
|
}
|
|
17132
17363
|
async function copyDirectoryRecursive2(src, dest, skipDirs) {
|
|
17133
|
-
await (0,
|
|
17134
|
-
const entries = await (0,
|
|
17364
|
+
await (0, import_promises29.mkdir)(dest, { recursive: true });
|
|
17365
|
+
const entries = await (0, import_promises29.readdir)(src, { withFileTypes: true });
|
|
17135
17366
|
for (const entry of entries) {
|
|
17136
|
-
const srcPath =
|
|
17137
|
-
const destPath =
|
|
17367
|
+
const srcPath = import_node_path43.default.join(src, entry.name);
|
|
17368
|
+
const destPath = import_node_path43.default.join(dest, entry.name);
|
|
17138
17369
|
if (entry.name === ".git") {
|
|
17139
17370
|
continue;
|
|
17140
17371
|
}
|
|
@@ -17144,7 +17375,7 @@ async function copyDirectoryRecursive2(src, dest, skipDirs) {
|
|
|
17144
17375
|
}
|
|
17145
17376
|
await copyDirectoryRecursive2(srcPath, destPath, skipDirs);
|
|
17146
17377
|
} else {
|
|
17147
|
-
await (0,
|
|
17378
|
+
await (0, import_promises29.cp)(srcPath, destPath, { preserveTimestamps: true, force: true });
|
|
17148
17379
|
}
|
|
17149
17380
|
}
|
|
17150
17381
|
}
|
|
@@ -17167,8 +17398,8 @@ var WorkspacePoolManager = class {
|
|
|
17167
17398
|
async acquireWorkspace(options) {
|
|
17168
17399
|
const { templatePath, repos, maxSlots, repoManager, poolReset } = options;
|
|
17169
17400
|
const fingerprint = computeWorkspaceFingerprint(repos);
|
|
17170
|
-
const poolDir =
|
|
17171
|
-
await (0,
|
|
17401
|
+
const poolDir = import_node_path43.default.join(this.poolRoot, fingerprint);
|
|
17402
|
+
await (0, import_promises29.mkdir)(poolDir, { recursive: true });
|
|
17172
17403
|
const drifted = await this.checkDrift(poolDir, fingerprint);
|
|
17173
17404
|
if (drifted) {
|
|
17174
17405
|
console.warn(
|
|
@@ -17177,13 +17408,13 @@ var WorkspacePoolManager = class {
|
|
|
17177
17408
|
await this.removeAllSlots(poolDir);
|
|
17178
17409
|
}
|
|
17179
17410
|
for (let i = 0; i < maxSlots; i++) {
|
|
17180
|
-
const slotPath =
|
|
17411
|
+
const slotPath = import_node_path43.default.join(poolDir, `slot-${i}`);
|
|
17181
17412
|
const lockPath = `${slotPath}.lock`;
|
|
17182
17413
|
const locked = await this.tryLock(lockPath);
|
|
17183
17414
|
if (!locked) {
|
|
17184
17415
|
continue;
|
|
17185
17416
|
}
|
|
17186
|
-
const slotExists = (0,
|
|
17417
|
+
const slotExists = (0, import_node_fs13.existsSync)(slotPath);
|
|
17187
17418
|
if (slotExists) {
|
|
17188
17419
|
await this.resetSlot(slotPath, templatePath, repos, poolReset);
|
|
17189
17420
|
return {
|
|
@@ -17195,7 +17426,7 @@ var WorkspacePoolManager = class {
|
|
|
17195
17426
|
poolDir
|
|
17196
17427
|
};
|
|
17197
17428
|
}
|
|
17198
|
-
await (0,
|
|
17429
|
+
await (0, import_promises29.mkdir)(slotPath, { recursive: true });
|
|
17199
17430
|
if (templatePath) {
|
|
17200
17431
|
await copyDirectoryRecursive2(templatePath, slotPath);
|
|
17201
17432
|
}
|
|
@@ -17219,7 +17450,7 @@ var WorkspacePoolManager = class {
|
|
|
17219
17450
|
/** Remove lock file to release a slot. */
|
|
17220
17451
|
async releaseSlot(slot) {
|
|
17221
17452
|
try {
|
|
17222
|
-
await (0,
|
|
17453
|
+
await (0, import_promises29.unlink)(slot.lockPath);
|
|
17223
17454
|
} catch {
|
|
17224
17455
|
}
|
|
17225
17456
|
}
|
|
@@ -17232,21 +17463,21 @@ var WorkspacePoolManager = class {
|
|
|
17232
17463
|
async tryLock(lockPath) {
|
|
17233
17464
|
for (let attempt = 0; attempt < 3; attempt++) {
|
|
17234
17465
|
try {
|
|
17235
|
-
await (0,
|
|
17466
|
+
await (0, import_promises29.writeFile)(lockPath, String(process.pid), { flag: "wx" });
|
|
17236
17467
|
return true;
|
|
17237
17468
|
} catch (err) {
|
|
17238
17469
|
if (err.code !== "EEXIST") {
|
|
17239
17470
|
throw err;
|
|
17240
17471
|
}
|
|
17241
17472
|
try {
|
|
17242
|
-
const pidStr = await (0,
|
|
17473
|
+
const pidStr = await (0, import_promises29.readFile)(lockPath, "utf-8");
|
|
17243
17474
|
const pid = Number.parseInt(pidStr.trim(), 10);
|
|
17244
17475
|
if (!Number.isNaN(pid)) {
|
|
17245
17476
|
try {
|
|
17246
17477
|
process.kill(pid, 0);
|
|
17247
17478
|
return false;
|
|
17248
17479
|
} catch {
|
|
17249
|
-
await (0,
|
|
17480
|
+
await (0, import_promises29.unlink)(lockPath).catch(() => {
|
|
17250
17481
|
});
|
|
17251
17482
|
continue;
|
|
17252
17483
|
}
|
|
@@ -17264,9 +17495,9 @@ var WorkspacePoolManager = class {
|
|
|
17264
17495
|
* Returns false (no drift) if metadata.json doesn't exist (first use).
|
|
17265
17496
|
*/
|
|
17266
17497
|
async checkDrift(poolDir, fingerprint) {
|
|
17267
|
-
const metadataPath =
|
|
17498
|
+
const metadataPath = import_node_path43.default.join(poolDir, "metadata.json");
|
|
17268
17499
|
try {
|
|
17269
|
-
const raw = await (0,
|
|
17500
|
+
const raw = await (0, import_promises29.readFile)(metadataPath, "utf-8");
|
|
17270
17501
|
const metadata = JSON.parse(raw);
|
|
17271
17502
|
return metadata.fingerprint !== fingerprint;
|
|
17272
17503
|
} catch {
|
|
@@ -17281,17 +17512,17 @@ var WorkspacePoolManager = class {
|
|
|
17281
17512
|
repos,
|
|
17282
17513
|
createdAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
17283
17514
|
};
|
|
17284
|
-
await (0,
|
|
17515
|
+
await (0, import_promises29.writeFile)(import_node_path43.default.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
|
|
17285
17516
|
}
|
|
17286
17517
|
/** Remove all slot directories and their lock files from a pool directory. */
|
|
17287
17518
|
async removeAllSlots(poolDir) {
|
|
17288
|
-
const entries = await (0,
|
|
17519
|
+
const entries = await (0, import_promises29.readdir)(poolDir);
|
|
17289
17520
|
for (const entry of entries) {
|
|
17290
17521
|
if (entry.startsWith("slot-") && !entry.endsWith(".lock")) {
|
|
17291
|
-
const lockPath =
|
|
17292
|
-
if ((0,
|
|
17522
|
+
const lockPath = import_node_path43.default.join(poolDir, `${entry}.lock`);
|
|
17523
|
+
if ((0, import_node_fs13.existsSync)(lockPath)) {
|
|
17293
17524
|
try {
|
|
17294
|
-
const pidStr = await (0,
|
|
17525
|
+
const pidStr = await (0, import_promises29.readFile)(lockPath, "utf-8");
|
|
17295
17526
|
const pid = Number.parseInt(pidStr.trim(), 10);
|
|
17296
17527
|
if (!Number.isNaN(pid)) {
|
|
17297
17528
|
try {
|
|
@@ -17304,12 +17535,12 @@ var WorkspacePoolManager = class {
|
|
|
17304
17535
|
} catch {
|
|
17305
17536
|
}
|
|
17306
17537
|
}
|
|
17307
|
-
await (0,
|
|
17308
|
-
await (0,
|
|
17538
|
+
await (0, import_promises29.rm)(import_node_path43.default.join(poolDir, entry), { recursive: true, force: true });
|
|
17539
|
+
await (0, import_promises29.rm)(lockPath, { force: true }).catch(() => {
|
|
17309
17540
|
});
|
|
17310
17541
|
}
|
|
17311
17542
|
}
|
|
17312
|
-
await (0,
|
|
17543
|
+
await (0, import_promises29.rm)(import_node_path43.default.join(poolDir, "metadata.json"), { force: true }).catch(() => {
|
|
17313
17544
|
});
|
|
17314
17545
|
}
|
|
17315
17546
|
/**
|
|
@@ -17319,8 +17550,8 @@ var WorkspacePoolManager = class {
|
|
|
17319
17550
|
*/
|
|
17320
17551
|
async resetSlot(slotPath, templatePath, repos, poolReset = "fast") {
|
|
17321
17552
|
for (const repo of repos) {
|
|
17322
|
-
const repoDir =
|
|
17323
|
-
if (!(0,
|
|
17553
|
+
const repoDir = import_node_path43.default.join(slotPath, repo.path);
|
|
17554
|
+
if (!(0, import_node_fs13.existsSync)(repoDir)) {
|
|
17324
17555
|
continue;
|
|
17325
17556
|
}
|
|
17326
17557
|
if (poolReset === "none") {
|
|
@@ -17344,11 +17575,11 @@ var WorkspacePoolManager = class {
|
|
|
17344
17575
|
};
|
|
17345
17576
|
|
|
17346
17577
|
// src/evaluation/workspace/repo-manager.ts
|
|
17347
|
-
var
|
|
17348
|
-
var
|
|
17349
|
-
var
|
|
17578
|
+
var import_node_child_process10 = require("child_process");
|
|
17579
|
+
var import_node_fs14 = require("fs");
|
|
17580
|
+
var import_node_path44 = __toESM(require("path"), 1);
|
|
17350
17581
|
var import_node_util6 = require("util");
|
|
17351
|
-
var execFileAsync2 = (0, import_node_util6.promisify)(
|
|
17582
|
+
var execFileAsync2 = (0, import_node_util6.promisify)(import_node_child_process10.execFile);
|
|
17352
17583
|
var DEFAULT_TIMEOUT_MS2 = 3e5;
|
|
17353
17584
|
function gitEnv2() {
|
|
17354
17585
|
const env = { ...process.env };
|
|
@@ -17397,7 +17628,7 @@ var RepoManager = class {
|
|
|
17397
17628
|
resolvedSourcePath: sourcePath ?? "",
|
|
17398
17629
|
reason: "empty_path"
|
|
17399
17630
|
});
|
|
17400
|
-
} else if (!(0,
|
|
17631
|
+
} else if (!(0, import_node_fs14.existsSync)(sourcePath)) {
|
|
17401
17632
|
errors.push({
|
|
17402
17633
|
repoPath: repo.path,
|
|
17403
17634
|
resolvedSourcePath: sourcePath,
|
|
@@ -17446,7 +17677,7 @@ ${lines.join("\n")}`;
|
|
|
17446
17677
|
* Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
|
|
17447
17678
|
*/
|
|
17448
17679
|
async materialize(repo, workspacePath) {
|
|
17449
|
-
const targetDir =
|
|
17680
|
+
const targetDir = import_node_path44.default.join(workspacePath, repo.path);
|
|
17450
17681
|
const sourceUrl = getSourceUrl(repo.source);
|
|
17451
17682
|
const startedAt = Date.now();
|
|
17452
17683
|
if (this.verbose) {
|
|
@@ -17537,7 +17768,7 @@ ${lines.join("\n")}`;
|
|
|
17537
17768
|
async reset(repos, workspacePath, reset) {
|
|
17538
17769
|
const cleanFlag = reset === "strict" ? "-fdx" : "-fd";
|
|
17539
17770
|
for (const repo of repos) {
|
|
17540
|
-
const targetDir =
|
|
17771
|
+
const targetDir = import_node_path44.default.join(workspacePath, repo.path);
|
|
17541
17772
|
await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
|
|
17542
17773
|
await this.runGit(["clean", cleanFlag], { cwd: targetDir });
|
|
17543
17774
|
}
|
|
@@ -17545,36 +17776,36 @@ ${lines.join("\n")}`;
|
|
|
17545
17776
|
};
|
|
17546
17777
|
|
|
17547
17778
|
// src/evaluation/workspace/resolve.ts
|
|
17548
|
-
var
|
|
17549
|
-
var
|
|
17779
|
+
var import_promises30 = require("fs/promises");
|
|
17780
|
+
var import_node_path45 = __toESM(require("path"), 1);
|
|
17550
17781
|
async function resolveWorkspaceTemplate(templatePath) {
|
|
17551
17782
|
if (!templatePath) {
|
|
17552
17783
|
return void 0;
|
|
17553
17784
|
}
|
|
17554
|
-
const resolved =
|
|
17555
|
-
const stats = await (0,
|
|
17785
|
+
const resolved = import_node_path45.default.resolve(templatePath);
|
|
17786
|
+
const stats = await (0, import_promises30.stat)(resolved);
|
|
17556
17787
|
if (stats.isFile()) {
|
|
17557
17788
|
return {
|
|
17558
|
-
dir:
|
|
17789
|
+
dir: import_node_path45.default.dirname(resolved),
|
|
17559
17790
|
workspaceFile: resolved
|
|
17560
17791
|
};
|
|
17561
17792
|
}
|
|
17562
17793
|
if (!stats.isDirectory()) {
|
|
17563
17794
|
throw new Error(`workspace template is neither a file nor a directory: ${resolved}`);
|
|
17564
17795
|
}
|
|
17565
|
-
const entries = await (0,
|
|
17796
|
+
const entries = await (0, import_promises30.readdir)(resolved);
|
|
17566
17797
|
const workspaceFiles = entries.filter((e) => e.endsWith(".code-workspace"));
|
|
17567
17798
|
if (workspaceFiles.length === 1) {
|
|
17568
17799
|
return {
|
|
17569
17800
|
dir: resolved,
|
|
17570
|
-
workspaceFile:
|
|
17801
|
+
workspaceFile: import_node_path45.default.join(resolved, workspaceFiles[0])
|
|
17571
17802
|
};
|
|
17572
17803
|
}
|
|
17573
17804
|
if (workspaceFiles.length > 1) {
|
|
17574
17805
|
const conventionFile = workspaceFiles.find((f) => f === "template.code-workspace");
|
|
17575
17806
|
return {
|
|
17576
17807
|
dir: resolved,
|
|
17577
|
-
workspaceFile: conventionFile ?
|
|
17808
|
+
workspaceFile: conventionFile ? import_node_path45.default.join(resolved, conventionFile) : void 0
|
|
17578
17809
|
};
|
|
17579
17810
|
}
|
|
17580
17811
|
return { dir: resolved };
|
|
@@ -17711,7 +17942,7 @@ async function runEvaluation(options) {
|
|
|
17711
17942
|
);
|
|
17712
17943
|
useCache = false;
|
|
17713
17944
|
}
|
|
17714
|
-
const evalRunId = (0,
|
|
17945
|
+
const evalRunId = (0, import_node_crypto11.randomUUID)();
|
|
17715
17946
|
const evalCases = preloadedEvalCases ?? await loadTests(evalFilePath, repoRoot, { verbose, filter });
|
|
17716
17947
|
const filteredEvalCases = filterEvalCases(evalCases, filter);
|
|
17717
17948
|
if (filteredEvalCases.length === 0) {
|
|
@@ -17790,7 +18021,7 @@ async function runEvaluation(options) {
|
|
|
17790
18021
|
];
|
|
17791
18022
|
const evaluatorRegistry = buildEvaluatorRegistry(evaluators, resolveGraderProvider);
|
|
17792
18023
|
const typeRegistry = createBuiltinRegistry();
|
|
17793
|
-
const discoveryBaseDir = evalFilePath ?
|
|
18024
|
+
const discoveryBaseDir = evalFilePath ? import_node_path46.default.dirname(import_node_path46.default.resolve(evalFilePath)) : process.cwd();
|
|
17794
18025
|
const evalDir = discoveryBaseDir;
|
|
17795
18026
|
await discoverAssertions(typeRegistry, discoveryBaseDir);
|
|
17796
18027
|
await discoverGraders(typeRegistry, discoveryBaseDir);
|
|
@@ -17930,14 +18161,14 @@ async function runEvaluation(options) {
|
|
|
17930
18161
|
let staticMaterialised = false;
|
|
17931
18162
|
if (useStaticWorkspace && configuredStaticPath) {
|
|
17932
18163
|
const isYamlConfiguredPath = !cliWorkspacePath && !!yamlWorkspacePath;
|
|
17933
|
-
const dirExists = await (0,
|
|
18164
|
+
const dirExists = await (0, import_promises31.stat)(configuredStaticPath).then(
|
|
17934
18165
|
(s) => s.isDirectory(),
|
|
17935
18166
|
() => false
|
|
17936
18167
|
);
|
|
17937
|
-
const isEmpty = dirExists ? (await (0,
|
|
18168
|
+
const isEmpty = dirExists ? (await (0, import_promises31.readdir)(configuredStaticPath)).length === 0 : false;
|
|
17938
18169
|
if (isYamlConfiguredPath && (!dirExists || isEmpty)) {
|
|
17939
18170
|
if (!dirExists) {
|
|
17940
|
-
await (0,
|
|
18171
|
+
await (0, import_promises31.mkdir)(configuredStaticPath, { recursive: true });
|
|
17941
18172
|
}
|
|
17942
18173
|
if (workspaceTemplate) {
|
|
17943
18174
|
await copyDirectoryRecursive(workspaceTemplate, configuredStaticPath);
|
|
@@ -17982,14 +18213,14 @@ async function runEvaluation(options) {
|
|
|
17982
18213
|
}
|
|
17983
18214
|
} else if (suiteWorkspace?.hooks || suiteWorkspace?.repos?.length && !isPerTestIsolation) {
|
|
17984
18215
|
sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
|
|
17985
|
-
await (0,
|
|
18216
|
+
await (0, import_promises31.mkdir)(sharedWorkspacePath, { recursive: true });
|
|
17986
18217
|
setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
|
|
17987
18218
|
}
|
|
17988
18219
|
try {
|
|
17989
18220
|
if (suiteWorkspaceFile && sharedWorkspacePath) {
|
|
17990
|
-
const copiedWorkspaceFile =
|
|
18221
|
+
const copiedWorkspaceFile = import_node_path46.default.join(sharedWorkspacePath, import_node_path46.default.basename(suiteWorkspaceFile));
|
|
17991
18222
|
try {
|
|
17992
|
-
await (0,
|
|
18223
|
+
await (0, import_promises31.stat)(copiedWorkspaceFile);
|
|
17993
18224
|
suiteWorkspaceFile = copiedWorkspaceFile;
|
|
17994
18225
|
} catch {
|
|
17995
18226
|
}
|
|
@@ -18569,9 +18800,9 @@ async function runEvalCase(options) {
|
|
|
18569
18800
|
);
|
|
18570
18801
|
}
|
|
18571
18802
|
if (caseWorkspaceFile && workspacePath) {
|
|
18572
|
-
const copiedFile =
|
|
18803
|
+
const copiedFile = import_node_path46.default.join(workspacePath, import_node_path46.default.basename(caseWorkspaceFile));
|
|
18573
18804
|
try {
|
|
18574
|
-
await (0,
|
|
18805
|
+
await (0, import_promises31.stat)(copiedFile);
|
|
18575
18806
|
caseWorkspaceFile = copiedFile;
|
|
18576
18807
|
} catch {
|
|
18577
18808
|
}
|
|
@@ -18579,7 +18810,7 @@ async function runEvalCase(options) {
|
|
|
18579
18810
|
}
|
|
18580
18811
|
if (!workspacePath && (evalCase.workspace?.hooks || evalCase.workspace?.repos?.length) && evalRunId) {
|
|
18581
18812
|
workspacePath = getWorkspacePath(evalRunId, evalCase.id);
|
|
18582
|
-
await (0,
|
|
18813
|
+
await (0, import_promises31.mkdir)(workspacePath, { recursive: true });
|
|
18583
18814
|
}
|
|
18584
18815
|
if (evalCase.workspace?.repos?.length && workspacePath) {
|
|
18585
18816
|
const localPathErrors = RepoManager.validateLocalPaths(evalCase.workspace.repos);
|
|
@@ -18631,11 +18862,11 @@ async function runEvalCase(options) {
|
|
|
18631
18862
|
const files = evalCase.metadata.agent_skills_files;
|
|
18632
18863
|
if (baseDir && files.length > 0) {
|
|
18633
18864
|
for (const relPath of files) {
|
|
18634
|
-
const srcPath =
|
|
18635
|
-
const destPath =
|
|
18865
|
+
const srcPath = import_node_path46.default.resolve(baseDir, relPath);
|
|
18866
|
+
const destPath = import_node_path46.default.resolve(workspacePath, relPath);
|
|
18636
18867
|
try {
|
|
18637
|
-
await (0,
|
|
18638
|
-
await (0,
|
|
18868
|
+
await (0, import_promises31.mkdir)(import_node_path46.default.dirname(destPath), { recursive: true });
|
|
18869
|
+
await (0, import_promises31.copyFile)(srcPath, destPath);
|
|
18639
18870
|
} catch (error) {
|
|
18640
18871
|
const message = error instanceof Error ? error.message : String(error);
|
|
18641
18872
|
return buildErrorResult(
|
|
@@ -19280,7 +19511,7 @@ async function runEvaluatorList(options) {
|
|
|
19280
19511
|
fileChanges,
|
|
19281
19512
|
workspacePath
|
|
19282
19513
|
};
|
|
19283
|
-
const evalFileDir = evalCase.file_paths[0] ?
|
|
19514
|
+
const evalFileDir = evalCase.file_paths[0] ? import_node_path46.default.dirname(evalCase.file_paths[0]) : process.cwd();
|
|
19284
19515
|
const dispatchContext = {
|
|
19285
19516
|
graderProvider,
|
|
19286
19517
|
targetResolver,
|
|
@@ -19510,7 +19741,7 @@ function extractProviderError(response) {
|
|
|
19510
19741
|
return trimmed.length > 0 ? trimmed : void 0;
|
|
19511
19742
|
}
|
|
19512
19743
|
function createCacheKey(provider, target, evalCase, promptInputs) {
|
|
19513
|
-
const hash = (0,
|
|
19744
|
+
const hash = (0, import_node_crypto11.createHash)("sha256");
|
|
19514
19745
|
hash.update(provider.id);
|
|
19515
19746
|
hash.update(target.name);
|
|
19516
19747
|
hash.update(evalCase.id);
|
|
@@ -19613,8 +19844,8 @@ function computeWeightedMean(entries) {
|
|
|
19613
19844
|
}
|
|
19614
19845
|
|
|
19615
19846
|
// src/evaluation/evaluate.ts
|
|
19616
|
-
var
|
|
19617
|
-
var
|
|
19847
|
+
var import_node_fs15 = require("fs");
|
|
19848
|
+
var import_node_path47 = __toESM(require("path"), 1);
|
|
19618
19849
|
|
|
19619
19850
|
// src/evaluation/providers/function-provider.ts
|
|
19620
19851
|
function createFunctionProvider(taskFn) {
|
|
@@ -19651,7 +19882,7 @@ async function evaluate(config) {
|
|
|
19651
19882
|
}
|
|
19652
19883
|
const gitRoot = await findGitRoot(process.cwd());
|
|
19653
19884
|
const repoRoot = gitRoot ?? process.cwd();
|
|
19654
|
-
const testFilePath = config.specFile ?
|
|
19885
|
+
const testFilePath = config.specFile ? import_node_path47.default.resolve(config.specFile) : import_node_path47.default.join(process.cwd(), "__programmatic__.yaml");
|
|
19655
19886
|
await loadEnvHierarchy(repoRoot, testFilePath);
|
|
19656
19887
|
let resolvedTarget;
|
|
19657
19888
|
let taskProvider;
|
|
@@ -19772,11 +20003,11 @@ function computeSummary(results, durationMs) {
|
|
|
19772
20003
|
var TARGET_FILE_CANDIDATES = [".agentv/targets.yaml", ".agentv/targets.yml"];
|
|
19773
20004
|
async function discoverDefaultTarget(repoRoot) {
|
|
19774
20005
|
const cwd = process.cwd();
|
|
19775
|
-
const chain = buildDirectoryChain2(
|
|
20006
|
+
const chain = buildDirectoryChain2(import_node_path47.default.join(cwd, "_placeholder"), repoRoot);
|
|
19776
20007
|
for (const dir of chain) {
|
|
19777
20008
|
for (const candidate of TARGET_FILE_CANDIDATES) {
|
|
19778
|
-
const targetsPath =
|
|
19779
|
-
if (!(0,
|
|
20009
|
+
const targetsPath = import_node_path47.default.join(dir, candidate);
|
|
20010
|
+
if (!(0, import_node_fs15.existsSync)(targetsPath)) continue;
|
|
19780
20011
|
try {
|
|
19781
20012
|
const definitions = await readTargetDefinitions(targetsPath);
|
|
19782
20013
|
const defaultTarget = definitions.find((d) => d.name === "default");
|
|
@@ -19792,8 +20023,8 @@ async function loadEnvHierarchy(repoRoot, startPath) {
|
|
|
19792
20023
|
const chain = buildDirectoryChain2(startPath, repoRoot);
|
|
19793
20024
|
const envFiles = [];
|
|
19794
20025
|
for (const dir of chain) {
|
|
19795
|
-
const envPath =
|
|
19796
|
-
if ((0,
|
|
20026
|
+
const envPath = import_node_path47.default.join(dir, ".env");
|
|
20027
|
+
if ((0, import_node_fs15.existsSync)(envPath)) envFiles.push(envPath);
|
|
19797
20028
|
}
|
|
19798
20029
|
for (let i = 0; i < envFiles.length; i++) {
|
|
19799
20030
|
try {
|
|
@@ -19973,8 +20204,8 @@ function buildPrompt(criteria, question, referenceAnswer) {
|
|
|
19973
20204
|
}
|
|
19974
20205
|
|
|
19975
20206
|
// src/evaluation/cache/response-cache.ts
|
|
19976
|
-
var
|
|
19977
|
-
var
|
|
20207
|
+
var import_promises32 = require("fs/promises");
|
|
20208
|
+
var import_node_path48 = __toESM(require("path"), 1);
|
|
19978
20209
|
var DEFAULT_CACHE_PATH = ".agentv/cache";
|
|
19979
20210
|
var ResponseCache = class {
|
|
19980
20211
|
cachePath;
|
|
@@ -19984,7 +20215,7 @@ var ResponseCache = class {
|
|
|
19984
20215
|
async get(key) {
|
|
19985
20216
|
const filePath = this.keyToPath(key);
|
|
19986
20217
|
try {
|
|
19987
|
-
const data = await (0,
|
|
20218
|
+
const data = await (0, import_promises32.readFile)(filePath, "utf8");
|
|
19988
20219
|
return JSON.parse(data);
|
|
19989
20220
|
} catch {
|
|
19990
20221
|
return void 0;
|
|
@@ -19992,13 +20223,13 @@ var ResponseCache = class {
|
|
|
19992
20223
|
}
|
|
19993
20224
|
async set(key, value) {
|
|
19994
20225
|
const filePath = this.keyToPath(key);
|
|
19995
|
-
const dir =
|
|
19996
|
-
await (0,
|
|
19997
|
-
await (0,
|
|
20226
|
+
const dir = import_node_path48.default.dirname(filePath);
|
|
20227
|
+
await (0, import_promises32.mkdir)(dir, { recursive: true });
|
|
20228
|
+
await (0, import_promises32.writeFile)(filePath, JSON.stringify(value, null, 2), "utf8");
|
|
19998
20229
|
}
|
|
19999
20230
|
keyToPath(key) {
|
|
20000
20231
|
const prefix = key.slice(0, 2);
|
|
20001
|
-
return
|
|
20232
|
+
return import_node_path48.default.join(this.cachePath, prefix, `${key}.json`);
|
|
20002
20233
|
}
|
|
20003
20234
|
};
|
|
20004
20235
|
function shouldEnableCache(params) {
|
|
@@ -20196,6 +20427,17 @@ var OtelTraceExporter = class {
|
|
|
20196
20427
|
if (result.durationMs != null)
|
|
20197
20428
|
rootSpan.setAttribute("agentv.trace.duration_ms", result.durationMs);
|
|
20198
20429
|
if (result.costUsd != null) rootSpan.setAttribute("agentv.trace.cost_usd", result.costUsd);
|
|
20430
|
+
if (result.tokenUsage) {
|
|
20431
|
+
if (result.tokenUsage.input != null) {
|
|
20432
|
+
rootSpan.setAttribute("agentv.trace.token_input", result.tokenUsage.input);
|
|
20433
|
+
}
|
|
20434
|
+
if (result.tokenUsage.output != null) {
|
|
20435
|
+
rootSpan.setAttribute("agentv.trace.token_output", result.tokenUsage.output);
|
|
20436
|
+
}
|
|
20437
|
+
if (result.tokenUsage.cached != null) {
|
|
20438
|
+
rootSpan.setAttribute("agentv.trace.token_cached", result.tokenUsage.cached);
|
|
20439
|
+
}
|
|
20440
|
+
}
|
|
20199
20441
|
if (result.trace) {
|
|
20200
20442
|
const t = result.trace;
|
|
20201
20443
|
rootSpan.setAttribute("agentv.trace.event_count", t.eventCount);
|
|
@@ -20298,6 +20540,7 @@ var OtelTraceExporter = class {
|
|
|
20298
20540
|
tracer.startActiveSpan(
|
|
20299
20541
|
spanName,
|
|
20300
20542
|
{ startTime: startHr },
|
|
20543
|
+
parentCtx,
|
|
20301
20544
|
(span) => {
|
|
20302
20545
|
if (isAssistant) {
|
|
20303
20546
|
span.setAttribute("gen_ai.operation.name", "chat");
|
|
@@ -20330,6 +20573,7 @@ var OtelTraceExporter = class {
|
|
|
20330
20573
|
tracer.startActiveSpan(
|
|
20331
20574
|
`execute_tool ${tc.tool}`,
|
|
20332
20575
|
{},
|
|
20576
|
+
msgCtx,
|
|
20333
20577
|
(toolSpan) => {
|
|
20334
20578
|
toolSpan.setAttribute("gen_ai.tool.name", tc.tool);
|
|
20335
20579
|
if (tc.id) toolSpan.setAttribute("gen_ai.tool.call.id", tc.id);
|
|
@@ -20370,8 +20614,12 @@ var OtelStreamingObserver = class {
|
|
|
20370
20614
|
rootSpan = null;
|
|
20371
20615
|
// biome-ignore lint/suspicious/noExplicitAny: OTel context loaded dynamically
|
|
20372
20616
|
rootCtx = null;
|
|
20617
|
+
observedChildSpans = false;
|
|
20618
|
+
pendingMetrics = null;
|
|
20373
20619
|
/** Create root eval span immediately (visible in backend right away) */
|
|
20374
20620
|
startEvalCase(testId, target, evalSet) {
|
|
20621
|
+
this.pendingMetrics = null;
|
|
20622
|
+
this.observedChildSpans = false;
|
|
20375
20623
|
const ctx = this.parentCtx ?? this.api.context.active();
|
|
20376
20624
|
this.rootSpan = this.tracer.startSpan("agentv.eval", void 0, ctx);
|
|
20377
20625
|
this.rootSpan.setAttribute("gen_ai.operation.name", "evaluate");
|
|
@@ -20384,8 +20632,9 @@ var OtelStreamingObserver = class {
|
|
|
20384
20632
|
/** Create and immediately export a tool span */
|
|
20385
20633
|
onToolCall(name, input, output, _durationMs, toolCallId) {
|
|
20386
20634
|
if (!this.rootCtx) return;
|
|
20635
|
+
this.observedChildSpans = true;
|
|
20387
20636
|
this.api.context.with(this.rootCtx, () => {
|
|
20388
|
-
const span = this.tracer.startSpan(`execute_tool ${name}
|
|
20637
|
+
const span = this.tracer.startSpan(`execute_tool ${name}`, void 0, this.rootCtx);
|
|
20389
20638
|
span.setAttribute("gen_ai.tool.name", name);
|
|
20390
20639
|
if (toolCallId) span.setAttribute("gen_ai.tool.call.id", toolCallId);
|
|
20391
20640
|
if (this.captureContent) {
|
|
@@ -20406,8 +20655,9 @@ var OtelStreamingObserver = class {
|
|
|
20406
20655
|
/** Create and immediately export an LLM span */
|
|
20407
20656
|
onLlmCall(model, tokenUsage) {
|
|
20408
20657
|
if (!this.rootCtx) return;
|
|
20658
|
+
this.observedChildSpans = true;
|
|
20409
20659
|
this.api.context.with(this.rootCtx, () => {
|
|
20410
|
-
const span = this.tracer.startSpan(`chat ${model}
|
|
20660
|
+
const span = this.tracer.startSpan(`chat ${model}`, void 0, this.rootCtx);
|
|
20411
20661
|
span.setAttribute("gen_ai.operation.name", "chat");
|
|
20412
20662
|
span.setAttribute("gen_ai.request.model", model);
|
|
20413
20663
|
span.setAttribute("gen_ai.response.model", model);
|
|
@@ -20422,10 +20672,53 @@ var OtelStreamingObserver = class {
|
|
|
20422
20672
|
span.end();
|
|
20423
20673
|
});
|
|
20424
20674
|
}
|
|
20675
|
+
/** Record final execution metrics before the root span is finalized. */
|
|
20676
|
+
recordEvalMetrics(result) {
|
|
20677
|
+
this.pendingMetrics = result;
|
|
20678
|
+
}
|
|
20425
20679
|
/** Finalize root span with score/verdict after evaluation completes */
|
|
20426
20680
|
finalizeEvalCase(score, error) {
|
|
20427
20681
|
if (!this.rootSpan) return;
|
|
20428
20682
|
this.rootSpan.setAttribute("agentv.score", score);
|
|
20683
|
+
if (this.pendingMetrics?.durationMs != null) {
|
|
20684
|
+
this.rootSpan.setAttribute("agentv.trace.duration_ms", this.pendingMetrics.durationMs);
|
|
20685
|
+
}
|
|
20686
|
+
if (this.pendingMetrics?.costUsd != null) {
|
|
20687
|
+
this.rootSpan.setAttribute("agentv.trace.cost_usd", this.pendingMetrics.costUsd);
|
|
20688
|
+
}
|
|
20689
|
+
if (this.pendingMetrics?.tokenUsage) {
|
|
20690
|
+
if (this.pendingMetrics.tokenUsage.input != null) {
|
|
20691
|
+
this.rootSpan.setAttribute(
|
|
20692
|
+
"agentv.trace.token_input",
|
|
20693
|
+
this.pendingMetrics.tokenUsage.input
|
|
20694
|
+
);
|
|
20695
|
+
}
|
|
20696
|
+
if (this.pendingMetrics.tokenUsage.output != null) {
|
|
20697
|
+
this.rootSpan.setAttribute(
|
|
20698
|
+
"agentv.trace.token_output",
|
|
20699
|
+
this.pendingMetrics.tokenUsage.output
|
|
20700
|
+
);
|
|
20701
|
+
}
|
|
20702
|
+
if (this.pendingMetrics.tokenUsage.cached != null) {
|
|
20703
|
+
this.rootSpan.setAttribute(
|
|
20704
|
+
"agentv.trace.token_cached",
|
|
20705
|
+
this.pendingMetrics.tokenUsage.cached
|
|
20706
|
+
);
|
|
20707
|
+
}
|
|
20708
|
+
}
|
|
20709
|
+
if (this.pendingMetrics?.trace) {
|
|
20710
|
+
this.rootSpan.setAttribute("agentv.trace.event_count", this.pendingMetrics.trace.eventCount);
|
|
20711
|
+
this.rootSpan.setAttribute(
|
|
20712
|
+
"agentv.trace.tool_names",
|
|
20713
|
+
Object.keys(this.pendingMetrics.trace.toolCalls).sort().join(",")
|
|
20714
|
+
);
|
|
20715
|
+
if (this.pendingMetrics.trace.llmCallCount != null) {
|
|
20716
|
+
this.rootSpan.setAttribute(
|
|
20717
|
+
"agentv.trace.llm_call_count",
|
|
20718
|
+
this.pendingMetrics.trace.llmCallCount
|
|
20719
|
+
);
|
|
20720
|
+
}
|
|
20721
|
+
}
|
|
20429
20722
|
if (error) {
|
|
20430
20723
|
this.rootSpan.setStatus({ code: this.api.SpanStatusCode.ERROR, message: error });
|
|
20431
20724
|
} else {
|
|
@@ -20434,6 +20727,33 @@ var OtelStreamingObserver = class {
|
|
|
20434
20727
|
this.rootSpan.end();
|
|
20435
20728
|
this.rootSpan = null;
|
|
20436
20729
|
this.rootCtx = null;
|
|
20730
|
+
this.observedChildSpans = false;
|
|
20731
|
+
this.pendingMetrics = null;
|
|
20732
|
+
}
|
|
20733
|
+
/** Backfill child spans from the completed result when the provider emitted no live callbacks. */
|
|
20734
|
+
completeFromResult(result) {
|
|
20735
|
+
this.recordEvalMetrics({
|
|
20736
|
+
durationMs: result.durationMs,
|
|
20737
|
+
costUsd: result.costUsd,
|
|
20738
|
+
tokenUsage: result.tokenUsage,
|
|
20739
|
+
trace: result.trace
|
|
20740
|
+
});
|
|
20741
|
+
if (this.observedChildSpans || !this.rootCtx) {
|
|
20742
|
+
return;
|
|
20743
|
+
}
|
|
20744
|
+
const model = result.output.find((msg) => msg.role === "assistant")?.metadata?.model ?? result.target ?? "unknown";
|
|
20745
|
+
this.onLlmCall(String(model), result.tokenUsage);
|
|
20746
|
+
for (const message of result.output) {
|
|
20747
|
+
for (const toolCall of message.toolCalls ?? []) {
|
|
20748
|
+
this.onToolCall(
|
|
20749
|
+
toolCall.tool,
|
|
20750
|
+
toolCall.input,
|
|
20751
|
+
toolCall.output,
|
|
20752
|
+
toolCall.durationMs ?? 0,
|
|
20753
|
+
toolCall.id
|
|
20754
|
+
);
|
|
20755
|
+
}
|
|
20756
|
+
}
|
|
20437
20757
|
}
|
|
20438
20758
|
/** Return the active eval span's trace ID and span ID for Braintrust trace bridging */
|
|
20439
20759
|
getActiveSpanIds() {
|