@agentv/core 3.11.0 → 3.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -1315,12 +1315,12 @@ function serializeAttributeValue(value) {
1315
1315
  if (Array.isArray(value)) return { arrayValue: { values: value.map(serializeAttributeValue) } };
1316
1316
  return { stringValue: String(value) };
1317
1317
  }
1318
- var import_promises32, import_node_path48, OtlpJsonFileExporter;
1318
+ var import_promises33, import_node_path49, OtlpJsonFileExporter;
1319
1319
  var init_otlp_json_file_exporter = __esm({
1320
1320
  "src/observability/otlp-json-file-exporter.ts"() {
1321
1321
  "use strict";
1322
- import_promises32 = require("fs/promises");
1323
- import_node_path48 = require("path");
1322
+ import_promises33 = require("fs/promises");
1323
+ import_node_path49 = require("path");
1324
1324
  OtlpJsonFileExporter = class {
1325
1325
  // biome-ignore lint/suspicious/noExplicitAny: serialized span data
1326
1326
  spans = [];
@@ -1359,7 +1359,7 @@ var init_otlp_json_file_exporter = __esm({
1359
1359
  }
1360
1360
  async flush() {
1361
1361
  if (this.spans.length === 0) return;
1362
- await (0, import_promises32.mkdir)((0, import_node_path48.dirname)(this.filePath), { recursive: true });
1362
+ await (0, import_promises33.mkdir)((0, import_node_path49.dirname)(this.filePath), { recursive: true });
1363
1363
  const otlpJson = {
1364
1364
  resourceSpans: [
1365
1365
  {
@@ -1390,27 +1390,28 @@ function hrTimeDiffMs(start, end) {
1390
1390
  const diffNano = end[1] - start[1];
1391
1391
  return Math.round(diffSec * 1e3 + diffNano / 1e6);
1392
1392
  }
1393
- var import_node_fs15, import_promises33, import_node_path49, SimpleTraceFileExporter;
1393
+ var import_node_fs16, import_promises34, import_node_path50, SimpleTraceFileExporter;
1394
1394
  var init_simple_trace_file_exporter = __esm({
1395
1395
  "src/observability/simple-trace-file-exporter.ts"() {
1396
1396
  "use strict";
1397
- import_node_fs15 = require("fs");
1398
- import_promises33 = require("fs/promises");
1399
- import_node_path49 = require("path");
1397
+ import_node_fs16 = require("fs");
1398
+ import_promises34 = require("fs/promises");
1399
+ import_node_path50 = require("path");
1400
1400
  SimpleTraceFileExporter = class {
1401
1401
  stream = null;
1402
1402
  filePath;
1403
1403
  streamReady = null;
1404
1404
  pendingWrites = [];
1405
1405
  _shuttingDown = false;
1406
+ spansByTraceId = /* @__PURE__ */ new Map();
1406
1407
  constructor(filePath) {
1407
1408
  this.filePath = filePath;
1408
1409
  }
1409
1410
  async ensureStream() {
1410
1411
  if (!this.streamReady) {
1411
1412
  this.streamReady = (async () => {
1412
- await (0, import_promises33.mkdir)((0, import_node_path49.dirname)(this.filePath), { recursive: true });
1413
- this.stream = (0, import_node_fs15.createWriteStream)(this.filePath, { flags: "w" });
1413
+ await (0, import_promises34.mkdir)((0, import_node_path50.dirname)(this.filePath), { recursive: true });
1414
+ this.stream = (0, import_node_fs16.createWriteStream)(this.filePath, { flags: "w" });
1414
1415
  return this.stream;
1415
1416
  })();
1416
1417
  }
@@ -1421,25 +1422,27 @@ var init_simple_trace_file_exporter = __esm({
1421
1422
  resultCallback({ code: 0 });
1422
1423
  return;
1423
1424
  }
1424
- const spanMap = /* @__PURE__ */ new Map();
1425
- const childMap = /* @__PURE__ */ new Map();
1425
+ const rootSpans = [];
1426
1426
  for (const span of spans) {
1427
- spanMap.set(span.spanContext().spanId, span);
1428
- const parentId = span.parentSpanId;
1429
- if (parentId) {
1430
- if (!childMap.has(parentId)) childMap.set(parentId, []);
1431
- childMap.get(parentId)?.push(span);
1427
+ const traceId = span.spanContext().traceId;
1428
+ const existing = this.spansByTraceId.get(traceId) ?? [];
1429
+ existing.push(span);
1430
+ this.spansByTraceId.set(traceId, existing);
1431
+ if (span.name === "agentv.eval") {
1432
+ rootSpans.push(span);
1432
1433
  }
1433
1434
  }
1434
- const rootSpans = spans.filter(
1435
- (s) => !s.parentSpanId || !spanMap.has(s.parentSpanId)
1436
- );
1437
1435
  const writePromise = this.ensureStream().then((stream) => {
1438
1436
  for (const root of rootSpans) {
1439
- const children = this.collectChildren(root.spanContext().spanId, childMap);
1437
+ const traceId = root.spanContext().traceId;
1438
+ const traceSpans = this.spansByTraceId.get(traceId) ?? [root];
1439
+ const children = traceSpans.filter(
1440
+ (span) => span.spanContext().spanId !== root.spanContext().spanId
1441
+ );
1440
1442
  const record = this.buildSimpleRecord(root, children);
1441
1443
  stream.write(`${JSON.stringify(record)}
1442
1444
  `);
1445
+ this.spansByTraceId.delete(traceId);
1443
1446
  }
1444
1447
  });
1445
1448
  this.pendingWrites.push(writePromise);
@@ -1449,6 +1452,7 @@ var init_simple_trace_file_exporter = __esm({
1449
1452
  this._shuttingDown = true;
1450
1453
  await Promise.all(this.pendingWrites);
1451
1454
  this.pendingWrites = [];
1455
+ this.spansByTraceId.clear();
1452
1456
  return new Promise((resolve) => {
1453
1457
  if (this.stream) {
1454
1458
  this.stream.end(() => resolve());
@@ -1461,17 +1465,9 @@ var init_simple_trace_file_exporter = __esm({
1461
1465
  await Promise.all(this.pendingWrites);
1462
1466
  this.pendingWrites = [];
1463
1467
  }
1464
- collectChildren(spanId, childMap) {
1465
- const direct = childMap.get(spanId) || [];
1466
- const all = [...direct];
1467
- for (const child of direct) {
1468
- all.push(...this.collectChildren(child.spanContext().spanId, childMap));
1469
- }
1470
- return all;
1471
- }
1472
1468
  buildSimpleRecord(root, children) {
1473
1469
  const attrs = root.attributes || {};
1474
- const durationMs = hrTimeDiffMs(root.startTime, root.endTime);
1470
+ const durationMs = typeof attrs["agentv.trace.duration_ms"] === "number" ? attrs["agentv.trace.duration_ms"] : hrTimeDiffMs(root.startTime, root.endTime);
1475
1471
  let inputTokens = 0;
1476
1472
  let outputTokens = 0;
1477
1473
  for (const child of children) {
@@ -1479,6 +1475,14 @@ var init_simple_trace_file_exporter = __esm({
1479
1475
  if (ca["gen_ai.usage.input_tokens"]) inputTokens += ca["gen_ai.usage.input_tokens"];
1480
1476
  if (ca["gen_ai.usage.output_tokens"]) outputTokens += ca["gen_ai.usage.output_tokens"];
1481
1477
  }
1478
+ const rootInputTokens = typeof attrs["agentv.trace.token_input"] === "number" ? attrs["agentv.trace.token_input"] : 0;
1479
+ const rootOutputTokens = typeof attrs["agentv.trace.token_output"] === "number" ? attrs["agentv.trace.token_output"] : 0;
1480
+ const rootCachedTokens = typeof attrs["agentv.trace.token_cached"] === "number" ? attrs["agentv.trace.token_cached"] : void 0;
1481
+ const llmSpans = children.filter((s) => s.attributes?.["gen_ai.operation.name"] === "chat").map((s) => ({
1482
+ type: "llm",
1483
+ name: s.name,
1484
+ duration_ms: hrTimeDiffMs(s.startTime, s.endTime)
1485
+ }));
1482
1486
  const toolSpans = children.filter((s) => s.attributes?.["gen_ai.tool.name"]).map((s) => ({
1483
1487
  type: "tool",
1484
1488
  name: s.attributes["gen_ai.tool.name"],
@@ -1490,8 +1494,12 @@ var init_simple_trace_file_exporter = __esm({
1490
1494
  score: attrs["agentv.score"],
1491
1495
  duration_ms: durationMs,
1492
1496
  cost_usd: attrs["agentv.trace.cost_usd"],
1493
- token_usage: inputTokens || outputTokens ? { input: inputTokens, output: outputTokens } : void 0,
1494
- spans: toolSpans.length > 0 ? toolSpans : void 0
1497
+ token_usage: inputTokens || outputTokens || rootInputTokens || rootOutputTokens || rootCachedTokens ? {
1498
+ input: inputTokens || rootInputTokens,
1499
+ output: outputTokens || rootOutputTokens,
1500
+ ...rootCachedTokens ? { cached: rootCachedTokens } : {}
1501
+ } : void 0,
1502
+ spans: [...llmSpans, ...toolSpans].length > 0 ? [...llmSpans, ...toolSpans] : void 0
1495
1503
  };
1496
1504
  }
1497
1505
  };
@@ -8749,265 +8757,7 @@ var MockProvider = class {
8749
8757
  }
8750
8758
  };
8751
8759
 
8752
- // src/evaluation/providers/pi-utils.ts
8753
- function extractPiTextContent(content) {
8754
- if (typeof content === "string") {
8755
- return content;
8756
- }
8757
- if (!Array.isArray(content)) {
8758
- return void 0;
8759
- }
8760
- const textParts = [];
8761
- for (const part of content) {
8762
- if (!part || typeof part !== "object") {
8763
- continue;
8764
- }
8765
- const p = part;
8766
- if (p.type === "text" && typeof p.text === "string") {
8767
- textParts.push(p.text);
8768
- }
8769
- }
8770
- return textParts.length > 0 ? textParts.join("\n") : void 0;
8771
- }
8772
- function toFiniteNumber(value) {
8773
- if (typeof value === "number" && Number.isFinite(value)) return value;
8774
- return void 0;
8775
- }
8776
-
8777
- // src/evaluation/providers/pi-agent-sdk.ts
8778
- var piAgentModule = null;
8779
- var piAiModule = null;
8780
- async function loadPiModules() {
8781
- if (!piAgentModule || !piAiModule) {
8782
- try {
8783
- [piAgentModule, piAiModule] = await Promise.all([
8784
- import("@mariozechner/pi-agent-core"),
8785
- import("@mariozechner/pi-ai")
8786
- ]);
8787
- } catch (error) {
8788
- throw new Error(
8789
- `Failed to load pi-agent-sdk dependencies. Please install them:
8790
- npm install @mariozechner/pi-agent-core @mariozechner/pi-ai
8791
-
8792
- Original error: ${error instanceof Error ? error.message : String(error)}`
8793
- );
8794
- }
8795
- }
8796
- return {
8797
- Agent: piAgentModule.Agent,
8798
- getModel: piAiModule.getModel,
8799
- getEnvApiKey: piAiModule.getEnvApiKey
8800
- };
8801
- }
8802
- var PiAgentSdkProvider = class {
8803
- id;
8804
- kind = "pi-agent-sdk";
8805
- targetName;
8806
- supportsBatch = false;
8807
- config;
8808
- constructor(targetName, config) {
8809
- this.id = `pi-agent-sdk:${targetName}`;
8810
- this.targetName = targetName;
8811
- this.config = config;
8812
- }
8813
- async invoke(request) {
8814
- if (request.signal?.aborted) {
8815
- throw new Error("Pi agent SDK request was aborted before execution");
8816
- }
8817
- const { Agent, getModel, getEnvApiKey } = await loadPiModules();
8818
- const startTimeIso = (/* @__PURE__ */ new Date()).toISOString();
8819
- const startMs = Date.now();
8820
- const providerName = this.config.subprovider ?? "anthropic";
8821
- const modelId = this.config.model ?? "claude-sonnet-4-20250514";
8822
- const model = getModel(providerName, modelId);
8823
- const systemPrompt = this.config.systemPrompt ?? "Answer directly and concisely.";
8824
- const agent = new Agent({
8825
- initialState: {
8826
- systemPrompt,
8827
- model,
8828
- tools: [],
8829
- // No tools for simple Q&A
8830
- messages: []
8831
- },
8832
- getApiKey: async (provider) => {
8833
- return this.config.apiKey ?? getEnvApiKey(provider) ?? void 0;
8834
- }
8835
- });
8836
- let tokenUsage;
8837
- let costUsd;
8838
- const toolTrackers = /* @__PURE__ */ new Map();
8839
- const completedToolResults = /* @__PURE__ */ new Map();
8840
- const unsubscribe = agent.subscribe((event) => {
8841
- switch (event.type) {
8842
- case "message_end": {
8843
- const msg = event.message;
8844
- if (msg && typeof msg === "object" && "role" in msg && msg.role === "assistant" && "usage" in msg) {
8845
- const usage = msg.usage;
8846
- if (usage && typeof usage === "object") {
8847
- const u = usage;
8848
- const input = toFiniteNumber(u.input);
8849
- const output = toFiniteNumber(u.output);
8850
- const cached = toFiniteNumber(u.cacheRead);
8851
- let callDelta;
8852
- if (input !== void 0 || output !== void 0) {
8853
- callDelta = {
8854
- input: input ?? 0,
8855
- output: output ?? 0,
8856
- ...cached !== void 0 ? { cached } : {}
8857
- };
8858
- tokenUsage = {
8859
- input: (tokenUsage?.input ?? 0) + callDelta.input,
8860
- output: (tokenUsage?.output ?? 0) + callDelta.output,
8861
- ...cached !== void 0 ? { cached: (tokenUsage?.cached ?? 0) + cached } : tokenUsage?.cached !== void 0 ? { cached: tokenUsage.cached } : {}
8862
- };
8863
- }
8864
- const cost = u.cost;
8865
- if (cost && typeof cost === "object") {
8866
- const total = toFiniteNumber(cost.total);
8867
- if (total !== void 0) {
8868
- costUsd = (costUsd ?? 0) + total;
8869
- }
8870
- }
8871
- request.streamCallbacks?.onLlmCallEnd?.(modelId, callDelta);
8872
- }
8873
- }
8874
- break;
8875
- }
8876
- case "tool_execution_start": {
8877
- toolTrackers.set(event.toolCallId, {
8878
- toolCallId: event.toolCallId,
8879
- toolName: event.toolName,
8880
- args: event.args,
8881
- startMs: Date.now(),
8882
- startTime: (/* @__PURE__ */ new Date()).toISOString()
8883
- });
8884
- request.streamCallbacks?.onToolCallStart?.(event.toolName, event.toolCallId);
8885
- break;
8886
- }
8887
- case "tool_execution_end": {
8888
- const tracker = toolTrackers.get(event.toolCallId);
8889
- const durationMs = tracker ? Date.now() - tracker.startMs : 0;
8890
- completedToolResults.set(event.toolCallId, {
8891
- output: event.result,
8892
- durationMs
8893
- });
8894
- request.streamCallbacks?.onToolCallEnd?.(
8895
- event.toolName,
8896
- tracker?.args,
8897
- event.result,
8898
- durationMs,
8899
- event.toolCallId
8900
- );
8901
- toolTrackers.delete(event.toolCallId);
8902
- break;
8903
- }
8904
- }
8905
- });
8906
- try {
8907
- if (this.config.timeoutMs) {
8908
- const timeoutMs = this.config.timeoutMs;
8909
- const timeoutPromise = new Promise((_, reject) => {
8910
- setTimeout(
8911
- () => reject(new Error(`Pi agent SDK timed out after ${timeoutMs}ms`)),
8912
- timeoutMs
8913
- );
8914
- });
8915
- await Promise.race([agent.prompt(request.question), timeoutPromise]);
8916
- } else {
8917
- await agent.prompt(request.question);
8918
- }
8919
- await agent.waitForIdle();
8920
- const agentMessages = agent.state.messages;
8921
- const output = [];
8922
- for (const msg of agentMessages) {
8923
- output.push(convertAgentMessage(msg, toolTrackers, completedToolResults));
8924
- }
8925
- const endTimeIso = (/* @__PURE__ */ new Date()).toISOString();
8926
- const durationMs = Date.now() - startMs;
8927
- return {
8928
- raw: {
8929
- messages: agentMessages,
8930
- systemPrompt,
8931
- model: this.config.model,
8932
- subprovider: this.config.subprovider
8933
- },
8934
- output,
8935
- tokenUsage,
8936
- costUsd,
8937
- durationMs,
8938
- startTime: startTimeIso,
8939
- endTime: endTimeIso
8940
- };
8941
- } finally {
8942
- unsubscribe();
8943
- }
8944
- }
8945
- };
8946
- function convertAgentMessage(message, toolTrackers, completedToolResults) {
8947
- if (!message || typeof message !== "object") {
8948
- return { role: "unknown", content: String(message) };
8949
- }
8950
- const msg = message;
8951
- const role = typeof msg.role === "string" ? msg.role : "unknown";
8952
- const content = extractPiTextContent(msg.content);
8953
- const toolCalls = extractToolCalls3(msg.content, toolTrackers, completedToolResults);
8954
- const startTime = typeof msg.timestamp === "number" ? new Date(msg.timestamp).toISOString() : typeof msg.timestamp === "string" ? msg.timestamp : void 0;
8955
- let msgTokenUsage;
8956
- if (msg.usage && typeof msg.usage === "object") {
8957
- const u = msg.usage;
8958
- const input = toFiniteNumber(u.input);
8959
- const output = toFiniteNumber(u.output);
8960
- if (input !== void 0 || output !== void 0) {
8961
- msgTokenUsage = {
8962
- input: input ?? 0,
8963
- output: output ?? 0,
8964
- ...toFiniteNumber(u.cacheRead) !== void 0 ? { cached: toFiniteNumber(u.cacheRead) } : {}
8965
- };
8966
- }
8967
- }
8968
- const metadata = {};
8969
- if (msg.api) metadata.api = msg.api;
8970
- if (msg.provider) metadata.provider = msg.provider;
8971
- if (msg.model) metadata.model = msg.model;
8972
- if (msg.stopReason) metadata.stopReason = msg.stopReason;
8973
- return {
8974
- role,
8975
- content,
8976
- toolCalls: toolCalls.length > 0 ? toolCalls : void 0,
8977
- startTime,
8978
- metadata: Object.keys(metadata).length > 0 ? metadata : void 0,
8979
- tokenUsage: msgTokenUsage
8980
- };
8981
- }
8982
- function extractToolCalls3(content, toolTrackers, completedToolResults) {
8983
- if (!Array.isArray(content)) {
8984
- return [];
8985
- }
8986
- const toolCalls = [];
8987
- for (const part of content) {
8988
- if (!part || typeof part !== "object") {
8989
- continue;
8990
- }
8991
- const p = part;
8992
- if (p.type === "toolCall" && typeof p.name === "string") {
8993
- const id = typeof p.id === "string" ? p.id : void 0;
8994
- const tracker = id ? toolTrackers.get(id) : void 0;
8995
- const completed = id ? completedToolResults.get(id) : void 0;
8996
- toolCalls.push({
8997
- tool: p.name,
8998
- input: p.arguments,
8999
- id,
9000
- output: completed?.output,
9001
- durationMs: completed?.durationMs,
9002
- startTime: tracker?.startTime,
9003
- endTime: tracker?.startTime && completed?.durationMs !== void 0 ? new Date(new Date(tracker.startTime).getTime() + completed.durationMs).toISOString() : void 0
9004
- });
9005
- }
9006
- }
9007
- return toolCalls;
9008
- }
9009
-
9010
- // src/evaluation/providers/pi-coding-agent.ts
8760
+ // src/evaluation/providers/pi-cli.ts
9011
8761
  var import_node_child_process4 = require("child_process");
9012
8762
  var import_node_crypto7 = require("crypto");
9013
8763
  var import_node_fs8 = require("fs");
@@ -9068,25 +8818,50 @@ function subscribeToPiLogEntries(listener) {
9068
8818
  };
9069
8819
  }
9070
8820
 
9071
- // src/evaluation/providers/pi-coding-agent.ts
8821
+ // src/evaluation/providers/pi-utils.ts
8822
+ function extractPiTextContent(content) {
8823
+ if (typeof content === "string") {
8824
+ return content;
8825
+ }
8826
+ if (!Array.isArray(content)) {
8827
+ return void 0;
8828
+ }
8829
+ const textParts = [];
8830
+ for (const part of content) {
8831
+ if (!part || typeof part !== "object") {
8832
+ continue;
8833
+ }
8834
+ const p = part;
8835
+ if (p.type === "text" && typeof p.text === "string") {
8836
+ textParts.push(p.text);
8837
+ }
8838
+ }
8839
+ return textParts.length > 0 ? textParts.join("\n") : void 0;
8840
+ }
8841
+ function toFiniteNumber(value) {
8842
+ if (typeof value === "number" && Number.isFinite(value)) return value;
8843
+ return void 0;
8844
+ }
8845
+
8846
+ // src/evaluation/providers/pi-cli.ts
9072
8847
  var WORKSPACE_PREFIX = "agentv-pi-";
9073
8848
  var PROMPT_FILENAME = "prompt.md";
9074
- var PiCodingAgentProvider = class {
8849
+ var PiCliProvider = class {
9075
8850
  id;
9076
- kind = "pi-coding-agent";
8851
+ kind = "pi-cli";
9077
8852
  targetName;
9078
8853
  supportsBatch = false;
9079
8854
  config;
9080
8855
  runPi;
9081
8856
  constructor(targetName, config, runner = defaultPiRunner) {
9082
- this.id = `pi-coding-agent:${targetName}`;
8857
+ this.id = `pi-cli:${targetName}`;
9083
8858
  this.targetName = targetName;
9084
8859
  this.config = config;
9085
8860
  this.runPi = runner;
9086
8861
  }
9087
8862
  async invoke(request) {
9088
8863
  if (request.signal?.aborted) {
9089
- throw new Error("Pi coding agent request was aborted before execution");
8864
+ throw new Error("Pi CLI request was aborted before execution");
9090
8865
  }
9091
8866
  const inputFiles = normalizeInputFiles(request.inputFiles);
9092
8867
  const startTime = (/* @__PURE__ */ new Date()).toISOString();
@@ -9096,17 +8871,17 @@ var PiCodingAgentProvider = class {
9096
8871
  try {
9097
8872
  const promptFile = import_node_path19.default.join(workspaceRoot, PROMPT_FILENAME);
9098
8873
  await (0, import_promises16.writeFile)(promptFile, request.question, "utf8");
9099
- const args = this.buildPiArgs(request.question, inputFiles, request.captureFileChanges);
8874
+ const args = this.buildPiArgs(request.question, inputFiles);
9100
8875
  const cwd = this.resolveCwd(workspaceRoot, request.cwd);
9101
8876
  const result = await this.executePi(args, cwd, request.signal, logger);
9102
8877
  if (result.timedOut) {
9103
8878
  throw new Error(
9104
- `Pi coding agent timed out${formatTimeoutSuffix3(this.config.timeoutMs ?? void 0)}`
8879
+ `Pi CLI timed out${formatTimeoutSuffix3(this.config.timeoutMs ?? void 0)}`
9105
8880
  );
9106
8881
  }
9107
8882
  if (result.exitCode !== 0) {
9108
8883
  const detail = pickDetail(result.stderr, result.stdout);
9109
- const prefix = `Pi coding agent exited with code ${result.exitCode}`;
8884
+ const prefix = `Pi CLI exited with code ${result.exitCode}`;
9110
8885
  throw new Error(detail ? `${prefix}: ${detail}` : prefix);
9111
8886
  }
9112
8887
  const parsed = parsePiJsonl(result.stdout);
@@ -9163,7 +8938,7 @@ var PiCodingAgentProvider = class {
9163
8938
  }
9164
8939
  return import_node_path19.default.resolve(this.config.cwd);
9165
8940
  }
9166
- buildPiArgs(prompt, inputFiles, _captureFileChanges) {
8941
+ buildPiArgs(prompt, inputFiles) {
9167
8942
  const args = [];
9168
8943
  if (this.config.subprovider) {
9169
8944
  args.push("--provider", this.config.subprovider);
@@ -9215,7 +8990,7 @@ ${prompt}` : prompt;
9215
8990
  const err = error;
9216
8991
  if (err.code === "ENOENT") {
9217
8992
  throw new Error(
9218
- `Pi coding agent executable '${this.config.executable}' was not found. Update the target settings.executable or add it to PATH.`
8993
+ `Pi CLI executable '${this.config.executable}' was not found. Update the target executable or add it to PATH.`
9219
8994
  );
9220
8995
  }
9221
8996
  throw error;
@@ -9225,26 +9000,18 @@ ${prompt}` : prompt;
9225
9000
  const env = { ...process.env };
9226
9001
  if (this.config.apiKey) {
9227
9002
  const provider = this.config.subprovider?.toLowerCase() ?? "google";
9228
- switch (provider) {
9229
- case "google":
9230
- case "gemini":
9231
- env.GEMINI_API_KEY = this.config.apiKey;
9232
- break;
9233
- case "anthropic":
9234
- env.ANTHROPIC_API_KEY = this.config.apiKey;
9235
- break;
9236
- case "openai":
9237
- env.OPENAI_API_KEY = this.config.apiKey;
9238
- break;
9239
- case "groq":
9240
- env.GROQ_API_KEY = this.config.apiKey;
9241
- break;
9242
- case "xai":
9243
- env.XAI_API_KEY = this.config.apiKey;
9244
- break;
9245
- case "openrouter":
9246
- env.OPENROUTER_API_KEY = this.config.apiKey;
9247
- break;
9003
+ const ENV_KEY_MAP = {
9004
+ google: "GEMINI_API_KEY",
9005
+ gemini: "GEMINI_API_KEY",
9006
+ anthropic: "ANTHROPIC_API_KEY",
9007
+ openai: "OPENAI_API_KEY",
9008
+ groq: "GROQ_API_KEY",
9009
+ xai: "XAI_API_KEY",
9010
+ openrouter: "OPENROUTER_API_KEY"
9011
+ };
9012
+ const envKey = ENV_KEY_MAP[provider];
9013
+ if (envKey) {
9014
+ env[envKey] = this.config.apiKey;
9248
9015
  }
9249
9016
  }
9250
9017
  return env;
@@ -9262,7 +9029,7 @@ ${prompt}` : prompt;
9262
9029
  if (this.config.logDir) {
9263
9030
  return import_node_path19.default.resolve(this.config.logDir);
9264
9031
  }
9265
- return import_node_path19.default.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
9032
+ return import_node_path19.default.join(process.cwd(), ".agentv", "logs", "pi-cli");
9266
9033
  }
9267
9034
  async createStreamLogger(request) {
9268
9035
  const logDir = this.resolveLogDirectory();
@@ -9314,7 +9081,7 @@ var PiStreamLogger = class _PiStreamLogger {
9314
9081
  static async create(options) {
9315
9082
  const logger = new _PiStreamLogger(options.filePath, options.format);
9316
9083
  const header = [
9317
- "# Pi Coding Agent stream log",
9084
+ "# Pi CLI stream log",
9318
9085
  `# target: ${options.targetName}`,
9319
9086
  options.evalCaseId ? `# eval: ${options.evalCaseId}` : void 0,
9320
9087
  options.attempt !== void 0 ? `# attempt: ${options.attempt + 1}` : void 0,
@@ -9463,10 +9230,10 @@ function summarizePiEvent(event) {
9463
9230
  return `${type}: ${role}`;
9464
9231
  }
9465
9232
  case "message_update": {
9466
- const event2 = record.assistantMessageEvent;
9467
- const eventType = event2?.type;
9233
+ const evt = record.assistantMessageEvent;
9234
+ const eventType = evt?.type;
9468
9235
  if (eventType === "text_delta") {
9469
- const delta = event2?.delta;
9236
+ const delta = evt?.delta;
9470
9237
  if (typeof delta === "string") {
9471
9238
  const preview = delta.length > 50 ? `${delta.slice(0, 50)}...` : delta;
9472
9239
  return `text_delta: ${preview}`;
@@ -9488,7 +9255,7 @@ function tryParseJsonValue(rawLine) {
9488
9255
  function parsePiJsonl(output) {
9489
9256
  const trimmed = output.trim();
9490
9257
  if (trimmed.length === 0) {
9491
- throw new Error("Pi coding agent produced no output");
9258
+ throw new Error("Pi CLI produced no output");
9492
9259
  }
9493
9260
  const lines = trimmed.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.length > 0);
9494
9261
  const parsed = [];
@@ -9499,38 +9266,27 @@ function parsePiJsonl(output) {
9499
9266
  }
9500
9267
  }
9501
9268
  if (parsed.length === 0) {
9502
- throw new Error("Pi coding agent produced no valid JSON output");
9269
+ throw new Error("Pi CLI produced no valid JSON output");
9503
9270
  }
9504
9271
  return parsed;
9505
9272
  }
9506
9273
  function extractMessages(events) {
9507
9274
  for (let i = events.length - 1; i >= 0; i--) {
9508
9275
  const event = events[i];
9509
- if (!event || typeof event !== "object") {
9510
- continue;
9511
- }
9276
+ if (!event || typeof event !== "object") continue;
9512
9277
  const record = event;
9513
- if (record.type !== "agent_end") {
9514
- continue;
9515
- }
9278
+ if (record.type !== "agent_end") continue;
9516
9279
  const messages = record.messages;
9517
- if (!Array.isArray(messages)) {
9518
- continue;
9519
- }
9280
+ if (!Array.isArray(messages)) continue;
9520
9281
  return messages.map(convertPiMessage).filter((m) => m !== void 0);
9521
9282
  }
9522
9283
  const output = [];
9523
9284
  for (const event of events) {
9524
- if (!event || typeof event !== "object") {
9525
- continue;
9526
- }
9285
+ if (!event || typeof event !== "object") continue;
9527
9286
  const record = event;
9528
9287
  if (record.type === "turn_end") {
9529
- const message = record.message;
9530
- const converted = convertPiMessage(message);
9531
- if (converted) {
9532
- output.push(converted);
9533
- }
9288
+ const converted = convertPiMessage(record.message);
9289
+ if (converted) output.push(converted);
9534
9290
  }
9535
9291
  }
9536
9292
  return output;
@@ -9547,10 +9303,7 @@ function extractTokenUsage(events) {
9547
9303
  const input = toFiniteNumber(u.input_tokens ?? u.inputTokens ?? u.input);
9548
9304
  const output = toFiniteNumber(u.output_tokens ?? u.outputTokens ?? u.output);
9549
9305
  if (input !== void 0 || output !== void 0) {
9550
- const result = {
9551
- input: input ?? 0,
9552
- output: output ?? 0
9553
- };
9306
+ const result = { input: input ?? 0, output: output ?? 0 };
9554
9307
  const cached = toFiniteNumber(u.cache_read_input_tokens ?? u.cached ?? u.cachedTokens);
9555
9308
  const reasoning = toFiniteNumber(u.reasoning_tokens ?? u.reasoningTokens ?? u.reasoning);
9556
9309
  return {
@@ -9598,16 +9351,12 @@ function aggregateUsageFromMessages(messages) {
9598
9351
  return result;
9599
9352
  }
9600
9353
  function convertPiMessage(message) {
9601
- if (!message || typeof message !== "object") {
9602
- return void 0;
9603
- }
9354
+ if (!message || typeof message !== "object") return void 0;
9604
9355
  const msg = message;
9605
9356
  const role = msg.role;
9606
- if (typeof role !== "string") {
9607
- return void 0;
9608
- }
9357
+ if (typeof role !== "string") return void 0;
9609
9358
  const content = extractPiTextContent(msg.content);
9610
- const toolCalls = extractToolCalls4(msg.content);
9359
+ const toolCalls = extractToolCalls3(msg.content);
9611
9360
  const startTime = typeof msg.timestamp === "number" ? new Date(msg.timestamp).toISOString() : typeof msg.timestamp === "string" ? msg.timestamp : void 0;
9612
9361
  const metadata = {};
9613
9362
  if (msg.api) metadata.api = msg.api;
@@ -9623,15 +9372,11 @@ function convertPiMessage(message) {
9623
9372
  metadata: Object.keys(metadata).length > 0 ? metadata : void 0
9624
9373
  };
9625
9374
  }
9626
- function extractToolCalls4(content) {
9627
- if (!Array.isArray(content)) {
9628
- return [];
9629
- }
9375
+ function extractToolCalls3(content) {
9376
+ if (!Array.isArray(content)) return [];
9630
9377
  const toolCalls = [];
9631
9378
  for (const part of content) {
9632
- if (!part || typeof part !== "object") {
9633
- continue;
9634
- }
9379
+ if (!part || typeof part !== "object") continue;
9635
9380
  const p = part;
9636
9381
  if (p.type === "tool_use" && typeof p.name === "string") {
9637
9382
  toolCalls.push({
@@ -9651,10 +9396,7 @@ function extractToolCalls4(content) {
9651
9396
  const existing = toolCalls.find((tc) => tc.id === p.tool_use_id);
9652
9397
  if (existing) {
9653
9398
  const idx = toolCalls.indexOf(existing);
9654
- toolCalls[idx] = {
9655
- ...existing,
9656
- output: p.content
9657
- };
9399
+ toolCalls[idx] = { ...existing, output: p.content };
9658
9400
  }
9659
9401
  }
9660
9402
  }
@@ -9665,18 +9407,13 @@ function escapeAtSymbols(prompt) {
9665
9407
  }
9666
9408
  function pickDetail(stderr, stdout) {
9667
9409
  const errorText = stderr.trim();
9668
- if (errorText.length > 0) {
9669
- return errorText;
9670
- }
9410
+ if (errorText.length > 0) return errorText;
9671
9411
  const stdoutText = stdout.trim();
9672
9412
  return stdoutText.length > 0 ? stdoutText : void 0;
9673
9413
  }
9674
9414
  function formatTimeoutSuffix3(timeoutMs) {
9675
- if (!timeoutMs || timeoutMs <= 0) {
9676
- return "";
9677
- }
9678
- const seconds = Math.ceil(timeoutMs / 1e3);
9679
- return ` after ${seconds}s`;
9415
+ if (!timeoutMs || timeoutMs <= 0) return "";
9416
+ return ` after ${Math.ceil(timeoutMs / 1e3)}s`;
9680
9417
  }
9681
9418
  async function defaultPiRunner(options) {
9682
9419
  return await new Promise((resolve, reject) => {
@@ -9711,39 +9448,497 @@ async function defaultPiRunner(options) {
9711
9448
  }, options.timeoutMs);
9712
9449
  timeoutHandle.unref?.();
9713
9450
  }
9714
- child.stdout.setEncoding("utf8");
9715
- child.stdout.on("data", (chunk) => {
9716
- stdout += chunk;
9717
- options.onStdoutChunk?.(chunk);
9718
- });
9719
- child.stderr.setEncoding("utf8");
9720
- child.stderr.on("data", (chunk) => {
9721
- stderr += chunk;
9722
- options.onStderrChunk?.(chunk);
9723
- });
9724
- child.stdin.end();
9725
- const cleanup = () => {
9726
- if (timeoutHandle) {
9727
- clearTimeout(timeoutHandle);
9728
- }
9729
- if (options.signal) {
9730
- options.signal.removeEventListener("abort", onAbort);
9731
- }
9732
- };
9733
- child.on("error", (error) => {
9734
- cleanup();
9735
- reject(error);
9736
- });
9737
- child.on("close", (code) => {
9738
- cleanup();
9739
- resolve({
9740
- stdout,
9741
- stderr,
9742
- exitCode: typeof code === "number" ? code : -1,
9743
- timedOut
9451
+ child.stdout.setEncoding("utf8");
9452
+ child.stdout.on("data", (chunk) => {
9453
+ stdout += chunk;
9454
+ options.onStdoutChunk?.(chunk);
9455
+ });
9456
+ child.stderr.setEncoding("utf8");
9457
+ child.stderr.on("data", (chunk) => {
9458
+ stderr += chunk;
9459
+ options.onStderrChunk?.(chunk);
9460
+ });
9461
+ child.stdin.end();
9462
+ const cleanup = () => {
9463
+ if (timeoutHandle) clearTimeout(timeoutHandle);
9464
+ if (options.signal) options.signal.removeEventListener("abort", onAbort);
9465
+ };
9466
+ child.on("error", (error) => {
9467
+ cleanup();
9468
+ reject(error);
9469
+ });
9470
+ child.on("close", (code) => {
9471
+ cleanup();
9472
+ resolve({
9473
+ stdout,
9474
+ stderr,
9475
+ exitCode: typeof code === "number" ? code : -1,
9476
+ timedOut
9477
+ });
9478
+ });
9479
+ });
9480
+ }
9481
+
9482
+ // src/evaluation/providers/pi-coding-agent.ts
9483
+ var import_node_child_process5 = require("child_process");
9484
+ var import_node_crypto8 = require("crypto");
9485
+ var import_node_fs9 = require("fs");
9486
+ var import_promises17 = require("fs/promises");
9487
+ var import_node_path20 = __toESM(require("path"), 1);
9488
+ var import_node_readline = require("readline");
9489
+ var piCodingAgentModule = null;
9490
+ var piAiModule = null;
9491
+ async function promptInstall() {
9492
+ if (!process.stdout.isTTY) return false;
9493
+ const rl = (0, import_node_readline.createInterface)({ input: process.stdin, output: process.stderr });
9494
+ try {
9495
+ return await new Promise((resolve) => {
9496
+ rl.question(
9497
+ "@mariozechner/pi-coding-agent is not installed. Install it now? (y/N) ",
9498
+ (answer) => resolve(answer.trim().toLowerCase() === "y")
9499
+ );
9500
+ });
9501
+ } finally {
9502
+ rl.close();
9503
+ }
9504
+ }
9505
+ async function loadSdkModules() {
9506
+ if (!piCodingAgentModule || !piAiModule) {
9507
+ try {
9508
+ [piCodingAgentModule, piAiModule] = await Promise.all([
9509
+ import("@mariozechner/pi-coding-agent"),
9510
+ import("@mariozechner/pi-ai")
9511
+ ]);
9512
+ } catch {
9513
+ if (await promptInstall()) {
9514
+ console.error("Installing @mariozechner/pi-coding-agent...");
9515
+ (0, import_node_child_process5.execSync)("bun add @mariozechner/pi-coding-agent", { stdio: "inherit" });
9516
+ [piCodingAgentModule, piAiModule] = await Promise.all([
9517
+ import("@mariozechner/pi-coding-agent"),
9518
+ import("@mariozechner/pi-ai")
9519
+ ]);
9520
+ } else {
9521
+ throw new Error(
9522
+ "pi-coding-agent SDK is not installed. Install it with:\n bun add @mariozechner/pi-coding-agent"
9523
+ );
9524
+ }
9525
+ }
9526
+ }
9527
+ const toolMap = {
9528
+ read: piCodingAgentModule.readTool,
9529
+ bash: piCodingAgentModule.bashTool,
9530
+ edit: piCodingAgentModule.editTool,
9531
+ write: piCodingAgentModule.writeTool,
9532
+ grep: piCodingAgentModule.grepTool,
9533
+ find: piCodingAgentModule.findTool,
9534
+ ls: piCodingAgentModule.lsTool
9535
+ };
9536
+ return {
9537
+ createAgentSession: piCodingAgentModule.createAgentSession,
9538
+ codingTools: piCodingAgentModule.codingTools,
9539
+ toolMap,
9540
+ SessionManager: piCodingAgentModule.SessionManager,
9541
+ getModel: piAiModule.getModel
9542
+ };
9543
+ }
9544
+ var PiCodingAgentProvider = class {
9545
+ id;
9546
+ kind = "pi-coding-agent";
9547
+ targetName;
9548
+ supportsBatch = false;
9549
+ config;
9550
+ constructor(targetName, config) {
9551
+ this.id = `pi-coding-agent:${targetName}`;
9552
+ this.targetName = targetName;
9553
+ this.config = config;
9554
+ }
9555
+ async invoke(request) {
9556
+ if (request.signal?.aborted) {
9557
+ throw new Error("Pi coding agent request was aborted before execution");
9558
+ }
9559
+ const inputFiles = normalizeInputFiles(request.inputFiles);
9560
+ const startTime = (/* @__PURE__ */ new Date()).toISOString();
9561
+ const startMs = Date.now();
9562
+ const sdk = await loadSdkModules();
9563
+ const logger = await this.createStreamLogger(request).catch(() => void 0);
9564
+ try {
9565
+ const cwd = this.resolveCwd(request.cwd);
9566
+ const providerName = this.config.subprovider ?? "google";
9567
+ const modelId = this.config.model ?? "gemini-2.5-flash";
9568
+ this.setApiKeyEnv(providerName);
9569
+ const model = sdk.getModel(providerName, modelId);
9570
+ const tools = this.resolveTools(sdk);
9571
+ const { session } = await sdk.createAgentSession({
9572
+ cwd,
9573
+ model,
9574
+ tools,
9575
+ thinkingLevel: this.config.thinking,
9576
+ sessionManager: sdk.SessionManager.inMemory(cwd)
9577
+ });
9578
+ let tokenUsage;
9579
+ let costUsd;
9580
+ const toolTrackers = /* @__PURE__ */ new Map();
9581
+ const completedToolResults = /* @__PURE__ */ new Map();
9582
+ const unsubscribe = session.subscribe((event) => {
9583
+ logger?.handleEvent(event);
9584
+ switch (event.type) {
9585
+ case "message_end": {
9586
+ const msg = event.message;
9587
+ if (msg && typeof msg === "object" && "role" in msg && msg.role === "assistant" && "usage" in msg) {
9588
+ const usage = msg.usage;
9589
+ if (usage && typeof usage === "object") {
9590
+ const u = usage;
9591
+ const input = toFiniteNumber(u.input);
9592
+ const output = toFiniteNumber(u.output);
9593
+ const cached = toFiniteNumber(u.cacheRead);
9594
+ let callDelta;
9595
+ if (input !== void 0 || output !== void 0) {
9596
+ callDelta = {
9597
+ input: input ?? 0,
9598
+ output: output ?? 0,
9599
+ ...cached !== void 0 ? { cached } : {}
9600
+ };
9601
+ tokenUsage = {
9602
+ input: (tokenUsage?.input ?? 0) + callDelta.input,
9603
+ output: (tokenUsage?.output ?? 0) + callDelta.output,
9604
+ ...cached !== void 0 ? { cached: (tokenUsage?.cached ?? 0) + cached } : tokenUsage?.cached !== void 0 ? { cached: tokenUsage.cached } : {}
9605
+ };
9606
+ }
9607
+ const cost = u.cost;
9608
+ if (cost && typeof cost === "object") {
9609
+ const total = toFiniteNumber(cost.total);
9610
+ if (total !== void 0) {
9611
+ costUsd = (costUsd ?? 0) + total;
9612
+ }
9613
+ }
9614
+ request.streamCallbacks?.onLlmCallEnd?.(modelId, callDelta);
9615
+ }
9616
+ }
9617
+ break;
9618
+ }
9619
+ case "tool_execution_start": {
9620
+ toolTrackers.set(event.toolCallId, {
9621
+ toolCallId: event.toolCallId,
9622
+ toolName: event.toolName,
9623
+ args: event.args,
9624
+ startMs: Date.now(),
9625
+ startTime: (/* @__PURE__ */ new Date()).toISOString()
9626
+ });
9627
+ request.streamCallbacks?.onToolCallStart?.(event.toolName, event.toolCallId);
9628
+ break;
9629
+ }
9630
+ case "tool_execution_end": {
9631
+ const tracker = toolTrackers.get(event.toolCallId);
9632
+ const durationMs = tracker ? Date.now() - tracker.startMs : 0;
9633
+ completedToolResults.set(event.toolCallId, {
9634
+ output: event.result,
9635
+ durationMs
9636
+ });
9637
+ request.streamCallbacks?.onToolCallEnd?.(
9638
+ event.toolName,
9639
+ tracker?.args,
9640
+ event.result,
9641
+ durationMs,
9642
+ event.toolCallId
9643
+ );
9644
+ toolTrackers.delete(event.toolCallId);
9645
+ break;
9646
+ }
9647
+ }
9648
+ });
9649
+ try {
9650
+ const systemPrompt = this.config.systemPrompt;
9651
+ let prompt = request.question;
9652
+ if (systemPrompt) {
9653
+ prompt = `${systemPrompt}
9654
+
9655
+ ${prompt}`;
9656
+ }
9657
+ if (inputFiles && inputFiles.length > 0) {
9658
+ const fileList = inputFiles.map((f) => `@${f}`).join("\n");
9659
+ prompt = `${prompt}
9660
+
9661
+ Files:
9662
+ ${fileList}`;
9663
+ }
9664
+ if (this.config.timeoutMs) {
9665
+ const timeoutMs = this.config.timeoutMs;
9666
+ let timeoutId;
9667
+ const timeoutPromise = new Promise((_, reject) => {
9668
+ timeoutId = setTimeout(
9669
+ () => reject(
9670
+ new Error(`Pi coding agent timed out after ${Math.ceil(timeoutMs / 1e3)}s`)
9671
+ ),
9672
+ timeoutMs
9673
+ );
9674
+ });
9675
+ try {
9676
+ await Promise.race([session.prompt(prompt), timeoutPromise]);
9677
+ } finally {
9678
+ if (timeoutId !== void 0) clearTimeout(timeoutId);
9679
+ }
9680
+ } else {
9681
+ await session.prompt(prompt);
9682
+ }
9683
+ const agentMessages = session.agent.state.messages;
9684
+ const output = [];
9685
+ for (const msg of agentMessages) {
9686
+ output.push(convertAgentMessage(msg, toolTrackers, completedToolResults));
9687
+ }
9688
+ const endTime = (/* @__PURE__ */ new Date()).toISOString();
9689
+ const durationMs = Date.now() - startMs;
9690
+ return {
9691
+ raw: {
9692
+ messages: agentMessages,
9693
+ model: this.config.model,
9694
+ provider: this.config.subprovider
9695
+ },
9696
+ output,
9697
+ tokenUsage,
9698
+ costUsd,
9699
+ durationMs,
9700
+ startTime,
9701
+ endTime
9702
+ };
9703
+ } finally {
9704
+ unsubscribe();
9705
+ session.dispose();
9706
+ }
9707
+ } finally {
9708
+ await logger?.close();
9709
+ }
9710
+ }
9711
+ /** Maps config apiKey to the provider-specific env var the SDK reads. */
9712
+ setApiKeyEnv(providerName) {
9713
+ if (!this.config.apiKey) return;
9714
+ const ENV_KEY_MAP = {
9715
+ google: "GEMINI_API_KEY",
9716
+ gemini: "GEMINI_API_KEY",
9717
+ anthropic: "ANTHROPIC_API_KEY",
9718
+ openai: "OPENAI_API_KEY",
9719
+ groq: "GROQ_API_KEY",
9720
+ xai: "XAI_API_KEY",
9721
+ openrouter: "OPENROUTER_API_KEY"
9722
+ };
9723
+ const envKey = ENV_KEY_MAP[providerName.toLowerCase()];
9724
+ if (envKey) {
9725
+ process.env[envKey] = this.config.apiKey;
9726
+ }
9727
+ }
9728
+ resolveCwd(cwdOverride) {
9729
+ if (cwdOverride) {
9730
+ return import_node_path20.default.resolve(cwdOverride);
9731
+ }
9732
+ if (this.config.cwd) {
9733
+ return import_node_path20.default.resolve(this.config.cwd);
9734
+ }
9735
+ return process.cwd();
9736
+ }
9737
+ resolveTools(sdk) {
9738
+ if (!this.config.tools) {
9739
+ return sdk.codingTools;
9740
+ }
9741
+ const toolNames = this.config.tools.split(",").map((t) => t.trim().toLowerCase());
9742
+ const selected = [];
9743
+ for (const name of toolNames) {
9744
+ if (name in sdk.toolMap) {
9745
+ selected.push(sdk.toolMap[name]);
9746
+ }
9747
+ }
9748
+ return selected.length > 0 ? selected : sdk.codingTools;
9749
+ }
9750
+ resolveLogDirectory() {
9751
+ if (this.config.logDir) {
9752
+ return import_node_path20.default.resolve(this.config.logDir);
9753
+ }
9754
+ return import_node_path20.default.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
9755
+ }
9756
+ async createStreamLogger(request) {
9757
+ const logDir = this.resolveLogDirectory();
9758
+ if (!logDir) {
9759
+ return void 0;
9760
+ }
9761
+ try {
9762
+ await (0, import_promises17.mkdir)(logDir, { recursive: true });
9763
+ } catch (error) {
9764
+ const message = error instanceof Error ? error.message : String(error);
9765
+ console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
9766
+ return void 0;
9767
+ }
9768
+ const filePath = import_node_path20.default.join(logDir, buildLogFilename6(request, this.targetName));
9769
+ try {
9770
+ const logger = await PiStreamLogger2.create({
9771
+ filePath,
9772
+ targetName: this.targetName,
9773
+ evalCaseId: request.evalCaseId,
9774
+ attempt: request.attempt,
9775
+ format: this.config.logFormat ?? "summary"
9776
+ });
9777
+ recordPiLogEntry({
9778
+ filePath,
9779
+ targetName: this.targetName,
9780
+ evalCaseId: request.evalCaseId,
9781
+ attempt: request.attempt
9782
+ });
9783
+ return logger;
9784
+ } catch (error) {
9785
+ const message = error instanceof Error ? error.message : String(error);
9786
+ console.warn(`Skipping Pi stream logging for ${filePath}: ${message}`);
9787
+ return void 0;
9788
+ }
9789
+ }
9790
+ };
9791
+ var PiStreamLogger2 = class _PiStreamLogger {
9792
+ filePath;
9793
+ stream;
9794
+ startedAt = Date.now();
9795
+ format;
9796
+ constructor(filePath, format) {
9797
+ this.filePath = filePath;
9798
+ this.format = format;
9799
+ this.stream = (0, import_node_fs9.createWriteStream)(filePath, { flags: "a" });
9800
+ }
9801
+ static async create(options) {
9802
+ const logger = new _PiStreamLogger(options.filePath, options.format);
9803
+ const header = [
9804
+ "# Pi Coding Agent stream log",
9805
+ `# target: ${options.targetName}`,
9806
+ options.evalCaseId ? `# eval: ${options.evalCaseId}` : void 0,
9807
+ options.attempt !== void 0 ? `# attempt: ${options.attempt + 1}` : void 0,
9808
+ `# started: ${(/* @__PURE__ */ new Date()).toISOString()}`,
9809
+ ""
9810
+ ].filter((line) => Boolean(line));
9811
+ for (const line of header) {
9812
+ logger.stream.write(`${line}
9813
+ `);
9814
+ }
9815
+ return logger;
9816
+ }
9817
+ handleEvent(event) {
9818
+ if (!event || typeof event !== "object") return;
9819
+ const record = event;
9820
+ const type = typeof record.type === "string" ? record.type : void 0;
9821
+ if (!type) return;
9822
+ const message = this.format === "json" ? JSON.stringify(event, null, 2) : summarizeSdkEvent2(event);
9823
+ if (message) {
9824
+ this.stream.write(`[+${formatElapsed6(this.startedAt)}] ${message}
9825
+ `);
9826
+ }
9827
+ }
9828
+ async close() {
9829
+ await new Promise((resolve, reject) => {
9830
+ this.stream.once("error", reject);
9831
+ this.stream.end(() => resolve());
9832
+ });
9833
+ }
9834
+ };
9835
+ function summarizeSdkEvent2(event) {
9836
+ if (!event || typeof event !== "object") return void 0;
9837
+ const record = event;
9838
+ const type = typeof record.type === "string" ? record.type : void 0;
9839
+ if (!type) return void 0;
9840
+ switch (type) {
9841
+ case "agent_start":
9842
+ case "agent_end":
9843
+ case "turn_start":
9844
+ case "turn_end":
9845
+ return type;
9846
+ case "message_start":
9847
+ case "message_end": {
9848
+ const msg = record.message;
9849
+ return `${type}: ${msg?.role ?? "unknown"}`;
9850
+ }
9851
+ case "tool_execution_start":
9852
+ return `tool_start: ${record.toolName}`;
9853
+ case "tool_execution_end":
9854
+ return `tool_end: ${record.toolName}`;
9855
+ default:
9856
+ return type;
9857
+ }
9858
+ }
9859
+ function buildLogFilename6(request, targetName) {
9860
+ const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
9861
+ const evalId = sanitizeForFilename6(request.evalCaseId ?? "pi");
9862
+ const attemptSuffix = request.attempt !== void 0 ? `_attempt-${request.attempt + 1}` : "";
9863
+ const target = sanitizeForFilename6(targetName);
9864
+ return `${timestamp}_${target}_${evalId}${attemptSuffix}_${(0, import_node_crypto8.randomUUID)().slice(0, 8)}.log`;
9865
+ }
9866
+ function sanitizeForFilename6(value) {
9867
+ const sanitized = value.replace(/[^A-Za-z0-9._-]+/g, "_");
9868
+ return sanitized.length > 0 ? sanitized : "pi";
9869
+ }
9870
+ function formatElapsed6(startedAt) {
9871
+ const elapsedSeconds = Math.floor((Date.now() - startedAt) / 1e3);
9872
+ const hours = Math.floor(elapsedSeconds / 3600);
9873
+ const minutes = Math.floor(elapsedSeconds % 3600 / 60);
9874
+ const seconds = elapsedSeconds % 60;
9875
+ if (hours > 0) {
9876
+ return `${hours.toString().padStart(2, "0")}:${minutes.toString().padStart(2, "0")}:${seconds.toString().padStart(2, "0")}`;
9877
+ }
9878
+ return `${minutes.toString().padStart(2, "0")}:${seconds.toString().padStart(2, "0")}`;
9879
+ }
9880
+ function convertAgentMessage(message, toolTrackers, completedToolResults) {
9881
+ if (!message || typeof message !== "object") {
9882
+ return { role: "unknown", content: String(message) };
9883
+ }
9884
+ const msg = message;
9885
+ const role = typeof msg.role === "string" ? msg.role : "unknown";
9886
+ const content = extractPiTextContent(msg.content);
9887
+ const toolCalls = extractToolCalls4(msg.content, toolTrackers, completedToolResults);
9888
+ const startTimeVal = typeof msg.timestamp === "number" ? new Date(msg.timestamp).toISOString() : typeof msg.timestamp === "string" ? msg.timestamp : void 0;
9889
+ let msgTokenUsage;
9890
+ if (msg.usage && typeof msg.usage === "object") {
9891
+ const u = msg.usage;
9892
+ const input = toFiniteNumber(u.input);
9893
+ const output = toFiniteNumber(u.output);
9894
+ if (input !== void 0 || output !== void 0) {
9895
+ msgTokenUsage = {
9896
+ input: input ?? 0,
9897
+ output: output ?? 0,
9898
+ ...toFiniteNumber(u.cacheRead) !== void 0 ? { cached: toFiniteNumber(u.cacheRead) } : {}
9899
+ };
9900
+ }
9901
+ }
9902
+ const metadata = {};
9903
+ if (msg.api) metadata.api = msg.api;
9904
+ if (msg.provider) metadata.provider = msg.provider;
9905
+ if (msg.model) metadata.model = msg.model;
9906
+ if (msg.stopReason) metadata.stopReason = msg.stopReason;
9907
+ return {
9908
+ role,
9909
+ content,
9910
+ toolCalls: toolCalls.length > 0 ? toolCalls : void 0,
9911
+ startTime: startTimeVal,
9912
+ metadata: Object.keys(metadata).length > 0 ? metadata : void 0,
9913
+ tokenUsage: msgTokenUsage
9914
+ };
9915
+ }
9916
+ function extractToolCalls4(content, toolTrackers, completedToolResults) {
9917
+ if (!Array.isArray(content)) {
9918
+ return [];
9919
+ }
9920
+ const toolCalls = [];
9921
+ for (const part of content) {
9922
+ if (!part || typeof part !== "object") {
9923
+ continue;
9924
+ }
9925
+ const p = part;
9926
+ if (p.type === "toolCall" && typeof p.name === "string") {
9927
+ const id = typeof p.id === "string" ? p.id : void 0;
9928
+ const tracker = id ? toolTrackers.get(id) : void 0;
9929
+ const completed = id ? completedToolResults.get(id) : void 0;
9930
+ toolCalls.push({
9931
+ tool: p.name,
9932
+ input: p.arguments,
9933
+ id,
9934
+ output: completed?.output,
9935
+ durationMs: completed?.durationMs,
9936
+ startTime: tracker?.startTime,
9937
+ endTime: tracker?.startTime && completed?.durationMs !== void 0 ? new Date(new Date(tracker.startTime).getTime() + completed.durationMs).toISOString() : void 0
9744
9938
  });
9745
- });
9746
- });
9939
+ }
9940
+ }
9941
+ return toolCalls;
9747
9942
  }
9748
9943
 
9749
9944
  // src/evaluation/providers/provider-registry.ts
@@ -9782,7 +9977,7 @@ var ProviderRegistry = class {
9782
9977
  };
9783
9978
 
9784
9979
  // src/evaluation/providers/targets.ts
9785
- var import_node_path20 = __toESM(require("path"), 1);
9980
+ var import_node_path21 = __toESM(require("path"), 1);
9786
9981
  var import_zod3 = require("zod");
9787
9982
  var CliHealthcheckHttpInputSchema = import_zod3.z.object({
9788
9983
  url: import_zod3.z.string().min(1, "healthcheck URL is required"),
@@ -9879,11 +10074,11 @@ function normalizeCliHealthcheck(input, env, targetName, evalFilePath) {
9879
10074
  allowLiteral: true,
9880
10075
  optionalEnv: true
9881
10076
  });
9882
- if (cwd && evalFilePath && !import_node_path20.default.isAbsolute(cwd)) {
9883
- cwd = import_node_path20.default.resolve(import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath)), cwd);
10077
+ if (cwd && evalFilePath && !import_node_path21.default.isAbsolute(cwd)) {
10078
+ cwd = import_node_path21.default.resolve(import_node_path21.default.dirname(import_node_path21.default.resolve(evalFilePath)), cwd);
9884
10079
  }
9885
10080
  if (!cwd && evalFilePath) {
9886
- cwd = import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath));
10081
+ cwd = import_node_path21.default.dirname(import_node_path21.default.resolve(evalFilePath));
9887
10082
  }
9888
10083
  return {
9889
10084
  command,
@@ -9906,15 +10101,15 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
9906
10101
  optionalEnv: true
9907
10102
  }
9908
10103
  );
9909
- if (workspaceTemplate && evalFilePath && !import_node_path20.default.isAbsolute(workspaceTemplate)) {
9910
- workspaceTemplate = import_node_path20.default.resolve(import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath)), workspaceTemplate);
10104
+ if (workspaceTemplate && evalFilePath && !import_node_path21.default.isAbsolute(workspaceTemplate)) {
10105
+ workspaceTemplate = import_node_path21.default.resolve(import_node_path21.default.dirname(import_node_path21.default.resolve(evalFilePath)), workspaceTemplate);
9911
10106
  }
9912
10107
  let cwd = resolveOptionalString(input.cwd, env, `${targetName} working directory`, {
9913
10108
  allowLiteral: true,
9914
10109
  optionalEnv: true
9915
10110
  });
9916
- if (cwd && evalFilePath && !import_node_path20.default.isAbsolute(cwd)) {
9917
- cwd = import_node_path20.default.resolve(import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath)), cwd);
10111
+ if (cwd && evalFilePath && !import_node_path21.default.isAbsolute(cwd)) {
10112
+ cwd = import_node_path21.default.resolve(import_node_path21.default.dirname(import_node_path21.default.resolve(evalFilePath)), cwd);
9918
10113
  }
9919
10114
  if (cwd && workspaceTemplate) {
9920
10115
  throw new Error(
@@ -9922,7 +10117,7 @@ function normalizeCliTargetInput(input, env, evalFilePath) {
9922
10117
  );
9923
10118
  }
9924
10119
  if (!cwd && !workspaceTemplate && evalFilePath) {
9925
- cwd = import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath));
10120
+ cwd = import_node_path21.default.dirname(import_node_path21.default.resolve(evalFilePath));
9926
10121
  }
9927
10122
  const timeoutSeconds = input.timeout_seconds ?? input.timeoutSeconds;
9928
10123
  const timeoutMs = timeoutSeconds !== void 0 ? Math.floor(timeoutSeconds * 1e3) : void 0;
@@ -10110,14 +10305,14 @@ function resolveTargetDefinition(definition, env = process.env, evalFilePath) {
10110
10305
  providerBatching,
10111
10306
  config: resolvePiCodingAgentConfig(parsed, env, evalFilePath)
10112
10307
  };
10113
- case "pi-agent-sdk":
10308
+ case "pi-cli":
10114
10309
  return {
10115
- kind: "pi-agent-sdk",
10310
+ kind: "pi-cli",
10116
10311
  name: parsed.name,
10117
10312
  graderTarget: parsed.grader_target ?? parsed.judge_target,
10118
10313
  workers: parsed.workers,
10119
10314
  providerBatching,
10120
- config: resolvePiAgentSdkConfig(parsed, env)
10315
+ config: resolvePiCliConfig(parsed, env, evalFilePath)
10121
10316
  };
10122
10317
  case "claude":
10123
10318
  case "claude-code":
@@ -10344,8 +10539,8 @@ function resolveCodexConfig(target, env, evalFilePath) {
10344
10539
  optionalEnv: true
10345
10540
  }
10346
10541
  );
10347
- if (workspaceTemplate && evalFilePath && !import_node_path20.default.isAbsolute(workspaceTemplate)) {
10348
- workspaceTemplate = import_node_path20.default.resolve(import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath)), workspaceTemplate);
10542
+ if (workspaceTemplate && evalFilePath && !import_node_path21.default.isAbsolute(workspaceTemplate)) {
10543
+ workspaceTemplate = import_node_path21.default.resolve(import_node_path21.default.dirname(import_node_path21.default.resolve(evalFilePath)), workspaceTemplate);
10349
10544
  }
10350
10545
  if (cwd && workspaceTemplate) {
10351
10546
  throw new Error(
@@ -10429,8 +10624,8 @@ function resolveCopilotSdkConfig(target, env, evalFilePath) {
10429
10624
  optionalEnv: true
10430
10625
  }
10431
10626
  );
10432
- if (workspaceTemplate && evalFilePath && !import_node_path20.default.isAbsolute(workspaceTemplate)) {
10433
- workspaceTemplate = import_node_path20.default.resolve(import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath)), workspaceTemplate);
10627
+ if (workspaceTemplate && evalFilePath && !import_node_path21.default.isAbsolute(workspaceTemplate)) {
10628
+ workspaceTemplate = import_node_path21.default.resolve(import_node_path21.default.dirname(import_node_path21.default.resolve(evalFilePath)), workspaceTemplate);
10434
10629
  }
10435
10630
  if (cwd && workspaceTemplate) {
10436
10631
  throw new Error(
@@ -10494,8 +10689,8 @@ function resolveCopilotCliConfig(target, env, evalFilePath) {
10494
10689
  optionalEnv: true
10495
10690
  }
10496
10691
  );
10497
- if (workspaceTemplate && evalFilePath && !import_node_path20.default.isAbsolute(workspaceTemplate)) {
10498
- workspaceTemplate = import_node_path20.default.resolve(import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath)), workspaceTemplate);
10692
+ if (workspaceTemplate && evalFilePath && !import_node_path21.default.isAbsolute(workspaceTemplate)) {
10693
+ workspaceTemplate = import_node_path21.default.resolve(import_node_path21.default.dirname(import_node_path21.default.resolve(evalFilePath)), workspaceTemplate);
10499
10694
  }
10500
10695
  if (cwd && workspaceTemplate) {
10501
10696
  throw new Error(
@@ -10534,23 +10729,17 @@ function normalizeCopilotLogFormat(value) {
10534
10729
  throw new Error("copilot log format must be 'summary' or 'json'");
10535
10730
  }
10536
10731
  function resolvePiCodingAgentConfig(target, env, evalFilePath) {
10537
- const executableSource = target.executable ?? target.command ?? target.binary;
10538
10732
  const subproviderSource = target.subprovider;
10539
10733
  const modelSource = target.model ?? target.pi_model ?? target.piModel;
10540
10734
  const apiKeySource = target.api_key ?? target.apiKey;
10541
10735
  const toolsSource = target.tools ?? target.pi_tools ?? target.piTools;
10542
10736
  const thinkingSource = target.thinking ?? target.pi_thinking ?? target.piThinking;
10543
- const argsSource = target.args ?? target.arguments;
10544
10737
  const cwdSource = target.cwd;
10545
10738
  const workspaceTemplateSource = target.workspace_template ?? target.workspaceTemplate;
10546
10739
  const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
10547
10740
  const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
10548
10741
  const logFormatSource = target.log_format ?? target.logFormat;
10549
10742
  const systemPromptSource = target.system_prompt ?? target.systemPrompt;
10550
- const executable = resolveOptionalString(executableSource, env, `${target.name} pi executable`, {
10551
- allowLiteral: true,
10552
- optionalEnv: true
10553
- }) ?? "pi";
10554
10743
  const subprovider = resolveOptionalString(
10555
10744
  subproviderSource,
10556
10745
  env,
@@ -10576,7 +10765,6 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
10576
10765
  allowLiteral: true,
10577
10766
  optionalEnv: true
10578
10767
  });
10579
- const args = resolveOptionalStringArray(argsSource, env, `${target.name} pi args`);
10580
10768
  const cwd = resolveOptionalString(cwdSource, env, `${target.name} pi cwd`, {
10581
10769
  allowLiteral: true,
10582
10770
  optionalEnv: true
@@ -10590,8 +10778,8 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
10590
10778
  optionalEnv: true
10591
10779
  }
10592
10780
  );
10593
- if (workspaceTemplate && evalFilePath && !import_node_path20.default.isAbsolute(workspaceTemplate)) {
10594
- workspaceTemplate = import_node_path20.default.resolve(import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath)), workspaceTemplate);
10781
+ if (workspaceTemplate && evalFilePath && !import_node_path21.default.isAbsolute(workspaceTemplate)) {
10782
+ workspaceTemplate = import_node_path21.default.resolve(import_node_path21.default.dirname(import_node_path21.default.resolve(evalFilePath)), workspaceTemplate);
10595
10783
  }
10596
10784
  if (cwd && workspaceTemplate) {
10597
10785
  throw new Error(
@@ -10606,13 +10794,11 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
10606
10794
  const logFormat = logFormatSource === "json" || logFormatSource === "summary" ? logFormatSource : void 0;
10607
10795
  const systemPrompt = typeof systemPromptSource === "string" && systemPromptSource.trim().length > 0 ? systemPromptSource.trim() : void 0;
10608
10796
  return {
10609
- executable,
10610
10797
  subprovider,
10611
10798
  model,
10612
10799
  apiKey,
10613
10800
  tools,
10614
10801
  thinking,
10615
- args,
10616
10802
  cwd,
10617
10803
  workspaceTemplate,
10618
10804
  timeoutMs,
@@ -10621,36 +10807,83 @@ function resolvePiCodingAgentConfig(target, env, evalFilePath) {
10621
10807
  systemPrompt
10622
10808
  };
10623
10809
  }
10624
- function resolvePiAgentSdkConfig(target, env) {
10810
+ function resolvePiCliConfig(target, env, evalFilePath) {
10811
+ const executableSource = target.executable ?? target.command ?? target.binary;
10625
10812
  const subproviderSource = target.subprovider;
10626
10813
  const modelSource = target.model ?? target.pi_model ?? target.piModel;
10627
10814
  const apiKeySource = target.api_key ?? target.apiKey;
10815
+ const toolsSource = target.tools ?? target.pi_tools ?? target.piTools;
10816
+ const thinkingSource = target.thinking ?? target.pi_thinking ?? target.piThinking;
10817
+ const cwdSource = target.cwd;
10818
+ const workspaceTemplateSource = target.workspace_template ?? target.workspaceTemplate;
10628
10819
  const timeoutSource = target.timeout_seconds ?? target.timeoutSeconds;
10820
+ const logDirSource = target.log_dir ?? target.logDir ?? target.log_directory ?? target.logDirectory;
10821
+ const logFormatSource = target.log_format ?? target.logFormat;
10629
10822
  const systemPromptSource = target.system_prompt ?? target.systemPrompt;
10823
+ const executable = resolveOptionalString(executableSource, env, `${target.name} pi-cli executable`, {
10824
+ allowLiteral: true,
10825
+ optionalEnv: true
10826
+ }) ?? "pi";
10630
10827
  const subprovider = resolveOptionalString(
10631
10828
  subproviderSource,
10632
10829
  env,
10633
- `${target.name} pi-agent-sdk subprovider`,
10634
- {
10635
- allowLiteral: true,
10636
- optionalEnv: true
10637
- }
10830
+ `${target.name} pi-cli subprovider`,
10831
+ { allowLiteral: true, optionalEnv: true }
10638
10832
  );
10639
- const model = resolveOptionalString(modelSource, env, `${target.name} pi-agent-sdk model`, {
10833
+ const model = resolveOptionalString(modelSource, env, `${target.name} pi-cli model`, {
10640
10834
  allowLiteral: true,
10641
10835
  optionalEnv: true
10642
10836
  });
10643
- const apiKey = resolveOptionalString(apiKeySource, env, `${target.name} pi-agent-sdk api key`, {
10837
+ const apiKey = resolveOptionalString(apiKeySource, env, `${target.name} pi-cli api key`, {
10644
10838
  allowLiteral: false,
10645
10839
  optionalEnv: true
10646
10840
  });
10647
- const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} pi-agent-sdk timeout`);
10841
+ const tools = resolveOptionalString(toolsSource, env, `${target.name} pi-cli tools`, {
10842
+ allowLiteral: true,
10843
+ optionalEnv: true
10844
+ });
10845
+ const thinking = resolveOptionalString(thinkingSource, env, `${target.name} pi-cli thinking`, {
10846
+ allowLiteral: true,
10847
+ optionalEnv: true
10848
+ });
10849
+ const rawArgs = target.args ?? target.arguments;
10850
+ const args = resolveOptionalStringArray(rawArgs, env, `${target.name} pi-cli args`);
10851
+ const cwd = resolveOptionalString(cwdSource, env, `${target.name} pi-cli cwd`, {
10852
+ allowLiteral: true,
10853
+ optionalEnv: true
10854
+ });
10855
+ let workspaceTemplate = resolveOptionalString(
10856
+ workspaceTemplateSource,
10857
+ env,
10858
+ `${target.name} pi-cli workspace template`,
10859
+ { allowLiteral: true, optionalEnv: true }
10860
+ );
10861
+ if (workspaceTemplate && evalFilePath && !import_node_path21.default.isAbsolute(workspaceTemplate)) {
10862
+ workspaceTemplate = import_node_path21.default.resolve(import_node_path21.default.dirname(import_node_path21.default.resolve(evalFilePath)), workspaceTemplate);
10863
+ }
10864
+ if (cwd && workspaceTemplate) {
10865
+ throw new Error(`${target.name}: 'cwd' and 'workspace_template' are mutually exclusive.`);
10866
+ }
10867
+ const timeoutMs = resolveTimeoutMs(timeoutSource, `${target.name} pi-cli timeout`);
10868
+ const logDir = resolveOptionalString(logDirSource, env, `${target.name} pi-cli log directory`, {
10869
+ allowLiteral: true,
10870
+ optionalEnv: true
10871
+ });
10872
+ const logFormat = logFormatSource === "json" || logFormatSource === "summary" ? logFormatSource : void 0;
10648
10873
  const systemPrompt = typeof systemPromptSource === "string" && systemPromptSource.trim().length > 0 ? systemPromptSource.trim() : void 0;
10649
10874
  return {
10875
+ executable,
10650
10876
  subprovider,
10651
10877
  model,
10652
10878
  apiKey,
10879
+ tools,
10880
+ thinking,
10881
+ args,
10882
+ cwd,
10883
+ workspaceTemplate,
10653
10884
  timeoutMs,
10885
+ logDir,
10886
+ logFormat,
10654
10887
  systemPrompt
10655
10888
  };
10656
10889
  }
@@ -10679,8 +10912,8 @@ function resolveClaudeConfig(target, env, evalFilePath) {
10679
10912
  optionalEnv: true
10680
10913
  }
10681
10914
  );
10682
- if (workspaceTemplate && evalFilePath && !import_node_path20.default.isAbsolute(workspaceTemplate)) {
10683
- workspaceTemplate = import_node_path20.default.resolve(import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath)), workspaceTemplate);
10915
+ if (workspaceTemplate && evalFilePath && !import_node_path21.default.isAbsolute(workspaceTemplate)) {
10916
+ workspaceTemplate = import_node_path21.default.resolve(import_node_path21.default.dirname(import_node_path21.default.resolve(evalFilePath)), workspaceTemplate);
10684
10917
  }
10685
10918
  if (cwd && workspaceTemplate) {
10686
10919
  throw new Error(
@@ -10738,8 +10971,8 @@ function resolveVSCodeConfig(target, env, insiders, evalFilePath) {
10738
10971
  optionalEnv: true
10739
10972
  }
10740
10973
  ) : void 0;
10741
- if (workspaceTemplate && evalFilePath && !import_node_path20.default.isAbsolute(workspaceTemplate)) {
10742
- workspaceTemplate = import_node_path20.default.resolve(import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath)), workspaceTemplate);
10974
+ if (workspaceTemplate && evalFilePath && !import_node_path21.default.isAbsolute(workspaceTemplate)) {
10975
+ workspaceTemplate = import_node_path21.default.resolve(import_node_path21.default.dirname(import_node_path21.default.resolve(evalFilePath)), workspaceTemplate);
10743
10976
  }
10744
10977
  const executableSource = target.executable;
10745
10978
  const waitSource = target.wait;
@@ -10780,8 +11013,8 @@ function resolveCliConfig(target, env, evalFilePath) {
10780
11013
  const parseResult = CliTargetInputSchema.safeParse(target, { errorMap: cliErrorMap });
10781
11014
  if (!parseResult.success) {
10782
11015
  const firstError = parseResult.error.errors[0];
10783
- const path47 = firstError?.path.join(".") || "";
10784
- const prefix = path47 ? `${target.name} ${path47}: ` : `${target.name}: `;
11016
+ const path48 = firstError?.path.join(".") || "";
11017
+ const prefix = path48 ? `${target.name} ${path48}: ` : `${target.name}: `;
10785
11018
  throw new Error(`${prefix}${firstError?.message}`);
10786
11019
  }
10787
11020
  const normalized = normalizeCliTargetInput(parseResult.data, env, evalFilePath);
@@ -10802,11 +11035,11 @@ function resolveDiscoveredProviderConfig(target, providerKind, env, evalFilePath
10802
11035
  allowLiteral: true,
10803
11036
  optionalEnv: true
10804
11037
  });
10805
- if (cwd && evalFilePath && !import_node_path20.default.isAbsolute(cwd)) {
10806
- cwd = import_node_path20.default.resolve(import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath)), cwd);
11038
+ if (cwd && evalFilePath && !import_node_path21.default.isAbsolute(cwd)) {
11039
+ cwd = import_node_path21.default.resolve(import_node_path21.default.dirname(import_node_path21.default.resolve(evalFilePath)), cwd);
10807
11040
  }
10808
11041
  if (!cwd && evalFilePath) {
10809
- cwd = import_node_path20.default.dirname(import_node_path20.default.resolve(evalFilePath));
11042
+ cwd = import_node_path21.default.dirname(import_node_path21.default.resolve(evalFilePath));
10810
11043
  }
10811
11044
  return {
10812
11045
  command,
@@ -10989,41 +11222,41 @@ function resolveOptionalNumberArray(source, description) {
10989
11222
  }
10990
11223
 
10991
11224
  // src/evaluation/providers/vscode-provider.ts
10992
- var import_node_child_process6 = require("child_process");
10993
- var import_promises23 = require("fs/promises");
10994
- var import_node_path32 = __toESM(require("path"), 1);
11225
+ var import_node_child_process7 = require("child_process");
11226
+ var import_promises24 = require("fs/promises");
11227
+ var import_node_path33 = __toESM(require("path"), 1);
10995
11228
  var import_node_util3 = require("util");
10996
11229
 
10997
11230
  // src/evaluation/providers/vscode/dispatch/agentDispatch.ts
10998
- var import_promises21 = require("fs/promises");
10999
- var import_node_path30 = __toESM(require("path"), 1);
11231
+ var import_promises22 = require("fs/promises");
11232
+ var import_node_path31 = __toESM(require("path"), 1);
11000
11233
 
11001
11234
  // src/evaluation/providers/vscode/utils/fs.ts
11002
- var import_node_fs9 = require("fs");
11003
- var import_promises17 = require("fs/promises");
11004
- var import_node_path21 = __toESM(require("path"), 1);
11235
+ var import_node_fs10 = require("fs");
11236
+ var import_promises18 = require("fs/promises");
11237
+ var import_node_path22 = __toESM(require("path"), 1);
11005
11238
  async function pathExists(target) {
11006
11239
  try {
11007
- await (0, import_promises17.access)(target, import_node_fs9.constants.F_OK);
11240
+ await (0, import_promises18.access)(target, import_node_fs10.constants.F_OK);
11008
11241
  return true;
11009
11242
  } catch {
11010
11243
  return false;
11011
11244
  }
11012
11245
  }
11013
11246
  async function ensureDir(target) {
11014
- await (0, import_promises17.mkdir)(target, { recursive: true });
11247
+ await (0, import_promises18.mkdir)(target, { recursive: true });
11015
11248
  }
11016
11249
  async function readDirEntries(target) {
11017
- const entries = await (0, import_promises17.readdir)(target, { withFileTypes: true });
11250
+ const entries = await (0, import_promises18.readdir)(target, { withFileTypes: true });
11018
11251
  return entries.map((entry) => ({
11019
11252
  name: entry.name,
11020
- absolutePath: import_node_path21.default.join(target, entry.name),
11253
+ absolutePath: import_node_path22.default.join(target, entry.name),
11021
11254
  isDirectory: entry.isDirectory()
11022
11255
  }));
11023
11256
  }
11024
11257
  async function removeIfExists(target) {
11025
11258
  try {
11026
- await (0, import_promises17.rm)(target, { force: true, recursive: false });
11259
+ await (0, import_promises18.rm)(target, { force: true, recursive: false });
11027
11260
  } catch (error) {
11028
11261
  if (error.code !== "ENOENT") {
11029
11262
  throw error;
@@ -11032,9 +11265,9 @@ async function removeIfExists(target) {
11032
11265
  }
11033
11266
 
11034
11267
  // src/evaluation/providers/vscode/utils/path.ts
11035
- var import_node_path22 = __toESM(require("path"), 1);
11268
+ var import_node_path23 = __toESM(require("path"), 1);
11036
11269
  function pathToFileUri2(filePath) {
11037
- const absolutePath = import_node_path22.default.isAbsolute(filePath) ? filePath : import_node_path22.default.resolve(filePath);
11270
+ const absolutePath = import_node_path23.default.isAbsolute(filePath) ? filePath : import_node_path23.default.resolve(filePath);
11038
11271
  const normalizedPath = absolutePath.replace(/\\/g, "/");
11039
11272
  if (/^[a-zA-Z]:\//.test(normalizedPath)) {
11040
11273
  return `file:///${normalizedPath}`;
@@ -11043,7 +11276,7 @@ function pathToFileUri2(filePath) {
11043
11276
  }
11044
11277
 
11045
11278
  // src/evaluation/providers/vscode/dispatch/promptBuilder.ts
11046
- var import_node_path23 = __toESM(require("path"), 1);
11279
+ var import_node_path24 = __toESM(require("path"), 1);
11047
11280
 
11048
11281
  // src/evaluation/providers/vscode/utils/template.ts
11049
11282
  function renderTemplate2(content, variables) {
@@ -11135,8 +11368,8 @@ function createBatchRequestPrompt(userQuery, responseFileTmp, responseFileFinal,
11135
11368
  });
11136
11369
  }
11137
11370
  function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateContent) {
11138
- const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${import_node_path23.default.basename(file)}`).join("\n");
11139
- const responseList = responseFiles.map((file) => `"${import_node_path23.default.basename(file)}"`).join(", ");
11371
+ const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${import_node_path24.default.basename(file)}`).join("\n");
11372
+ const responseList = responseFiles.map((file) => `"${import_node_path24.default.basename(file)}"`).join(", ");
11140
11373
  return renderTemplate2(templateContent, {
11141
11374
  requestFiles: requestLines,
11142
11375
  responseList
@@ -11144,8 +11377,8 @@ function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateCont
11144
11377
  }
11145
11378
 
11146
11379
  // src/evaluation/providers/vscode/dispatch/responseWaiter.ts
11147
- var import_promises18 = require("fs/promises");
11148
- var import_node_path24 = __toESM(require("path"), 1);
11380
+ var import_promises19 = require("fs/promises");
11381
+ var import_node_path25 = __toESM(require("path"), 1);
11149
11382
 
11150
11383
  // src/evaluation/providers/vscode/utils/time.ts
11151
11384
  function sleep2(ms) {
@@ -11183,7 +11416,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
11183
11416
  const maxAttempts = 10;
11184
11417
  while (attempts < maxAttempts) {
11185
11418
  try {
11186
- const content = await (0, import_promises18.readFile)(responseFileFinal, { encoding: "utf8" });
11419
+ const content = await (0, import_promises19.readFile)(responseFileFinal, { encoding: "utf8" });
11187
11420
  if (!silent) {
11188
11421
  process.stdout.write(`${content}
11189
11422
  `);
@@ -11204,7 +11437,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
11204
11437
  }
11205
11438
  async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, silent = false, timeoutMs = DEFAULT_TIMEOUT_MS) {
11206
11439
  if (!silent) {
11207
- const fileList = responseFilesFinal.map((file) => import_node_path24.default.basename(file)).join(", ");
11440
+ const fileList = responseFilesFinal.map((file) => import_node_path25.default.basename(file)).join(", ");
11208
11441
  console.error(`waiting for ${responseFilesFinal.length} batch response(s): ${fileList}`);
11209
11442
  }
11210
11443
  const deadline = Date.now() + timeoutMs;
@@ -11213,7 +11446,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
11213
11446
  while (pending.size > 0) {
11214
11447
  if (Date.now() >= deadline) {
11215
11448
  if (!silent) {
11216
- const remaining = [...pending].map((f) => import_node_path24.default.basename(f)).join(", ");
11449
+ const remaining = [...pending].map((f) => import_node_path25.default.basename(f)).join(", ");
11217
11450
  console.error(
11218
11451
  `error: timed out after ${Math.round(timeoutMs / 1e3)}s waiting for batch responses. Still pending: ${remaining}`
11219
11452
  );
@@ -11240,7 +11473,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
11240
11473
  const maxAttempts = 10;
11241
11474
  while (attempts < maxAttempts) {
11242
11475
  try {
11243
- const content = await (0, import_promises18.readFile)(file, { encoding: "utf8" });
11476
+ const content = await (0, import_promises19.readFile)(file, { encoding: "utf8" });
11244
11477
  if (!silent) {
11245
11478
  process.stdout.write(`${content}
11246
11479
  `);
@@ -11262,17 +11495,17 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
11262
11495
  }
11263
11496
 
11264
11497
  // src/evaluation/providers/vscode/dispatch/vscodeProcess.ts
11265
- var import_node_child_process5 = require("child_process");
11266
- var import_promises19 = require("fs/promises");
11267
- var import_node_path27 = __toESM(require("path"), 1);
11498
+ var import_node_child_process6 = require("child_process");
11499
+ var import_promises20 = require("fs/promises");
11500
+ var import_node_path28 = __toESM(require("path"), 1);
11268
11501
  var import_node_util2 = require("util");
11269
11502
 
11270
11503
  // src/evaluation/providers/vscode/dispatch/constants.ts
11271
- var import_node_path26 = __toESM(require("path"), 1);
11504
+ var import_node_path27 = __toESM(require("path"), 1);
11272
11505
 
11273
11506
  // src/paths.ts
11274
11507
  var import_node_os4 = __toESM(require("os"), 1);
11275
- var import_node_path25 = __toESM(require("path"), 1);
11508
+ var import_node_path26 = __toESM(require("path"), 1);
11276
11509
  var logged = false;
11277
11510
  function getAgentvHome() {
11278
11511
  const envHome = process.env.AGENTV_HOME;
@@ -11283,19 +11516,19 @@ function getAgentvHome() {
11283
11516
  }
11284
11517
  return envHome;
11285
11518
  }
11286
- return import_node_path25.default.join(import_node_os4.default.homedir(), ".agentv");
11519
+ return import_node_path26.default.join(import_node_os4.default.homedir(), ".agentv");
11287
11520
  }
11288
11521
  function getWorkspacesRoot() {
11289
- return import_node_path25.default.join(getAgentvHome(), "workspaces");
11522
+ return import_node_path26.default.join(getAgentvHome(), "workspaces");
11290
11523
  }
11291
11524
  function getSubagentsRoot() {
11292
- return import_node_path25.default.join(getAgentvHome(), "subagents");
11525
+ return import_node_path26.default.join(getAgentvHome(), "subagents");
11293
11526
  }
11294
11527
  function getTraceStateRoot() {
11295
- return import_node_path25.default.join(getAgentvHome(), "trace-state");
11528
+ return import_node_path26.default.join(getAgentvHome(), "trace-state");
11296
11529
  }
11297
11530
  function getWorkspacePoolRoot() {
11298
- return import_node_path25.default.join(getAgentvHome(), "workspace-pool");
11531
+ return import_node_path26.default.join(getAgentvHome(), "workspace-pool");
11299
11532
  }
11300
11533
 
11301
11534
  // src/evaluation/providers/vscode/dispatch/constants.ts
@@ -11303,12 +11536,12 @@ var DEFAULT_LOCK_NAME = "subagent.lock";
11303
11536
  var DEFAULT_ALIVE_FILENAME = ".alive";
11304
11537
  function getDefaultSubagentRoot(vscodeCmd = "code") {
11305
11538
  const folder = vscodeCmd === "code-insiders" ? "vscode-insiders-agents" : "vscode-agents";
11306
- return import_node_path26.default.join(getSubagentsRoot(), folder);
11539
+ return import_node_path27.default.join(getSubagentsRoot(), folder);
11307
11540
  }
11308
11541
  var DEFAULT_SUBAGENT_ROOT = getDefaultSubagentRoot();
11309
11542
 
11310
11543
  // src/evaluation/providers/vscode/dispatch/vscodeProcess.ts
11311
- var execAsync2 = (0, import_node_util2.promisify)(import_node_child_process5.exec);
11544
+ var execAsync2 = (0, import_node_util2.promisify)(import_node_child_process6.exec);
11312
11545
  function shellQuote(cmd) {
11313
11546
  return cmd.includes(" ") ? `"${cmd}"` : cmd;
11314
11547
  }
@@ -11319,7 +11552,7 @@ model: Grok Code Fast 1 (copilot)
11319
11552
  function spawnVsCode(vscodeCmd, args, options) {
11320
11553
  const useShell = options?.shell ?? true;
11321
11554
  const command = useShell ? shellQuote(vscodeCmd) : vscodeCmd;
11322
- const child = (0, import_node_child_process5.spawn)(command, args, {
11555
+ const child = (0, import_node_child_process6.spawn)(command, args, {
11323
11556
  windowsHide: true,
11324
11557
  shell: useShell,
11325
11558
  detached: false
@@ -11370,12 +11603,12 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
11370
11603
  await raceSpawnError(child);
11371
11604
  return true;
11372
11605
  }
11373
- const aliveFile = import_node_path27.default.join(subagentDir, DEFAULT_ALIVE_FILENAME);
11606
+ const aliveFile = import_node_path28.default.join(subagentDir, DEFAULT_ALIVE_FILENAME);
11374
11607
  await removeIfExists(aliveFile);
11375
- const githubAgentsDir = import_node_path27.default.join(subagentDir, ".github", "agents");
11376
- await (0, import_promises19.mkdir)(githubAgentsDir, { recursive: true });
11377
- const wakeupDst = import_node_path27.default.join(githubAgentsDir, "wakeup.md");
11378
- await (0, import_promises19.writeFile)(wakeupDst, DEFAULT_WAKEUP_CONTENT, "utf8");
11608
+ const githubAgentsDir = import_node_path28.default.join(subagentDir, ".github", "agents");
11609
+ await (0, import_promises20.mkdir)(githubAgentsDir, { recursive: true });
11610
+ const wakeupDst = import_node_path28.default.join(githubAgentsDir, "wakeup.md");
11611
+ await (0, import_promises20.writeFile)(wakeupDst, DEFAULT_WAKEUP_CONTENT, "utf8");
11379
11612
  const workspaceChild = spawnVsCode(vscodeCmd, [workspacePath], {
11380
11613
  label: "open-workspace"
11381
11614
  });
@@ -11387,7 +11620,7 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
11387
11620
  "chat",
11388
11621
  "-m",
11389
11622
  wakeupChatId,
11390
- `create a file named .alive in the ${import_node_path27.default.basename(subagentDir)} folder`
11623
+ `create a file named .alive in the ${import_node_path28.default.basename(subagentDir)} folder`
11391
11624
  ];
11392
11625
  const wakeupChild = spawnVsCode(vscodeCmd, chatArgs, { label: "send-wakeup-chat" });
11393
11626
  await raceSpawnError(wakeupChild);
@@ -11402,27 +11635,27 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
11402
11635
  return true;
11403
11636
  }
11404
11637
  async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, requestInstructions, timestamp, vscodeCmd) {
11405
- const workspacePath = import_node_path27.default.join(subagentDir, `${import_node_path27.default.basename(subagentDir)}.code-workspace`);
11406
- const messagesDir = import_node_path27.default.join(subagentDir, "messages");
11407
- await (0, import_promises19.mkdir)(messagesDir, { recursive: true });
11408
- const reqFile = import_node_path27.default.join(messagesDir, `${timestamp}_req.md`);
11409
- await (0, import_promises19.writeFile)(reqFile, requestInstructions, { encoding: "utf8" });
11638
+ const workspacePath = import_node_path28.default.join(subagentDir, `${import_node_path28.default.basename(subagentDir)}.code-workspace`);
11639
+ const messagesDir = import_node_path28.default.join(subagentDir, "messages");
11640
+ await (0, import_promises20.mkdir)(messagesDir, { recursive: true });
11641
+ const reqFile = import_node_path28.default.join(messagesDir, `${timestamp}_req.md`);
11642
+ await (0, import_promises20.writeFile)(reqFile, requestInstructions, { encoding: "utf8" });
11410
11643
  const reqUri = pathToFileUri2(reqFile);
11411
11644
  const chatArgs = ["-r", "chat", "-m", chatId];
11412
11645
  for (const attachment of attachmentPaths) {
11413
11646
  chatArgs.push("-a", attachment);
11414
11647
  }
11415
11648
  chatArgs.push("-a", reqFile);
11416
- chatArgs.push(`Follow instructions in [${import_node_path27.default.basename(reqFile)}](${reqUri})`);
11649
+ chatArgs.push(`Follow instructions in [${import_node_path28.default.basename(reqFile)}](${reqUri})`);
11417
11650
  const workspaceReady = await ensureWorkspaceFocused(
11418
11651
  workspacePath,
11419
- import_node_path27.default.basename(subagentDir),
11652
+ import_node_path28.default.basename(subagentDir),
11420
11653
  subagentDir,
11421
11654
  vscodeCmd
11422
11655
  );
11423
11656
  if (!workspaceReady) {
11424
11657
  throw new Error(
11425
- `VS Code workspace '${import_node_path27.default.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
11658
+ `VS Code workspace '${import_node_path28.default.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
11426
11659
  );
11427
11660
  }
11428
11661
  await sleep2(500);
@@ -11430,9 +11663,9 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
11430
11663
  await raceSpawnError(child);
11431
11664
  }
11432
11665
  async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, chatInstruction, vscodeCmd) {
11433
- const workspacePath = import_node_path27.default.join(subagentDir, `${import_node_path27.default.basename(subagentDir)}.code-workspace`);
11434
- const messagesDir = import_node_path27.default.join(subagentDir, "messages");
11435
- await (0, import_promises19.mkdir)(messagesDir, { recursive: true });
11666
+ const workspacePath = import_node_path28.default.join(subagentDir, `${import_node_path28.default.basename(subagentDir)}.code-workspace`);
11667
+ const messagesDir = import_node_path28.default.join(subagentDir, "messages");
11668
+ await (0, import_promises20.mkdir)(messagesDir, { recursive: true });
11436
11669
  const chatArgs = ["-r", "chat", "-m", chatId];
11437
11670
  for (const attachment of attachmentPaths) {
11438
11671
  chatArgs.push("-a", attachment);
@@ -11440,13 +11673,13 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
11440
11673
  chatArgs.push(chatInstruction);
11441
11674
  const workspaceReady = await ensureWorkspaceFocused(
11442
11675
  workspacePath,
11443
- import_node_path27.default.basename(subagentDir),
11676
+ import_node_path28.default.basename(subagentDir),
11444
11677
  subagentDir,
11445
11678
  vscodeCmd
11446
11679
  );
11447
11680
  if (!workspaceReady) {
11448
11681
  throw new Error(
11449
- `VS Code workspace '${import_node_path27.default.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
11682
+ `VS Code workspace '${import_node_path28.default.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
11450
11683
  );
11451
11684
  }
11452
11685
  await sleep2(500);
@@ -11455,11 +11688,11 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
11455
11688
  }
11456
11689
 
11457
11690
  // src/evaluation/providers/vscode/dispatch/workspaceManager.ts
11458
- var import_promises20 = require("fs/promises");
11459
- var import_node_path29 = __toESM(require("path"), 1);
11691
+ var import_promises21 = require("fs/promises");
11692
+ var import_node_path30 = __toESM(require("path"), 1);
11460
11693
 
11461
11694
  // src/evaluation/providers/vscode/utils/workspace.ts
11462
- var import_node_path28 = __toESM(require("path"), 1);
11695
+ var import_node_path29 = __toESM(require("path"), 1);
11463
11696
  var import_json5 = __toESM(require("json5"), 1);
11464
11697
  function transformWorkspacePaths(workspaceContent, templateDir) {
11465
11698
  let workspace;
@@ -11476,10 +11709,10 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
11476
11709
  }
11477
11710
  const transformedFolders = workspace.folders.map((folder) => {
11478
11711
  const folderPath = folder.path;
11479
- if (import_node_path28.default.isAbsolute(folderPath)) {
11712
+ if (import_node_path29.default.isAbsolute(folderPath)) {
11480
11713
  return folder;
11481
11714
  }
11482
- const absolutePath = import_node_path28.default.resolve(templateDir, folderPath);
11715
+ const absolutePath = import_node_path29.default.resolve(templateDir, folderPath);
11483
11716
  return {
11484
11717
  ...folder,
11485
11718
  path: absolutePath
@@ -11501,19 +11734,19 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
11501
11734
  if (locationMap && typeof locationMap === "object") {
11502
11735
  const transformedMap = {};
11503
11736
  for (const [locationPath, value] of Object.entries(locationMap)) {
11504
- const isAbsolute = import_node_path28.default.isAbsolute(locationPath);
11737
+ const isAbsolute = import_node_path29.default.isAbsolute(locationPath);
11505
11738
  if (isAbsolute) {
11506
11739
  transformedMap[locationPath] = value;
11507
11740
  } else {
11508
11741
  const firstGlobIndex = locationPath.search(/[*]/);
11509
11742
  if (firstGlobIndex === -1) {
11510
- const resolvedPath = import_node_path28.default.resolve(templateDir, locationPath).replace(/\\/g, "/");
11743
+ const resolvedPath = import_node_path29.default.resolve(templateDir, locationPath).replace(/\\/g, "/");
11511
11744
  transformedMap[resolvedPath] = value;
11512
11745
  } else {
11513
11746
  const basePathEnd = locationPath.lastIndexOf("/", firstGlobIndex);
11514
11747
  const basePath = basePathEnd !== -1 ? locationPath.substring(0, basePathEnd) : ".";
11515
11748
  const patternPath = locationPath.substring(basePathEnd !== -1 ? basePathEnd : 0);
11516
- const resolvedPath = (import_node_path28.default.resolve(templateDir, basePath) + patternPath).replace(
11749
+ const resolvedPath = (import_node_path29.default.resolve(templateDir, basePath) + patternPath).replace(
11517
11750
  /\\/g,
11518
11751
  "/"
11519
11752
  );
@@ -11554,7 +11787,7 @@ async function findUnlockedSubagent(subagentRoot) {
11554
11787
  number: Number.parseInt(entry.name.split("-")[1] ?? "", 10)
11555
11788
  })).filter((entry) => Number.isInteger(entry.number)).sort((a, b) => a.number - b.number);
11556
11789
  for (const subagent of subagents) {
11557
- const lockFile = import_node_path29.default.join(subagent.absolutePath, DEFAULT_LOCK_NAME);
11790
+ const lockFile = import_node_path30.default.join(subagent.absolutePath, DEFAULT_LOCK_NAME);
11558
11791
  if (!await pathExists(lockFile)) {
11559
11792
  return subagent.absolutePath;
11560
11793
  }
@@ -11564,26 +11797,26 @@ async function findUnlockedSubagent(subagentRoot) {
11564
11797
  async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
11565
11798
  let workspaceContent;
11566
11799
  if (workspaceTemplate) {
11567
- const workspaceSrc = import_node_path29.default.resolve(workspaceTemplate);
11800
+ const workspaceSrc = import_node_path30.default.resolve(workspaceTemplate);
11568
11801
  if (!await pathExists(workspaceSrc)) {
11569
11802
  throw new Error(`workspace template not found: ${workspaceSrc}`);
11570
11803
  }
11571
- const stats = await (0, import_promises20.stat)(workspaceSrc);
11804
+ const stats = await (0, import_promises21.stat)(workspaceSrc);
11572
11805
  if (!stats.isFile()) {
11573
11806
  throw new Error(`workspace template must be a file, not a directory: ${workspaceSrc}`);
11574
11807
  }
11575
- const templateText = await (0, import_promises20.readFile)(workspaceSrc, "utf8");
11808
+ const templateText = await (0, import_promises21.readFile)(workspaceSrc, "utf8");
11576
11809
  workspaceContent = JSON.parse(templateText);
11577
11810
  } else {
11578
11811
  workspaceContent = DEFAULT_WORKSPACE_TEMPLATE;
11579
11812
  }
11580
- const workspaceName = `${import_node_path29.default.basename(subagentDir)}.code-workspace`;
11581
- const workspaceDst = import_node_path29.default.join(subagentDir, workspaceName);
11582
- const templateDir = workspaceTemplate ? import_node_path29.default.dirname(import_node_path29.default.resolve(workspaceTemplate)) : subagentDir;
11813
+ const workspaceName = `${import_node_path30.default.basename(subagentDir)}.code-workspace`;
11814
+ const workspaceDst = import_node_path30.default.join(subagentDir, workspaceName);
11815
+ const templateDir = workspaceTemplate ? import_node_path30.default.dirname(import_node_path30.default.resolve(workspaceTemplate)) : subagentDir;
11583
11816
  const workspaceJson = JSON.stringify(workspaceContent, null, 2);
11584
11817
  let transformedContent = transformWorkspacePaths(workspaceJson, templateDir);
11585
11818
  if (cwd) {
11586
- const absCwd = import_node_path29.default.resolve(cwd);
11819
+ const absCwd = import_node_path30.default.resolve(cwd);
11587
11820
  const parsed = JSON.parse(transformedContent);
11588
11821
  const alreadyPresent = parsed.folders.some((f) => f.path === absCwd);
11589
11822
  if (!alreadyPresent) {
@@ -11591,36 +11824,36 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
11591
11824
  transformedContent = JSON.stringify(parsed, null, 2);
11592
11825
  }
11593
11826
  }
11594
- await (0, import_promises20.writeFile)(workspaceDst, transformedContent, "utf8");
11595
- const messagesDir = import_node_path29.default.join(subagentDir, "messages");
11596
- await (0, import_promises20.mkdir)(messagesDir, { recursive: true });
11827
+ await (0, import_promises21.writeFile)(workspaceDst, transformedContent, "utf8");
11828
+ const messagesDir = import_node_path30.default.join(subagentDir, "messages");
11829
+ await (0, import_promises21.mkdir)(messagesDir, { recursive: true });
11597
11830
  return { workspace: workspaceDst, messagesDir };
11598
11831
  }
11599
11832
  async function createSubagentLock(subagentDir) {
11600
- const messagesDir = import_node_path29.default.join(subagentDir, "messages");
11833
+ const messagesDir = import_node_path30.default.join(subagentDir, "messages");
11601
11834
  if (await pathExists(messagesDir)) {
11602
- const files = await (0, import_promises20.readdir)(messagesDir);
11835
+ const files = await (0, import_promises21.readdir)(messagesDir);
11603
11836
  await Promise.all(
11604
11837
  files.map(async (file) => {
11605
- const target = import_node_path29.default.join(messagesDir, file);
11838
+ const target = import_node_path30.default.join(messagesDir, file);
11606
11839
  await removeIfExists(target);
11607
11840
  })
11608
11841
  );
11609
11842
  }
11610
- const githubAgentsDir = import_node_path29.default.join(subagentDir, ".github", "agents");
11843
+ const githubAgentsDir = import_node_path30.default.join(subagentDir, ".github", "agents");
11611
11844
  if (await pathExists(githubAgentsDir)) {
11612
- const agentFiles = await (0, import_promises20.readdir)(githubAgentsDir);
11845
+ const agentFiles = await (0, import_promises21.readdir)(githubAgentsDir);
11613
11846
  const preservedFiles = /* @__PURE__ */ new Set(["wakeup.md", "subagent.md"]);
11614
11847
  await Promise.all(
11615
- agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(import_node_path29.default.join(githubAgentsDir, file)))
11848
+ agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(import_node_path30.default.join(githubAgentsDir, file)))
11616
11849
  );
11617
11850
  }
11618
- const lockFile = import_node_path29.default.join(subagentDir, DEFAULT_LOCK_NAME);
11619
- await (0, import_promises20.writeFile)(lockFile, "", { encoding: "utf8" });
11851
+ const lockFile = import_node_path30.default.join(subagentDir, DEFAULT_LOCK_NAME);
11852
+ await (0, import_promises21.writeFile)(lockFile, "", { encoding: "utf8" });
11620
11853
  return lockFile;
11621
11854
  }
11622
11855
  async function removeSubagentLock(subagentDir) {
11623
- const lockFile = import_node_path29.default.join(subagentDir, DEFAULT_LOCK_NAME);
11856
+ const lockFile = import_node_path30.default.join(subagentDir, DEFAULT_LOCK_NAME);
11624
11857
  await removeIfExists(lockFile);
11625
11858
  }
11626
11859
  async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspaceTemplate, dryRun, cwd) {
@@ -11640,11 +11873,11 @@ async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspa
11640
11873
  return 1;
11641
11874
  }
11642
11875
  if (promptFile) {
11643
- const githubAgentsDir = import_node_path29.default.join(subagentDir, ".github", "agents");
11644
- await (0, import_promises20.mkdir)(githubAgentsDir, { recursive: true });
11645
- const agentFile = import_node_path29.default.join(githubAgentsDir, `${chatId}.md`);
11876
+ const githubAgentsDir = import_node_path30.default.join(subagentDir, ".github", "agents");
11877
+ await (0, import_promises21.mkdir)(githubAgentsDir, { recursive: true });
11878
+ const agentFile = import_node_path30.default.join(githubAgentsDir, `${chatId}.md`);
11646
11879
  try {
11647
- await (0, import_promises20.copyFile)(promptFile, agentFile);
11880
+ await (0, import_promises21.copyFile)(promptFile, agentFile);
11648
11881
  } catch (error) {
11649
11882
  console.error(`error: Failed to copy prompt file to agent mode: ${error.message}`);
11650
11883
  return 1;
@@ -11661,11 +11894,11 @@ async function resolvePromptFile(promptFile) {
11661
11894
  if (!promptFile) {
11662
11895
  return void 0;
11663
11896
  }
11664
- const resolvedPrompt = import_node_path30.default.resolve(promptFile);
11897
+ const resolvedPrompt = import_node_path31.default.resolve(promptFile);
11665
11898
  if (!await pathExists(resolvedPrompt)) {
11666
11899
  throw new Error(`Prompt file not found: ${resolvedPrompt}`);
11667
11900
  }
11668
- const promptStats = await (0, import_promises21.stat)(resolvedPrompt);
11901
+ const promptStats = await (0, import_promises22.stat)(resolvedPrompt);
11669
11902
  if (!promptStats.isFile()) {
11670
11903
  throw new Error(`Prompt file must be a file, not a directory: ${resolvedPrompt}`);
11671
11904
  }
@@ -11677,7 +11910,7 @@ async function resolveAttachments(extraAttachments) {
11677
11910
  }
11678
11911
  const resolved = [];
11679
11912
  for (const attachment of extraAttachments) {
11680
- const resolvedPath = import_node_path30.default.resolve(attachment);
11913
+ const resolvedPath = import_node_path31.default.resolve(attachment);
11681
11914
  if (!await pathExists(resolvedPath)) {
11682
11915
  throw new Error(`Attachment not found: ${resolvedPath}`);
11683
11916
  }
@@ -11719,7 +11952,7 @@ async function dispatchAgentSession(options) {
11719
11952
  error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
11720
11953
  };
11721
11954
  }
11722
- const subagentName = import_node_path30.default.basename(subagentDir);
11955
+ const subagentName = import_node_path31.default.basename(subagentDir);
11723
11956
  const chatId = Math.random().toString(16).slice(2, 10);
11724
11957
  const preparationResult = await prepareSubagentDirectory(
11725
11958
  subagentDir,
@@ -11747,9 +11980,9 @@ async function dispatchAgentSession(options) {
11747
11980
  };
11748
11981
  }
11749
11982
  const timestamp = generateTimestamp();
11750
- const messagesDir = import_node_path30.default.join(subagentDir, "messages");
11751
- const responseFileTmp = import_node_path30.default.join(messagesDir, `${timestamp}_res.tmp.md`);
11752
- const responseFileFinal = import_node_path30.default.join(messagesDir, `${timestamp}_res.md`);
11983
+ const messagesDir = import_node_path31.default.join(subagentDir, "messages");
11984
+ const responseFileTmp = import_node_path31.default.join(messagesDir, `${timestamp}_res.tmp.md`);
11985
+ const responseFileFinal = import_node_path31.default.join(messagesDir, `${timestamp}_res.md`);
11753
11986
  const requestInstructions = createRequestPrompt(
11754
11987
  userQuery,
11755
11988
  responseFileTmp,
@@ -11854,7 +12087,7 @@ async function dispatchBatchAgent(options) {
11854
12087
  error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
11855
12088
  };
11856
12089
  }
11857
- subagentName = import_node_path30.default.basename(subagentDir);
12090
+ subagentName = import_node_path31.default.basename(subagentDir);
11858
12091
  const chatId = Math.random().toString(16).slice(2, 10);
11859
12092
  const preparationResult = await prepareSubagentDirectory(
11860
12093
  subagentDir,
@@ -11885,24 +12118,24 @@ async function dispatchBatchAgent(options) {
11885
12118
  };
11886
12119
  }
11887
12120
  const timestamp = generateTimestamp();
11888
- const messagesDir = import_node_path30.default.join(subagentDir, "messages");
12121
+ const messagesDir = import_node_path31.default.join(subagentDir, "messages");
11889
12122
  requestFiles = userQueries.map(
11890
- (_, index) => import_node_path30.default.join(messagesDir, `${timestamp}_${index}_req.md`)
12123
+ (_, index) => import_node_path31.default.join(messagesDir, `${timestamp}_${index}_req.md`)
11891
12124
  );
11892
12125
  const responseTmpFiles = userQueries.map(
11893
- (_, index) => import_node_path30.default.join(messagesDir, `${timestamp}_${index}_res.tmp.md`)
12126
+ (_, index) => import_node_path31.default.join(messagesDir, `${timestamp}_${index}_res.tmp.md`)
11894
12127
  );
11895
12128
  responseFilesFinal = userQueries.map(
11896
- (_, index) => import_node_path30.default.join(messagesDir, `${timestamp}_${index}_res.md`)
12129
+ (_, index) => import_node_path31.default.join(messagesDir, `${timestamp}_${index}_res.md`)
11897
12130
  );
11898
- const orchestratorFile = import_node_path30.default.join(messagesDir, `${timestamp}_orchestrator.md`);
12131
+ const orchestratorFile = import_node_path31.default.join(messagesDir, `${timestamp}_orchestrator.md`);
11899
12132
  if (!dryRun) {
11900
12133
  await Promise.all(
11901
12134
  userQueries.map((query, index) => {
11902
12135
  const reqFile = requestFiles[index];
11903
12136
  const tmpFile = responseTmpFiles[index];
11904
12137
  const finalFile = responseFilesFinal[index];
11905
- return (0, import_promises21.writeFile)(
12138
+ return (0, import_promises22.writeFile)(
11906
12139
  reqFile,
11907
12140
  createBatchRequestPrompt(query, tmpFile, finalFile, batchRequestTemplateContent),
11908
12141
  { encoding: "utf8" }
@@ -11914,7 +12147,7 @@ async function dispatchBatchAgent(options) {
11914
12147
  responseFilesFinal,
11915
12148
  orchestratorTemplateContent
11916
12149
  );
11917
- await (0, import_promises21.writeFile)(orchestratorFile, orchestratorContent, { encoding: "utf8" });
12150
+ await (0, import_promises22.writeFile)(orchestratorFile, orchestratorContent, { encoding: "utf8" });
11918
12151
  }
11919
12152
  const chatAttachments = [orchestratorFile, ...attachments];
11920
12153
  const orchestratorUri = pathToFileUri2(orchestratorFile);
@@ -11980,8 +12213,8 @@ async function dispatchBatchAgent(options) {
11980
12213
  }
11981
12214
 
11982
12215
  // src/evaluation/providers/vscode/dispatch/provision.ts
11983
- var import_promises22 = require("fs/promises");
11984
- var import_node_path31 = __toESM(require("path"), 1);
12216
+ var import_promises23 = require("fs/promises");
12217
+ var import_node_path32 = __toESM(require("path"), 1);
11985
12218
  var DEFAULT_WORKSPACE_TEMPLATE2 = {
11986
12219
  folders: [
11987
12220
  {
@@ -12012,7 +12245,7 @@ async function provisionSubagents(options) {
12012
12245
  if (!Number.isInteger(subagents) || subagents < 1) {
12013
12246
  throw new Error("subagents must be a positive integer");
12014
12247
  }
12015
- const targetPath = import_node_path31.default.resolve(targetRoot);
12248
+ const targetPath = import_node_path32.default.resolve(targetRoot);
12016
12249
  if (!dryRun) {
12017
12250
  await ensureDir(targetPath);
12018
12251
  }
@@ -12032,7 +12265,7 @@ async function provisionSubagents(options) {
12032
12265
  continue;
12033
12266
  }
12034
12267
  highestNumber = Math.max(highestNumber, parsed);
12035
- const lockFile = import_node_path31.default.join(entry.absolutePath, lockName);
12268
+ const lockFile = import_node_path32.default.join(entry.absolutePath, lockName);
12036
12269
  const locked = await pathExists(lockFile);
12037
12270
  if (locked) {
12038
12271
  lockedSubagents.add(entry.absolutePath);
@@ -12049,10 +12282,10 @@ async function provisionSubagents(options) {
12049
12282
  break;
12050
12283
  }
12051
12284
  const subagentDir = subagent.absolutePath;
12052
- const githubAgentsDir = import_node_path31.default.join(subagentDir, ".github", "agents");
12053
- const lockFile = import_node_path31.default.join(subagentDir, lockName);
12054
- const workspaceDst = import_node_path31.default.join(subagentDir, `${import_node_path31.default.basename(subagentDir)}.code-workspace`);
12055
- const wakeupDst = import_node_path31.default.join(githubAgentsDir, "wakeup.md");
12285
+ const githubAgentsDir = import_node_path32.default.join(subagentDir, ".github", "agents");
12286
+ const lockFile = import_node_path32.default.join(subagentDir, lockName);
12287
+ const workspaceDst = import_node_path32.default.join(subagentDir, `${import_node_path32.default.basename(subagentDir)}.code-workspace`);
12288
+ const wakeupDst = import_node_path32.default.join(githubAgentsDir, "wakeup.md");
12056
12289
  const isLocked = await pathExists(lockFile);
12057
12290
  if (isLocked && !force) {
12058
12291
  continue;
@@ -12061,8 +12294,8 @@ async function provisionSubagents(options) {
12061
12294
  if (!dryRun) {
12062
12295
  await removeIfExists(lockFile);
12063
12296
  await ensureDir(githubAgentsDir);
12064
- await (0, import_promises22.writeFile)(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
12065
- await (0, import_promises22.writeFile)(wakeupDst, wakeupContent, "utf8");
12297
+ await (0, import_promises23.writeFile)(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
12298
+ await (0, import_promises23.writeFile)(wakeupDst, wakeupContent, "utf8");
12066
12299
  }
12067
12300
  created.push(subagentDir);
12068
12301
  lockedSubagents.delete(subagentDir);
@@ -12072,8 +12305,8 @@ async function provisionSubagents(options) {
12072
12305
  if (!isLocked && force) {
12073
12306
  if (!dryRun) {
12074
12307
  await ensureDir(githubAgentsDir);
12075
- await (0, import_promises22.writeFile)(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
12076
- await (0, import_promises22.writeFile)(wakeupDst, wakeupContent, "utf8");
12308
+ await (0, import_promises23.writeFile)(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
12309
+ await (0, import_promises23.writeFile)(wakeupDst, wakeupContent, "utf8");
12077
12310
  }
12078
12311
  created.push(subagentDir);
12079
12312
  subagentsProvisioned += 1;
@@ -12081,8 +12314,8 @@ async function provisionSubagents(options) {
12081
12314
  }
12082
12315
  if (!dryRun && !await pathExists(workspaceDst)) {
12083
12316
  await ensureDir(githubAgentsDir);
12084
- await (0, import_promises22.writeFile)(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
12085
- await (0, import_promises22.writeFile)(wakeupDst, wakeupContent, "utf8");
12317
+ await (0, import_promises23.writeFile)(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
12318
+ await (0, import_promises23.writeFile)(wakeupDst, wakeupContent, "utf8");
12086
12319
  }
12087
12320
  skippedExisting.push(subagentDir);
12088
12321
  subagentsProvisioned += 1;
@@ -12090,15 +12323,15 @@ async function provisionSubagents(options) {
12090
12323
  let nextIndex = highestNumber;
12091
12324
  while (subagentsProvisioned < subagents) {
12092
12325
  nextIndex += 1;
12093
- const subagentDir = import_node_path31.default.join(targetPath, `subagent-${nextIndex}`);
12094
- const githubAgentsDir = import_node_path31.default.join(subagentDir, ".github", "agents");
12095
- const workspaceDst = import_node_path31.default.join(subagentDir, `${import_node_path31.default.basename(subagentDir)}.code-workspace`);
12096
- const wakeupDst = import_node_path31.default.join(githubAgentsDir, "wakeup.md");
12326
+ const subagentDir = import_node_path32.default.join(targetPath, `subagent-${nextIndex}`);
12327
+ const githubAgentsDir = import_node_path32.default.join(subagentDir, ".github", "agents");
12328
+ const workspaceDst = import_node_path32.default.join(subagentDir, `${import_node_path32.default.basename(subagentDir)}.code-workspace`);
12329
+ const wakeupDst = import_node_path32.default.join(githubAgentsDir, "wakeup.md");
12097
12330
  if (!dryRun) {
12098
12331
  await ensureDir(subagentDir);
12099
12332
  await ensureDir(githubAgentsDir);
12100
- await (0, import_promises22.writeFile)(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
12101
- await (0, import_promises22.writeFile)(wakeupDst, wakeupContent, "utf8");
12333
+ await (0, import_promises23.writeFile)(workspaceDst, JSON.stringify(workspaceTemplate, null, 2), "utf8");
12334
+ await (0, import_promises23.writeFile)(wakeupDst, wakeupContent, "utf8");
12102
12335
  }
12103
12336
  created.push(subagentDir);
12104
12337
  subagentsProvisioned += 1;
@@ -12140,7 +12373,7 @@ var AGENTV_BATCH_REQUEST_TEMPLATE = `[[ ## task ## ]]
12140
12373
  `;
12141
12374
 
12142
12375
  // src/evaluation/providers/vscode-provider.ts
12143
- var execAsync3 = (0, import_node_util3.promisify)(import_node_child_process6.exec);
12376
+ var execAsync3 = (0, import_node_util3.promisify)(import_node_child_process7.exec);
12144
12377
  var VSCodeProvider = class {
12145
12378
  id;
12146
12379
  kind;
@@ -12283,9 +12516,9 @@ var VSCodeProvider = class {
12283
12516
  async function locateVSCodeExecutable(candidate) {
12284
12517
  const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
12285
12518
  if (includesPathSeparator) {
12286
- const resolved = import_node_path32.default.isAbsolute(candidate) ? candidate : import_node_path32.default.resolve(candidate);
12519
+ const resolved = import_node_path33.default.isAbsolute(candidate) ? candidate : import_node_path33.default.resolve(candidate);
12287
12520
  try {
12288
- await (0, import_promises23.access)(resolved, import_promises23.constants.F_OK);
12521
+ await (0, import_promises24.access)(resolved, import_promises24.constants.F_OK);
12289
12522
  return resolved;
12290
12523
  } catch {
12291
12524
  throw new Error(
@@ -12298,7 +12531,7 @@ async function locateVSCodeExecutable(candidate) {
12298
12531
  const { stdout } = await execAsync3(`${locator} ${candidate}`);
12299
12532
  const lines = stdout.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.length > 0);
12300
12533
  if (lines.length > 0 && lines[0]) {
12301
- await (0, import_promises23.access)(lines[0], import_promises23.constants.F_OK);
12534
+ await (0, import_promises24.access)(lines[0], import_promises24.constants.F_OK);
12302
12535
  return lines[0];
12303
12536
  }
12304
12537
  } catch {
@@ -12312,7 +12545,7 @@ async function resolveWorkspaceTemplateFile(template) {
12312
12545
  return void 0;
12313
12546
  }
12314
12547
  try {
12315
- const stats = await (0, import_promises23.stat)(import_node_path32.default.resolve(template));
12548
+ const stats = await (0, import_promises24.stat)(import_node_path33.default.resolve(template));
12316
12549
  return stats.isFile() ? template : void 0;
12317
12550
  } catch {
12318
12551
  return template;
@@ -12336,7 +12569,7 @@ function buildMandatoryPrereadBlock2(attachmentFiles) {
12336
12569
  return "";
12337
12570
  }
12338
12571
  const buildList = (files) => files.map((absolutePath) => {
12339
- const fileName = import_node_path32.default.basename(absolutePath);
12572
+ const fileName = import_node_path33.default.basename(absolutePath);
12340
12573
  const fileUri = pathToFileUri3(absolutePath);
12341
12574
  return `* [${fileName}](${fileUri})`;
12342
12575
  });
@@ -12357,7 +12590,7 @@ function collectAttachmentFiles(attachments) {
12357
12590
  }
12358
12591
  const unique = /* @__PURE__ */ new Map();
12359
12592
  for (const attachment of attachments) {
12360
- const absolutePath = import_node_path32.default.resolve(attachment);
12593
+ const absolutePath = import_node_path33.default.resolve(attachment);
12361
12594
  if (!unique.has(absolutePath)) {
12362
12595
  unique.set(absolutePath, absolutePath);
12363
12596
  }
@@ -12365,7 +12598,7 @@ function collectAttachmentFiles(attachments) {
12365
12598
  return Array.from(unique.values());
12366
12599
  }
12367
12600
  function pathToFileUri3(filePath) {
12368
- const absolutePath = import_node_path32.default.isAbsolute(filePath) ? filePath : import_node_path32.default.resolve(filePath);
12601
+ const absolutePath = import_node_path33.default.isAbsolute(filePath) ? filePath : import_node_path33.default.resolve(filePath);
12369
12602
  const normalizedPath = absolutePath.replace(/\\/g, "/");
12370
12603
  if (/^[a-zA-Z]:\//.test(normalizedPath)) {
12371
12604
  return `file:///${normalizedPath}`;
@@ -12378,7 +12611,7 @@ function normalizeAttachments(attachments) {
12378
12611
  }
12379
12612
  const deduped = /* @__PURE__ */ new Set();
12380
12613
  for (const attachment of attachments) {
12381
- deduped.add(import_node_path32.default.resolve(attachment));
12614
+ deduped.add(import_node_path33.default.resolve(attachment));
12382
12615
  }
12383
12616
  return Array.from(deduped);
12384
12617
  }
@@ -12387,7 +12620,7 @@ function mergeAttachments(all) {
12387
12620
  for (const list of all) {
12388
12621
  if (!list) continue;
12389
12622
  for (const inputFile of list) {
12390
- deduped.add(import_node_path32.default.resolve(inputFile));
12623
+ deduped.add(import_node_path33.default.resolve(inputFile));
12391
12624
  }
12392
12625
  }
12393
12626
  return deduped.size > 0 ? Array.from(deduped) : void 0;
@@ -12434,9 +12667,9 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
12434
12667
  }
12435
12668
 
12436
12669
  // src/evaluation/providers/targets-file.ts
12437
- var import_node_fs10 = require("fs");
12438
- var import_promises24 = require("fs/promises");
12439
- var import_node_path33 = __toESM(require("path"), 1);
12670
+ var import_node_fs11 = require("fs");
12671
+ var import_promises25 = require("fs/promises");
12672
+ var import_node_path34 = __toESM(require("path"), 1);
12440
12673
  var import_yaml6 = require("yaml");
12441
12674
  function isRecord(value) {
12442
12675
  return typeof value === "object" && value !== null && !Array.isArray(value);
@@ -12466,18 +12699,18 @@ function assertTargetDefinition(value, index, filePath) {
12466
12699
  }
12467
12700
  async function fileExists3(filePath) {
12468
12701
  try {
12469
- await (0, import_promises24.access)(filePath, import_node_fs10.constants.F_OK);
12702
+ await (0, import_promises25.access)(filePath, import_node_fs11.constants.F_OK);
12470
12703
  return true;
12471
12704
  } catch {
12472
12705
  return false;
12473
12706
  }
12474
12707
  }
12475
12708
  async function readTargetDefinitions(filePath) {
12476
- const absolutePath = import_node_path33.default.resolve(filePath);
12709
+ const absolutePath = import_node_path34.default.resolve(filePath);
12477
12710
  if (!await fileExists3(absolutePath)) {
12478
12711
  throw new Error(`targets.yaml not found at ${absolutePath}`);
12479
12712
  }
12480
- const raw = await (0, import_promises24.readFile)(absolutePath, "utf8");
12713
+ const raw = await (0, import_promises25.readFile)(absolutePath, "utf8");
12481
12714
  const parsed = (0, import_yaml6.parse)(raw);
12482
12715
  if (!isRecord(parsed)) {
12483
12716
  throw new Error(`targets.yaml at ${absolutePath} must be a YAML object with a 'targets' field`);
@@ -12493,16 +12726,16 @@ function listTargetNames(definitions) {
12493
12726
  }
12494
12727
 
12495
12728
  // src/evaluation/providers/provider-discovery.ts
12496
- var import_node_path34 = __toESM(require("path"), 1);
12729
+ var import_node_path35 = __toESM(require("path"), 1);
12497
12730
  var import_fast_glob2 = __toESM(require("fast-glob"), 1);
12498
12731
  async function discoverProviders(registry, baseDir) {
12499
12732
  const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
12500
12733
  const candidateDirs = [];
12501
- let dir = import_node_path34.default.resolve(baseDir);
12502
- const root = import_node_path34.default.parse(dir).root;
12734
+ let dir = import_node_path35.default.resolve(baseDir);
12735
+ const root = import_node_path35.default.parse(dir).root;
12503
12736
  while (dir !== root) {
12504
- candidateDirs.push(import_node_path34.default.join(dir, ".agentv", "providers"));
12505
- dir = import_node_path34.default.dirname(dir);
12737
+ candidateDirs.push(import_node_path35.default.join(dir, ".agentv", "providers"));
12738
+ dir = import_node_path35.default.dirname(dir);
12506
12739
  }
12507
12740
  let files = [];
12508
12741
  for (const providersDir of candidateDirs) {
@@ -12518,7 +12751,7 @@ async function discoverProviders(registry, baseDir) {
12518
12751
  }
12519
12752
  const discoveredKinds = [];
12520
12753
  for (const filePath of files) {
12521
- const basename = import_node_path34.default.basename(filePath);
12754
+ const basename = import_node_path35.default.basename(filePath);
12522
12755
  const kindName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
12523
12756
  if (registry.has(kindName)) {
12524
12757
  continue;
@@ -12536,7 +12769,7 @@ async function discoverProviders(registry, baseDir) {
12536
12769
  // src/evaluation/providers/index.ts
12537
12770
  function createBuiltinProviderRegistry() {
12538
12771
  const registry = new ProviderRegistry();
12539
- registry.register("openai", (t) => new OpenAIProvider(t.name, t.config)).register("openrouter", (t) => new OpenRouterProvider(t.name, t.config)).register("azure", (t) => new AzureProvider(t.name, t.config)).register("anthropic", (t) => new AnthropicProvider(t.name, t.config)).register("gemini", (t) => new GeminiProvider(t.name, t.config)).register("cli", (t) => new CliProvider(t.name, t.config)).register("codex", (t) => new CodexProvider(t.name, t.config)).register("copilot-sdk", (t) => new CopilotSdkProvider(t.name, t.config)).register("copilot-cli", (t) => new CopilotCliProvider(t.name, t.config)).register("pi-coding-agent", (t) => new PiCodingAgentProvider(t.name, t.config)).register("pi-agent-sdk", (t) => new PiAgentSdkProvider(t.name, t.config)).register("claude-cli", (t) => new ClaudeCliProvider(t.name, t.config)).register("claude", (t) => new ClaudeCliProvider(t.name, t.config)).register("claude-sdk", (t) => new ClaudeSdkProvider(t.name, t.config)).register("mock", (t) => new MockProvider(t.name, t.config)).register("agentv", (t) => new AgentvProvider(t.name, t.config)).register("vscode", (t) => new VSCodeProvider(t.name, t.config, "vscode")).register(
12772
+ registry.register("openai", (t) => new OpenAIProvider(t.name, t.config)).register("openrouter", (t) => new OpenRouterProvider(t.name, t.config)).register("azure", (t) => new AzureProvider(t.name, t.config)).register("anthropic", (t) => new AnthropicProvider(t.name, t.config)).register("gemini", (t) => new GeminiProvider(t.name, t.config)).register("cli", (t) => new CliProvider(t.name, t.config)).register("codex", (t) => new CodexProvider(t.name, t.config)).register("copilot-sdk", (t) => new CopilotSdkProvider(t.name, t.config)).register("copilot-cli", (t) => new CopilotCliProvider(t.name, t.config)).register("pi-coding-agent", (t) => new PiCodingAgentProvider(t.name, t.config)).register("pi-cli", (t) => new PiCliProvider(t.name, t.config)).register("claude-cli", (t) => new ClaudeCliProvider(t.name, t.config)).register("claude", (t) => new ClaudeCliProvider(t.name, t.config)).register("claude-sdk", (t) => new ClaudeSdkProvider(t.name, t.config)).register("mock", (t) => new MockProvider(t.name, t.config)).register("agentv", (t) => new AgentvProvider(t.name, t.config)).register("vscode", (t) => new VSCodeProvider(t.name, t.config, "vscode")).register(
12540
12773
  "vscode-insiders",
12541
12774
  (t) => new VSCodeProvider(t.name, t.config, "vscode-insiders")
12542
12775
  );
@@ -12625,9 +12858,9 @@ function negateScore(score) {
12625
12858
  }
12626
12859
 
12627
12860
  // src/evaluation/evaluators/code-evaluator.ts
12628
- var import_promises25 = require("fs/promises");
12861
+ var import_promises26 = require("fs/promises");
12629
12862
  var import_node_os5 = require("os");
12630
- var import_node_path35 = require("path");
12863
+ var import_node_path36 = require("path");
12631
12864
 
12632
12865
  // src/runtime/exec.ts
12633
12866
  function shellEscapePath(value) {
@@ -12727,15 +12960,15 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
12727
12960
  });
12728
12961
  }
12729
12962
  async function execShellWithStdin(command, stdinPayload, options = {}) {
12730
- const { mkdir: mkdir17, readFile: readFile14, rm: rm6, writeFile: writeFile9 } = await import("fs/promises");
12963
+ const { mkdir: mkdir18, readFile: readFile14, rm: rm6, writeFile: writeFile9 } = await import("fs/promises");
12731
12964
  const { tmpdir: tmpdir3 } = await import("os");
12732
- const path47 = await import("path");
12733
- const { randomUUID: randomUUID9 } = await import("crypto");
12734
- const dir = path47.join(tmpdir3(), `agentv-exec-${randomUUID9()}`);
12735
- await mkdir17(dir, { recursive: true });
12736
- const stdinPath = path47.join(dir, "stdin.txt");
12737
- const stdoutPath = path47.join(dir, "stdout.txt");
12738
- const stderrPath = path47.join(dir, "stderr.txt");
12965
+ const path48 = await import("path");
12966
+ const { randomUUID: randomUUID10 } = await import("crypto");
12967
+ const dir = path48.join(tmpdir3(), `agentv-exec-${randomUUID10()}`);
12968
+ await mkdir18(dir, { recursive: true });
12969
+ const stdinPath = path48.join(dir, "stdin.txt");
12970
+ const stdoutPath = path48.join(dir, "stdout.txt");
12971
+ const stderrPath = path48.join(dir, "stderr.txt");
12739
12972
  await writeFile9(stdinPath, stdinPayload, "utf8");
12740
12973
  const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
12741
12974
  const { spawn: spawn5 } = await import("child_process");
@@ -12774,12 +13007,12 @@ async function execShellWithStdin(command, stdinPayload, options = {}) {
12774
13007
  }
12775
13008
 
12776
13009
  // src/runtime/target-proxy.ts
12777
- var import_node_crypto8 = require("crypto");
13010
+ var import_node_crypto9 = require("crypto");
12778
13011
  var import_node_http = require("http");
12779
13012
  var DEFAULT_MAX_CALLS = 50;
12780
13013
  async function createTargetProxy(options) {
12781
13014
  const { defaultProvider, targetResolver, availableTargets, maxCalls } = options;
12782
- const token = (0, import_node_crypto8.randomBytes)(32).toString("hex");
13015
+ const token = (0, import_node_crypto9.randomBytes)(32).toString("hex");
12783
13016
  let callCount = 0;
12784
13017
  let isShutdown = false;
12785
13018
  let totalInputTokens = 0;
@@ -13071,9 +13304,9 @@ var CodeEvaluator = class {
13071
13304
  if (outputForPayload) {
13072
13305
  const serialized = JSON.stringify(outputForPayload);
13073
13306
  if (serialized.length > FILE_BACKED_OUTPUT_THRESHOLD) {
13074
- const tmpDir = await (0, import_promises25.mkdtemp)((0, import_node_path35.join)((0, import_node_os5.tmpdir)(), "agentv-judge-"));
13075
- outputPath = (0, import_node_path35.join)(tmpDir, "output.json");
13076
- await (0, import_promises25.writeFile)(outputPath, serialized);
13307
+ const tmpDir = await (0, import_promises26.mkdtemp)((0, import_node_path36.join)((0, import_node_os5.tmpdir)(), "agentv-judge-"));
13308
+ outputPath = (0, import_node_path36.join)(tmpDir, "output.json");
13309
+ await (0, import_promises26.writeFile)(outputPath, serialized);
13077
13310
  outputForPayload = null;
13078
13311
  }
13079
13312
  }
@@ -13182,7 +13415,7 @@ var CodeEvaluator = class {
13182
13415
  await proxyShutdown();
13183
13416
  }
13184
13417
  if (outputPath) {
13185
- await (0, import_promises25.rm)((0, import_node_path35.dirname)(outputPath), { recursive: true, force: true }).catch(() => {
13418
+ await (0, import_promises26.rm)((0, import_node_path36.dirname)(outputPath), { recursive: true, force: true }).catch(() => {
13186
13419
  });
13187
13420
  }
13188
13421
  }
@@ -13218,7 +13451,7 @@ var AGENT_PROVIDER_KINDS = [
13218
13451
  "copilot-sdk",
13219
13452
  "copilot-cli",
13220
13453
  "pi-coding-agent",
13221
- "pi-agent-sdk",
13454
+ "pi-cli",
13222
13455
  "claude",
13223
13456
  "claude-cli",
13224
13457
  "claude-sdk",
@@ -13245,8 +13478,8 @@ function isAgentProvider(provider) {
13245
13478
  }
13246
13479
 
13247
13480
  // src/evaluation/evaluators/llm-grader.ts
13248
- var import_promises26 = __toESM(require("fs/promises"), 1);
13249
- var import_node_path36 = __toESM(require("path"), 1);
13481
+ var import_promises27 = __toESM(require("fs/promises"), 1);
13482
+ var import_node_path37 = __toESM(require("path"), 1);
13250
13483
  var import_ai2 = require("ai");
13251
13484
  var import_zod4 = require("zod");
13252
13485
  var DEFAULT_MAX_STEPS = 10;
@@ -13435,7 +13668,7 @@ ${context2.fileChanges}`;
13435
13668
  async evaluateWithRubrics(context2, graderProvider, rubrics) {
13436
13669
  if (!rubrics || rubrics.length === 0) {
13437
13670
  throw new Error(
13438
- `No rubrics found for evaluator "${context2.evaluator?.name ?? "llm-grader"}". Run "agentv generate rubrics" first.`
13671
+ `No rubrics found for evaluator "${context2.evaluator?.name ?? "llm-grader"}". Add rubric criteria under assertions or use the agentv-eval-writer skill for authoring help.`
13439
13672
  );
13440
13673
  }
13441
13674
  const hasScoreRanges = rubrics.some((r) => r.score_ranges && r.score_ranges.length > 0);
@@ -14101,8 +14334,8 @@ function calculateScoreRangeResult(result, rubrics) {
14101
14334
  };
14102
14335
  }
14103
14336
  function resolveSandboxed(basePath, relativePath) {
14104
- const resolved = import_node_path36.default.resolve(basePath, relativePath);
14105
- if (!resolved.startsWith(basePath + import_node_path36.default.sep) && resolved !== basePath) {
14337
+ const resolved = import_node_path37.default.resolve(basePath, relativePath);
14338
+ if (!resolved.startsWith(basePath + import_node_path37.default.sep) && resolved !== basePath) {
14106
14339
  throw new Error(`Path '${relativePath}' is outside the workspace`);
14107
14340
  }
14108
14341
  return resolved;
@@ -14117,7 +14350,7 @@ function createFilesystemTools(workspacePath) {
14117
14350
  execute: async (input) => {
14118
14351
  try {
14119
14352
  const resolved = resolveSandboxed(workspacePath, input.path);
14120
- const entries = await import_promises26.default.readdir(resolved, { withFileTypes: true });
14353
+ const entries = await import_promises27.default.readdir(resolved, { withFileTypes: true });
14121
14354
  return entries.map((e) => ({
14122
14355
  name: e.name,
14123
14356
  type: e.isDirectory() ? "directory" : "file"
@@ -14135,12 +14368,12 @@ function createFilesystemTools(workspacePath) {
14135
14368
  execute: async (input) => {
14136
14369
  try {
14137
14370
  const resolved = resolveSandboxed(workspacePath, input.path);
14138
- const stat8 = await import_promises26.default.stat(resolved);
14371
+ const stat8 = await import_promises27.default.stat(resolved);
14139
14372
  if (stat8.isDirectory()) {
14140
14373
  return { error: `'${input.path}' is a directory, not a file` };
14141
14374
  }
14142
14375
  const buffer = Buffer.alloc(Math.min(stat8.size, MAX_FILE_SIZE));
14143
- const fd = await import_promises26.default.open(resolved, "r");
14376
+ const fd = await import_promises27.default.open(resolved, "r");
14144
14377
  try {
14145
14378
  await fd.read(buffer, 0, buffer.length, 0);
14146
14379
  } finally {
@@ -14185,30 +14418,30 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
14185
14418
  if (matches.length >= MAX_SEARCH_MATCHES) return;
14186
14419
  let entries;
14187
14420
  try {
14188
- entries = await import_promises26.default.readdir(dirPath, { withFileTypes: true });
14421
+ entries = await import_promises27.default.readdir(dirPath, { withFileTypes: true });
14189
14422
  } catch {
14190
14423
  return;
14191
14424
  }
14192
14425
  for (const entry of entries) {
14193
14426
  if (matches.length >= MAX_SEARCH_MATCHES) return;
14194
14427
  if (SEARCH_SKIP_DIRS.has(entry.name)) continue;
14195
- const fullPath = import_node_path36.default.join(dirPath, entry.name);
14428
+ const fullPath = import_node_path37.default.join(dirPath, entry.name);
14196
14429
  if (entry.isDirectory()) {
14197
14430
  await searchDirectory(fullPath, workspacePath, regex, matches);
14198
14431
  } else if (entry.isFile()) {
14199
- const ext = import_node_path36.default.extname(entry.name).toLowerCase();
14432
+ const ext = import_node_path37.default.extname(entry.name).toLowerCase();
14200
14433
  if (BINARY_EXTENSIONS.has(ext)) continue;
14201
14434
  try {
14202
- const stat8 = await import_promises26.default.stat(fullPath);
14435
+ const stat8 = await import_promises27.default.stat(fullPath);
14203
14436
  if (stat8.size > MAX_FILE_SIZE) continue;
14204
- const content = await import_promises26.default.readFile(fullPath, "utf-8");
14437
+ const content = await import_promises27.default.readFile(fullPath, "utf-8");
14205
14438
  const lines = content.split("\n");
14206
14439
  for (let i = 0; i < lines.length; i++) {
14207
14440
  if (matches.length >= MAX_SEARCH_MATCHES) return;
14208
14441
  regex.lastIndex = 0;
14209
14442
  if (regex.test(lines[i])) {
14210
14443
  matches.push({
14211
- file: import_node_path36.default.relative(workspacePath, fullPath),
14444
+ file: import_node_path37.default.relative(workspacePath, fullPath),
14212
14445
  line: i + 1,
14213
14446
  text: lines[i].substring(0, 200)
14214
14447
  });
@@ -14843,115 +15076,115 @@ var FieldAccuracyEvaluator = class {
14843
15076
  * Evaluate a single field against the expected value.
14844
15077
  */
14845
15078
  evaluateField(fieldConfig, candidateData, expectedData) {
14846
- const { path: path47, match, required = true, weight = 1 } = fieldConfig;
14847
- const candidateValue = resolvePath(candidateData, path47);
14848
- const expectedValue = resolvePath(expectedData, path47);
15079
+ const { path: path48, match, required = true, weight = 1 } = fieldConfig;
15080
+ const candidateValue = resolvePath(candidateData, path48);
15081
+ const expectedValue = resolvePath(expectedData, path48);
14849
15082
  if (expectedValue === void 0) {
14850
15083
  return {
14851
- path: path47,
15084
+ path: path48,
14852
15085
  score: 1,
14853
15086
  // No expected value means no comparison needed
14854
15087
  weight,
14855
15088
  hit: true,
14856
- message: `${path47}: no expected value`
15089
+ message: `${path48}: no expected value`
14857
15090
  };
14858
15091
  }
14859
15092
  if (candidateValue === void 0) {
14860
15093
  if (required) {
14861
15094
  return {
14862
- path: path47,
15095
+ path: path48,
14863
15096
  score: 0,
14864
15097
  weight,
14865
15098
  hit: false,
14866
- message: `${path47} (required, missing)`
15099
+ message: `${path48} (required, missing)`
14867
15100
  };
14868
15101
  }
14869
15102
  return {
14870
- path: path47,
15103
+ path: path48,
14871
15104
  score: 1,
14872
15105
  // Don't penalize missing optional fields
14873
15106
  weight: 0,
14874
15107
  // Zero weight means it won't affect the score
14875
15108
  hit: true,
14876
- message: `${path47}: optional field missing`
15109
+ message: `${path48}: optional field missing`
14877
15110
  };
14878
15111
  }
14879
15112
  switch (match) {
14880
15113
  case "exact":
14881
- return this.compareExact(path47, candidateValue, expectedValue, weight);
15114
+ return this.compareExact(path48, candidateValue, expectedValue, weight);
14882
15115
  case "numeric_tolerance":
14883
15116
  return this.compareNumericTolerance(
14884
- path47,
15117
+ path48,
14885
15118
  candidateValue,
14886
15119
  expectedValue,
14887
15120
  fieldConfig,
14888
15121
  weight
14889
15122
  );
14890
15123
  case "date":
14891
- return this.compareDate(path47, candidateValue, expectedValue, fieldConfig, weight);
15124
+ return this.compareDate(path48, candidateValue, expectedValue, fieldConfig, weight);
14892
15125
  default:
14893
15126
  return {
14894
- path: path47,
15127
+ path: path48,
14895
15128
  score: 0,
14896
15129
  weight,
14897
15130
  hit: false,
14898
- message: `${path47}: unknown match type "${match}"`
15131
+ message: `${path48}: unknown match type "${match}"`
14899
15132
  };
14900
15133
  }
14901
15134
  }
14902
15135
  /**
14903
15136
  * Exact equality comparison.
14904
15137
  */
14905
- compareExact(path47, candidateValue, expectedValue, weight) {
15138
+ compareExact(path48, candidateValue, expectedValue, weight) {
14906
15139
  if (deepEqual(candidateValue, expectedValue)) {
14907
15140
  return {
14908
- path: path47,
15141
+ path: path48,
14909
15142
  score: 1,
14910
15143
  weight,
14911
15144
  hit: true,
14912
- message: path47
15145
+ message: path48
14913
15146
  };
14914
15147
  }
14915
15148
  if (typeof candidateValue !== typeof expectedValue) {
14916
15149
  return {
14917
- path: path47,
15150
+ path: path48,
14918
15151
  score: 0,
14919
15152
  weight,
14920
15153
  hit: false,
14921
- message: `${path47} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
15154
+ message: `${path48} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
14922
15155
  };
14923
15156
  }
14924
15157
  return {
14925
- path: path47,
15158
+ path: path48,
14926
15159
  score: 0,
14927
15160
  weight,
14928
15161
  hit: false,
14929
- message: `${path47} (value mismatch)`
15162
+ message: `${path48} (value mismatch)`
14930
15163
  };
14931
15164
  }
14932
15165
  /**
14933
15166
  * Numeric comparison with absolute or relative tolerance.
14934
15167
  */
14935
- compareNumericTolerance(path47, candidateValue, expectedValue, fieldConfig, weight) {
15168
+ compareNumericTolerance(path48, candidateValue, expectedValue, fieldConfig, weight) {
14936
15169
  const { tolerance = 0, relative = false } = fieldConfig;
14937
15170
  const candidateNum = toNumber(candidateValue);
14938
15171
  const expectedNum = toNumber(expectedValue);
14939
15172
  if (candidateNum === null || expectedNum === null) {
14940
15173
  return {
14941
- path: path47,
15174
+ path: path48,
14942
15175
  score: 0,
14943
15176
  weight,
14944
15177
  hit: false,
14945
- message: `${path47} (non-numeric value)`
15178
+ message: `${path48} (non-numeric value)`
14946
15179
  };
14947
15180
  }
14948
15181
  if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
14949
15182
  return {
14950
- path: path47,
15183
+ path: path48,
14951
15184
  score: 0,
14952
15185
  weight,
14953
15186
  hit: false,
14954
- message: `${path47} (invalid numeric value)`
15187
+ message: `${path48} (invalid numeric value)`
14955
15188
  };
14956
15189
  }
14957
15190
  const diff = Math.abs(candidateNum - expectedNum);
@@ -14964,61 +15197,61 @@ var FieldAccuracyEvaluator = class {
14964
15197
  }
14965
15198
  if (withinTolerance) {
14966
15199
  return {
14967
- path: path47,
15200
+ path: path48,
14968
15201
  score: 1,
14969
15202
  weight,
14970
15203
  hit: true,
14971
- message: `${path47} (within tolerance: diff=${diff.toFixed(2)})`
15204
+ message: `${path48} (within tolerance: diff=${diff.toFixed(2)})`
14972
15205
  };
14973
15206
  }
14974
15207
  return {
14975
- path: path47,
15208
+ path: path48,
14976
15209
  score: 0,
14977
15210
  weight,
14978
15211
  hit: false,
14979
- message: `${path47} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
15212
+ message: `${path48} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
14980
15213
  };
14981
15214
  }
14982
15215
  /**
14983
15216
  * Date comparison with format normalization.
14984
15217
  */
14985
- compareDate(path47, candidateValue, expectedValue, fieldConfig, weight) {
15218
+ compareDate(path48, candidateValue, expectedValue, fieldConfig, weight) {
14986
15219
  const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
14987
15220
  const candidateDate = parseDate(String(candidateValue), formats);
14988
15221
  const expectedDate = parseDate(String(expectedValue), formats);
14989
15222
  if (candidateDate === null) {
14990
15223
  return {
14991
- path: path47,
15224
+ path: path48,
14992
15225
  score: 0,
14993
15226
  weight,
14994
15227
  hit: false,
14995
- message: `${path47} (unparseable candidate date)`
15228
+ message: `${path48} (unparseable candidate date)`
14996
15229
  };
14997
15230
  }
14998
15231
  if (expectedDate === null) {
14999
15232
  return {
15000
- path: path47,
15233
+ path: path48,
15001
15234
  score: 0,
15002
15235
  weight,
15003
15236
  hit: false,
15004
- message: `${path47} (unparseable expected date)`
15237
+ message: `${path48} (unparseable expected date)`
15005
15238
  };
15006
15239
  }
15007
15240
  if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
15008
15241
  return {
15009
- path: path47,
15242
+ path: path48,
15010
15243
  score: 1,
15011
15244
  weight,
15012
15245
  hit: true,
15013
- message: path47
15246
+ message: path48
15014
15247
  };
15015
15248
  }
15016
15249
  return {
15017
- path: path47,
15250
+ path: path48,
15018
15251
  score: 0,
15019
15252
  weight,
15020
15253
  hit: false,
15021
- message: `${path47} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
15254
+ message: `${path48} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
15022
15255
  };
15023
15256
  }
15024
15257
  /**
@@ -15051,11 +15284,11 @@ var FieldAccuracyEvaluator = class {
15051
15284
  };
15052
15285
  }
15053
15286
  };
15054
- function resolvePath(obj, path47) {
15055
- if (!path47 || !obj) {
15287
+ function resolvePath(obj, path48) {
15288
+ if (!path48 || !obj) {
15056
15289
  return void 0;
15057
15290
  }
15058
- const parts = path47.split(/\.|\[|\]/).filter((p) => p.length > 0);
15291
+ const parts = path48.split(/\.|\[|\]/).filter((p) => p.length > 0);
15059
15292
  let current = obj;
15060
15293
  for (const part of parts) {
15061
15294
  if (current === null || current === void 0) {
@@ -15221,9 +15454,7 @@ var PROVIDER_TOOL_SEMANTICS = {
15221
15454
  "claude-sdk": CLAUDE_MATCHER,
15222
15455
  codex: CODEX_MATCHER,
15223
15456
  "pi-coding-agent": PI_CODING_AGENT_MATCHER,
15224
- // pi-agent-sdk has no tools, so skill detection is a no-op. Kept for completeness.
15225
- // TODO: consider removing pi-agent-sdk provider entirely.
15226
- "pi-agent-sdk": PI_CODING_AGENT_MATCHER,
15457
+ "pi-cli": PI_CODING_AGENT_MATCHER,
15227
15458
  "copilot-cli": COPILOT_MATCHER,
15228
15459
  "copilot-sdk": COPILOT_MATCHER,
15229
15460
  vscode: COPILOT_MATCHER,
@@ -15538,8 +15769,8 @@ var TokenUsageEvaluator = class {
15538
15769
  };
15539
15770
 
15540
15771
  // src/evaluation/evaluators/tool-trajectory.ts
15541
- function getNestedValue(obj, path47) {
15542
- const parts = path47.split(".");
15772
+ function getNestedValue(obj, path48) {
15773
+ const parts = path48.split(".");
15543
15774
  let current = obj;
15544
15775
  for (const part of parts) {
15545
15776
  if (current === null || current === void 0 || typeof current !== "object") {
@@ -16159,9 +16390,9 @@ function runEqualsAssertion(output, value) {
16159
16390
  }
16160
16391
 
16161
16392
  // src/evaluation/orchestrator.ts
16162
- var import_node_crypto10 = require("crypto");
16163
- var import_promises30 = require("fs/promises");
16164
- var import_node_path45 = __toESM(require("path"), 1);
16393
+ var import_node_crypto11 = require("crypto");
16394
+ var import_promises31 = require("fs/promises");
16395
+ var import_node_path46 = __toESM(require("path"), 1);
16165
16396
  var import_micromatch3 = __toESM(require("micromatch"), 1);
16166
16397
 
16167
16398
  // ../../node_modules/.bun/yocto-queue@1.2.2/node_modules/yocto-queue/index.js
@@ -16375,7 +16606,7 @@ var InlineAssertEvaluator = class {
16375
16606
  };
16376
16607
 
16377
16608
  // src/evaluation/evaluators/prompt-resolution.ts
16378
- var import_node_path37 = __toESM(require("path"), 1);
16609
+ var import_node_path38 = __toESM(require("path"), 1);
16379
16610
  async function resolveCustomPrompt(promptConfig, context2, timeoutMs) {
16380
16611
  if (promptConfig.resolvedPromptScript && promptConfig.resolvedPromptScript.length > 0) {
16381
16612
  if (!context2) {
@@ -16421,7 +16652,7 @@ async function executePromptTemplate(script, context2, config, timeoutMs) {
16421
16652
  };
16422
16653
  const inputJson = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
16423
16654
  const scriptPath = script[script.length - 1];
16424
- const cwd = import_node_path37.default.dirname(scriptPath);
16655
+ const cwd = import_node_path38.default.dirname(scriptPath);
16425
16656
  try {
16426
16657
  const stdout = await executeScript(script, inputJson, timeoutMs, cwd);
16427
16658
  const prompt = stdout.trim();
@@ -16693,16 +16924,16 @@ function createBuiltinRegistry() {
16693
16924
  }
16694
16925
 
16695
16926
  // src/evaluation/registry/assertion-discovery.ts
16696
- var import_node_path38 = __toESM(require("path"), 1);
16927
+ var import_node_path39 = __toESM(require("path"), 1);
16697
16928
  var import_fast_glob3 = __toESM(require("fast-glob"), 1);
16698
16929
  async function discoverAssertions(registry, baseDir) {
16699
16930
  const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
16700
16931
  const candidateDirs = [];
16701
- let dir = import_node_path38.default.resolve(baseDir);
16702
- const root = import_node_path38.default.parse(dir).root;
16932
+ let dir = import_node_path39.default.resolve(baseDir);
16933
+ const root = import_node_path39.default.parse(dir).root;
16703
16934
  while (dir !== root) {
16704
- candidateDirs.push(import_node_path38.default.join(dir, ".agentv", "assertions"));
16705
- dir = import_node_path38.default.dirname(dir);
16935
+ candidateDirs.push(import_node_path39.default.join(dir, ".agentv", "assertions"));
16936
+ dir = import_node_path39.default.dirname(dir);
16706
16937
  }
16707
16938
  let files = [];
16708
16939
  for (const assertionsDir of candidateDirs) {
@@ -16718,7 +16949,7 @@ async function discoverAssertions(registry, baseDir) {
16718
16949
  }
16719
16950
  const discoveredTypes = [];
16720
16951
  for (const filePath of files) {
16721
- const basename = import_node_path38.default.basename(filePath);
16952
+ const basename = import_node_path39.default.basename(filePath);
16722
16953
  const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
16723
16954
  if (registry.has(typeName)) {
16724
16955
  continue;
@@ -16736,17 +16967,17 @@ async function discoverAssertions(registry, baseDir) {
16736
16967
  }
16737
16968
 
16738
16969
  // src/evaluation/registry/grader-discovery.ts
16739
- var import_node_path39 = __toESM(require("path"), 1);
16970
+ var import_node_path40 = __toESM(require("path"), 1);
16740
16971
  var import_fast_glob4 = __toESM(require("fast-glob"), 1);
16741
16972
  async function discoverGraders(registry, baseDir) {
16742
16973
  const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
16743
16974
  const candidateDirs = [];
16744
- let dir = import_node_path39.default.resolve(baseDir);
16745
- const root = import_node_path39.default.parse(dir).root;
16975
+ let dir = import_node_path40.default.resolve(baseDir);
16976
+ const root = import_node_path40.default.parse(dir).root;
16746
16977
  while (dir !== root) {
16747
- candidateDirs.push(import_node_path39.default.join(dir, ".agentv", "graders"));
16748
- candidateDirs.push(import_node_path39.default.join(dir, ".agentv", "judges"));
16749
- dir = import_node_path39.default.dirname(dir);
16978
+ candidateDirs.push(import_node_path40.default.join(dir, ".agentv", "graders"));
16979
+ candidateDirs.push(import_node_path40.default.join(dir, ".agentv", "judges"));
16980
+ dir = import_node_path40.default.dirname(dir);
16750
16981
  }
16751
16982
  let files = [];
16752
16983
  for (const gradersDir of candidateDirs) {
@@ -16762,7 +16993,7 @@ async function discoverGraders(registry, baseDir) {
16762
16993
  }
16763
16994
  const discoveredTypes = [];
16764
16995
  for (const filePath of files) {
16765
- const basename = import_node_path39.default.basename(filePath);
16996
+ const basename = import_node_path40.default.basename(filePath);
16766
16997
  const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
16767
16998
  if (registry.has(typeName)) {
16768
16999
  continue;
@@ -16920,11 +17151,11 @@ function getTCritical(df) {
16920
17151
  }
16921
17152
 
16922
17153
  // src/evaluation/workspace/file-changes.ts
16923
- var import_node_child_process7 = require("child_process");
16924
- var import_node_fs11 = require("fs");
16925
- var import_node_path40 = __toESM(require("path"), 1);
17154
+ var import_node_child_process8 = require("child_process");
17155
+ var import_node_fs12 = require("fs");
17156
+ var import_node_path41 = __toESM(require("path"), 1);
16926
17157
  var import_node_util4 = require("util");
16927
- var execAsync4 = (0, import_node_util4.promisify)(import_node_child_process7.exec);
17158
+ var execAsync4 = (0, import_node_util4.promisify)(import_node_child_process8.exec);
16928
17159
  function gitExecOpts(workspacePath) {
16929
17160
  const { GIT_DIR: _, GIT_WORK_TREE: __, ...env } = process.env;
16930
17161
  return { cwd: workspacePath, env };
@@ -16950,16 +17181,16 @@ async function captureFileChanges(workspacePath, baselineCommit) {
16950
17181
  async function stageNestedRepoChanges(workspacePath) {
16951
17182
  let entries;
16952
17183
  try {
16953
- entries = (0, import_node_fs11.readdirSync)(workspacePath);
17184
+ entries = (0, import_node_fs12.readdirSync)(workspacePath);
16954
17185
  } catch {
16955
17186
  return;
16956
17187
  }
16957
17188
  for (const entry of entries) {
16958
17189
  if (entry === ".git" || entry === "node_modules") continue;
16959
- const childPath = import_node_path40.default.join(workspacePath, entry);
17190
+ const childPath = import_node_path41.default.join(workspacePath, entry);
16960
17191
  try {
16961
- if (!(0, import_node_fs11.statSync)(childPath).isDirectory()) continue;
16962
- if (!(0, import_node_fs11.statSync)(import_node_path40.default.join(childPath, ".git")).isDirectory()) continue;
17192
+ if (!(0, import_node_fs12.statSync)(childPath).isDirectory()) continue;
17193
+ if (!(0, import_node_fs12.statSync)(import_node_path41.default.join(childPath, ".git")).isDirectory()) continue;
16963
17194
  } catch {
16964
17195
  continue;
16965
17196
  }
@@ -16969,8 +17200,8 @@ async function stageNestedRepoChanges(workspacePath) {
16969
17200
  }
16970
17201
 
16971
17202
  // src/evaluation/workspace/manager.ts
16972
- var import_promises27 = require("fs/promises");
16973
- var import_node_path41 = __toESM(require("path"), 1);
17203
+ var import_promises28 = require("fs/promises");
17204
+ var import_node_path42 = __toESM(require("path"), 1);
16974
17205
  var TemplateNotFoundError = class extends Error {
16975
17206
  constructor(templatePath) {
16976
17207
  super(`Workspace template not found: ${templatePath}`);
@@ -16992,7 +17223,7 @@ var WorkspaceCreationError = class extends Error {
16992
17223
  };
16993
17224
  async function isDirectory(filePath) {
16994
17225
  try {
16995
- const stats = await (0, import_promises27.stat)(filePath);
17226
+ const stats = await (0, import_promises28.stat)(filePath);
16996
17227
  return stats.isDirectory();
16997
17228
  } catch {
16998
17229
  return false;
@@ -17000,26 +17231,26 @@ async function isDirectory(filePath) {
17000
17231
  }
17001
17232
  function getWorkspacePath(evalRunId, caseId, workspaceRoot) {
17002
17233
  const root = workspaceRoot ?? getWorkspacesRoot();
17003
- return import_node_path41.default.join(root, evalRunId, caseId);
17234
+ return import_node_path42.default.join(root, evalRunId, caseId);
17004
17235
  }
17005
17236
  async function copyDirectoryRecursive(src, dest) {
17006
- await (0, import_promises27.mkdir)(dest, { recursive: true });
17007
- const entries = await (0, import_promises27.readdir)(src, { withFileTypes: true });
17237
+ await (0, import_promises28.mkdir)(dest, { recursive: true });
17238
+ const entries = await (0, import_promises28.readdir)(src, { withFileTypes: true });
17008
17239
  for (const entry of entries) {
17009
- const srcPath = import_node_path41.default.join(src, entry.name);
17010
- const destPath = import_node_path41.default.join(dest, entry.name);
17240
+ const srcPath = import_node_path42.default.join(src, entry.name);
17241
+ const destPath = import_node_path42.default.join(dest, entry.name);
17011
17242
  if (entry.name === ".git") {
17012
17243
  continue;
17013
17244
  }
17014
17245
  if (entry.isDirectory()) {
17015
17246
  await copyDirectoryRecursive(srcPath, destPath);
17016
17247
  } else {
17017
- await (0, import_promises27.cp)(srcPath, destPath, { preserveTimestamps: true });
17248
+ await (0, import_promises28.cp)(srcPath, destPath, { preserveTimestamps: true });
17018
17249
  }
17019
17250
  }
17020
17251
  }
17021
17252
  async function createTempWorkspace(templatePath, evalRunId, caseId, workspaceRoot) {
17022
- const resolvedTemplatePath = import_node_path41.default.resolve(templatePath);
17253
+ const resolvedTemplatePath = import_node_path42.default.resolve(templatePath);
17023
17254
  if (!await fileExists2(resolvedTemplatePath)) {
17024
17255
  throw new TemplateNotFoundError(resolvedTemplatePath);
17025
17256
  }
@@ -17029,7 +17260,7 @@ async function createTempWorkspace(templatePath, evalRunId, caseId, workspaceRoo
17029
17260
  const workspacePath = getWorkspacePath(evalRunId, caseId, workspaceRoot);
17030
17261
  try {
17031
17262
  if (await fileExists2(workspacePath)) {
17032
- await (0, import_promises27.rm)(workspacePath, { recursive: true, force: true });
17263
+ await (0, import_promises28.rm)(workspacePath, { recursive: true, force: true });
17033
17264
  }
17034
17265
  await copyDirectoryRecursive(resolvedTemplatePath, workspacePath);
17035
17266
  return workspacePath;
@@ -17063,25 +17294,25 @@ async function createTempWorkspace(templatePath, evalRunId, caseId, workspaceRoo
17063
17294
  }
17064
17295
  async function cleanupWorkspace(workspacePath) {
17065
17296
  if (await fileExists2(workspacePath)) {
17066
- await (0, import_promises27.rm)(workspacePath, { recursive: true, force: true });
17297
+ await (0, import_promises28.rm)(workspacePath, { recursive: true, force: true });
17067
17298
  }
17068
17299
  }
17069
17300
  async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
17070
17301
  const root = workspaceRoot ?? getWorkspacesRoot();
17071
- const evalDir = import_node_path41.default.join(root, evalRunId);
17302
+ const evalDir = import_node_path42.default.join(root, evalRunId);
17072
17303
  if (await fileExists2(evalDir)) {
17073
- await (0, import_promises27.rm)(evalDir, { recursive: true, force: true });
17304
+ await (0, import_promises28.rm)(evalDir, { recursive: true, force: true });
17074
17305
  }
17075
17306
  }
17076
17307
 
17077
17308
  // src/evaluation/workspace/pool-manager.ts
17078
- var import_node_child_process8 = require("child_process");
17079
- var import_node_crypto9 = require("crypto");
17080
- var import_node_fs12 = require("fs");
17081
- var import_promises28 = require("fs/promises");
17082
- var import_node_path42 = __toESM(require("path"), 1);
17309
+ var import_node_child_process9 = require("child_process");
17310
+ var import_node_crypto10 = require("crypto");
17311
+ var import_node_fs13 = require("fs");
17312
+ var import_promises29 = require("fs/promises");
17313
+ var import_node_path43 = __toESM(require("path"), 1);
17083
17314
  var import_node_util5 = require("util");
17084
- var execFileAsync = (0, import_node_util5.promisify)(import_node_child_process8.execFile);
17315
+ var execFileAsync = (0, import_node_util5.promisify)(import_node_child_process9.execFile);
17085
17316
  function gitEnv() {
17086
17317
  const env = { ...process.env };
17087
17318
  for (const key of Object.keys(env)) {
@@ -17127,14 +17358,14 @@ function computeWorkspaceFingerprint(repos) {
17127
17358
  const canonical = {
17128
17359
  repos: [...repos].sort((a, b) => a.path.localeCompare(b.path)).map(normalizeRepoForFingerprint)
17129
17360
  };
17130
- return (0, import_node_crypto9.createHash)("sha256").update(JSON.stringify(canonical)).digest("hex");
17361
+ return (0, import_node_crypto10.createHash)("sha256").update(JSON.stringify(canonical)).digest("hex");
17131
17362
  }
17132
17363
  async function copyDirectoryRecursive2(src, dest, skipDirs) {
17133
- await (0, import_promises28.mkdir)(dest, { recursive: true });
17134
- const entries = await (0, import_promises28.readdir)(src, { withFileTypes: true });
17364
+ await (0, import_promises29.mkdir)(dest, { recursive: true });
17365
+ const entries = await (0, import_promises29.readdir)(src, { withFileTypes: true });
17135
17366
  for (const entry of entries) {
17136
- const srcPath = import_node_path42.default.join(src, entry.name);
17137
- const destPath = import_node_path42.default.join(dest, entry.name);
17367
+ const srcPath = import_node_path43.default.join(src, entry.name);
17368
+ const destPath = import_node_path43.default.join(dest, entry.name);
17138
17369
  if (entry.name === ".git") {
17139
17370
  continue;
17140
17371
  }
@@ -17144,7 +17375,7 @@ async function copyDirectoryRecursive2(src, dest, skipDirs) {
17144
17375
  }
17145
17376
  await copyDirectoryRecursive2(srcPath, destPath, skipDirs);
17146
17377
  } else {
17147
- await (0, import_promises28.cp)(srcPath, destPath, { preserveTimestamps: true, force: true });
17378
+ await (0, import_promises29.cp)(srcPath, destPath, { preserveTimestamps: true, force: true });
17148
17379
  }
17149
17380
  }
17150
17381
  }
@@ -17167,8 +17398,8 @@ var WorkspacePoolManager = class {
17167
17398
  async acquireWorkspace(options) {
17168
17399
  const { templatePath, repos, maxSlots, repoManager, poolReset } = options;
17169
17400
  const fingerprint = computeWorkspaceFingerprint(repos);
17170
- const poolDir = import_node_path42.default.join(this.poolRoot, fingerprint);
17171
- await (0, import_promises28.mkdir)(poolDir, { recursive: true });
17401
+ const poolDir = import_node_path43.default.join(this.poolRoot, fingerprint);
17402
+ await (0, import_promises29.mkdir)(poolDir, { recursive: true });
17172
17403
  const drifted = await this.checkDrift(poolDir, fingerprint);
17173
17404
  if (drifted) {
17174
17405
  console.warn(
@@ -17177,13 +17408,13 @@ var WorkspacePoolManager = class {
17177
17408
  await this.removeAllSlots(poolDir);
17178
17409
  }
17179
17410
  for (let i = 0; i < maxSlots; i++) {
17180
- const slotPath = import_node_path42.default.join(poolDir, `slot-${i}`);
17411
+ const slotPath = import_node_path43.default.join(poolDir, `slot-${i}`);
17181
17412
  const lockPath = `${slotPath}.lock`;
17182
17413
  const locked = await this.tryLock(lockPath);
17183
17414
  if (!locked) {
17184
17415
  continue;
17185
17416
  }
17186
- const slotExists = (0, import_node_fs12.existsSync)(slotPath);
17417
+ const slotExists = (0, import_node_fs13.existsSync)(slotPath);
17187
17418
  if (slotExists) {
17188
17419
  await this.resetSlot(slotPath, templatePath, repos, poolReset);
17189
17420
  return {
@@ -17195,7 +17426,7 @@ var WorkspacePoolManager = class {
17195
17426
  poolDir
17196
17427
  };
17197
17428
  }
17198
- await (0, import_promises28.mkdir)(slotPath, { recursive: true });
17429
+ await (0, import_promises29.mkdir)(slotPath, { recursive: true });
17199
17430
  if (templatePath) {
17200
17431
  await copyDirectoryRecursive2(templatePath, slotPath);
17201
17432
  }
@@ -17219,7 +17450,7 @@ var WorkspacePoolManager = class {
17219
17450
  /** Remove lock file to release a slot. */
17220
17451
  async releaseSlot(slot) {
17221
17452
  try {
17222
- await (0, import_promises28.unlink)(slot.lockPath);
17453
+ await (0, import_promises29.unlink)(slot.lockPath);
17223
17454
  } catch {
17224
17455
  }
17225
17456
  }
@@ -17232,21 +17463,21 @@ var WorkspacePoolManager = class {
17232
17463
  async tryLock(lockPath) {
17233
17464
  for (let attempt = 0; attempt < 3; attempt++) {
17234
17465
  try {
17235
- await (0, import_promises28.writeFile)(lockPath, String(process.pid), { flag: "wx" });
17466
+ await (0, import_promises29.writeFile)(lockPath, String(process.pid), { flag: "wx" });
17236
17467
  return true;
17237
17468
  } catch (err) {
17238
17469
  if (err.code !== "EEXIST") {
17239
17470
  throw err;
17240
17471
  }
17241
17472
  try {
17242
- const pidStr = await (0, import_promises28.readFile)(lockPath, "utf-8");
17473
+ const pidStr = await (0, import_promises29.readFile)(lockPath, "utf-8");
17243
17474
  const pid = Number.parseInt(pidStr.trim(), 10);
17244
17475
  if (!Number.isNaN(pid)) {
17245
17476
  try {
17246
17477
  process.kill(pid, 0);
17247
17478
  return false;
17248
17479
  } catch {
17249
- await (0, import_promises28.unlink)(lockPath).catch(() => {
17480
+ await (0, import_promises29.unlink)(lockPath).catch(() => {
17250
17481
  });
17251
17482
  continue;
17252
17483
  }
@@ -17264,9 +17495,9 @@ var WorkspacePoolManager = class {
17264
17495
  * Returns false (no drift) if metadata.json doesn't exist (first use).
17265
17496
  */
17266
17497
  async checkDrift(poolDir, fingerprint) {
17267
- const metadataPath = import_node_path42.default.join(poolDir, "metadata.json");
17498
+ const metadataPath = import_node_path43.default.join(poolDir, "metadata.json");
17268
17499
  try {
17269
- const raw = await (0, import_promises28.readFile)(metadataPath, "utf-8");
17500
+ const raw = await (0, import_promises29.readFile)(metadataPath, "utf-8");
17270
17501
  const metadata = JSON.parse(raw);
17271
17502
  return metadata.fingerprint !== fingerprint;
17272
17503
  } catch {
@@ -17281,17 +17512,17 @@ var WorkspacePoolManager = class {
17281
17512
  repos,
17282
17513
  createdAt: (/* @__PURE__ */ new Date()).toISOString()
17283
17514
  };
17284
- await (0, import_promises28.writeFile)(import_node_path42.default.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
17515
+ await (0, import_promises29.writeFile)(import_node_path43.default.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
17285
17516
  }
17286
17517
  /** Remove all slot directories and their lock files from a pool directory. */
17287
17518
  async removeAllSlots(poolDir) {
17288
- const entries = await (0, import_promises28.readdir)(poolDir);
17519
+ const entries = await (0, import_promises29.readdir)(poolDir);
17289
17520
  for (const entry of entries) {
17290
17521
  if (entry.startsWith("slot-") && !entry.endsWith(".lock")) {
17291
- const lockPath = import_node_path42.default.join(poolDir, `${entry}.lock`);
17292
- if ((0, import_node_fs12.existsSync)(lockPath)) {
17522
+ const lockPath = import_node_path43.default.join(poolDir, `${entry}.lock`);
17523
+ if ((0, import_node_fs13.existsSync)(lockPath)) {
17293
17524
  try {
17294
- const pidStr = await (0, import_promises28.readFile)(lockPath, "utf-8");
17525
+ const pidStr = await (0, import_promises29.readFile)(lockPath, "utf-8");
17295
17526
  const pid = Number.parseInt(pidStr.trim(), 10);
17296
17527
  if (!Number.isNaN(pid)) {
17297
17528
  try {
@@ -17304,12 +17535,12 @@ var WorkspacePoolManager = class {
17304
17535
  } catch {
17305
17536
  }
17306
17537
  }
17307
- await (0, import_promises28.rm)(import_node_path42.default.join(poolDir, entry), { recursive: true, force: true });
17308
- await (0, import_promises28.rm)(lockPath, { force: true }).catch(() => {
17538
+ await (0, import_promises29.rm)(import_node_path43.default.join(poolDir, entry), { recursive: true, force: true });
17539
+ await (0, import_promises29.rm)(lockPath, { force: true }).catch(() => {
17309
17540
  });
17310
17541
  }
17311
17542
  }
17312
- await (0, import_promises28.rm)(import_node_path42.default.join(poolDir, "metadata.json"), { force: true }).catch(() => {
17543
+ await (0, import_promises29.rm)(import_node_path43.default.join(poolDir, "metadata.json"), { force: true }).catch(() => {
17313
17544
  });
17314
17545
  }
17315
17546
  /**
@@ -17319,8 +17550,8 @@ var WorkspacePoolManager = class {
17319
17550
  */
17320
17551
  async resetSlot(slotPath, templatePath, repos, poolReset = "fast") {
17321
17552
  for (const repo of repos) {
17322
- const repoDir = import_node_path42.default.join(slotPath, repo.path);
17323
- if (!(0, import_node_fs12.existsSync)(repoDir)) {
17553
+ const repoDir = import_node_path43.default.join(slotPath, repo.path);
17554
+ if (!(0, import_node_fs13.existsSync)(repoDir)) {
17324
17555
  continue;
17325
17556
  }
17326
17557
  if (poolReset === "none") {
@@ -17344,11 +17575,11 @@ var WorkspacePoolManager = class {
17344
17575
  };
17345
17576
 
17346
17577
  // src/evaluation/workspace/repo-manager.ts
17347
- var import_node_child_process9 = require("child_process");
17348
- var import_node_fs13 = require("fs");
17349
- var import_node_path43 = __toESM(require("path"), 1);
17578
+ var import_node_child_process10 = require("child_process");
17579
+ var import_node_fs14 = require("fs");
17580
+ var import_node_path44 = __toESM(require("path"), 1);
17350
17581
  var import_node_util6 = require("util");
17351
- var execFileAsync2 = (0, import_node_util6.promisify)(import_node_child_process9.execFile);
17582
+ var execFileAsync2 = (0, import_node_util6.promisify)(import_node_child_process10.execFile);
17352
17583
  var DEFAULT_TIMEOUT_MS2 = 3e5;
17353
17584
  function gitEnv2() {
17354
17585
  const env = { ...process.env };
@@ -17397,7 +17628,7 @@ var RepoManager = class {
17397
17628
  resolvedSourcePath: sourcePath ?? "",
17398
17629
  reason: "empty_path"
17399
17630
  });
17400
- } else if (!(0, import_node_fs13.existsSync)(sourcePath)) {
17631
+ } else if (!(0, import_node_fs14.existsSync)(sourcePath)) {
17401
17632
  errors.push({
17402
17633
  repoPath: repo.path,
17403
17634
  resolvedSourcePath: sourcePath,
@@ -17446,7 +17677,7 @@ ${lines.join("\n")}`;
17446
17677
  * Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
17447
17678
  */
17448
17679
  async materialize(repo, workspacePath) {
17449
- const targetDir = import_node_path43.default.join(workspacePath, repo.path);
17680
+ const targetDir = import_node_path44.default.join(workspacePath, repo.path);
17450
17681
  const sourceUrl = getSourceUrl(repo.source);
17451
17682
  const startedAt = Date.now();
17452
17683
  if (this.verbose) {
@@ -17537,7 +17768,7 @@ ${lines.join("\n")}`;
17537
17768
  async reset(repos, workspacePath, reset) {
17538
17769
  const cleanFlag = reset === "strict" ? "-fdx" : "-fd";
17539
17770
  for (const repo of repos) {
17540
- const targetDir = import_node_path43.default.join(workspacePath, repo.path);
17771
+ const targetDir = import_node_path44.default.join(workspacePath, repo.path);
17541
17772
  await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
17542
17773
  await this.runGit(["clean", cleanFlag], { cwd: targetDir });
17543
17774
  }
@@ -17545,36 +17776,36 @@ ${lines.join("\n")}`;
17545
17776
  };
17546
17777
 
17547
17778
  // src/evaluation/workspace/resolve.ts
17548
- var import_promises29 = require("fs/promises");
17549
- var import_node_path44 = __toESM(require("path"), 1);
17779
+ var import_promises30 = require("fs/promises");
17780
+ var import_node_path45 = __toESM(require("path"), 1);
17550
17781
  async function resolveWorkspaceTemplate(templatePath) {
17551
17782
  if (!templatePath) {
17552
17783
  return void 0;
17553
17784
  }
17554
- const resolved = import_node_path44.default.resolve(templatePath);
17555
- const stats = await (0, import_promises29.stat)(resolved);
17785
+ const resolved = import_node_path45.default.resolve(templatePath);
17786
+ const stats = await (0, import_promises30.stat)(resolved);
17556
17787
  if (stats.isFile()) {
17557
17788
  return {
17558
- dir: import_node_path44.default.dirname(resolved),
17789
+ dir: import_node_path45.default.dirname(resolved),
17559
17790
  workspaceFile: resolved
17560
17791
  };
17561
17792
  }
17562
17793
  if (!stats.isDirectory()) {
17563
17794
  throw new Error(`workspace template is neither a file nor a directory: ${resolved}`);
17564
17795
  }
17565
- const entries = await (0, import_promises29.readdir)(resolved);
17796
+ const entries = await (0, import_promises30.readdir)(resolved);
17566
17797
  const workspaceFiles = entries.filter((e) => e.endsWith(".code-workspace"));
17567
17798
  if (workspaceFiles.length === 1) {
17568
17799
  return {
17569
17800
  dir: resolved,
17570
- workspaceFile: import_node_path44.default.join(resolved, workspaceFiles[0])
17801
+ workspaceFile: import_node_path45.default.join(resolved, workspaceFiles[0])
17571
17802
  };
17572
17803
  }
17573
17804
  if (workspaceFiles.length > 1) {
17574
17805
  const conventionFile = workspaceFiles.find((f) => f === "template.code-workspace");
17575
17806
  return {
17576
17807
  dir: resolved,
17577
- workspaceFile: conventionFile ? import_node_path44.default.join(resolved, conventionFile) : void 0
17808
+ workspaceFile: conventionFile ? import_node_path45.default.join(resolved, conventionFile) : void 0
17578
17809
  };
17579
17810
  }
17580
17811
  return { dir: resolved };
@@ -17711,7 +17942,7 @@ async function runEvaluation(options) {
17711
17942
  );
17712
17943
  useCache = false;
17713
17944
  }
17714
- const evalRunId = (0, import_node_crypto10.randomUUID)();
17945
+ const evalRunId = (0, import_node_crypto11.randomUUID)();
17715
17946
  const evalCases = preloadedEvalCases ?? await loadTests(evalFilePath, repoRoot, { verbose, filter });
17716
17947
  const filteredEvalCases = filterEvalCases(evalCases, filter);
17717
17948
  if (filteredEvalCases.length === 0) {
@@ -17790,7 +18021,7 @@ async function runEvaluation(options) {
17790
18021
  ];
17791
18022
  const evaluatorRegistry = buildEvaluatorRegistry(evaluators, resolveGraderProvider);
17792
18023
  const typeRegistry = createBuiltinRegistry();
17793
- const discoveryBaseDir = evalFilePath ? import_node_path45.default.dirname(import_node_path45.default.resolve(evalFilePath)) : process.cwd();
18024
+ const discoveryBaseDir = evalFilePath ? import_node_path46.default.dirname(import_node_path46.default.resolve(evalFilePath)) : process.cwd();
17794
18025
  const evalDir = discoveryBaseDir;
17795
18026
  await discoverAssertions(typeRegistry, discoveryBaseDir);
17796
18027
  await discoverGraders(typeRegistry, discoveryBaseDir);
@@ -17930,14 +18161,14 @@ async function runEvaluation(options) {
17930
18161
  let staticMaterialised = false;
17931
18162
  if (useStaticWorkspace && configuredStaticPath) {
17932
18163
  const isYamlConfiguredPath = !cliWorkspacePath && !!yamlWorkspacePath;
17933
- const dirExists = await (0, import_promises30.stat)(configuredStaticPath).then(
18164
+ const dirExists = await (0, import_promises31.stat)(configuredStaticPath).then(
17934
18165
  (s) => s.isDirectory(),
17935
18166
  () => false
17936
18167
  );
17937
- const isEmpty = dirExists ? (await (0, import_promises30.readdir)(configuredStaticPath)).length === 0 : false;
18168
+ const isEmpty = dirExists ? (await (0, import_promises31.readdir)(configuredStaticPath)).length === 0 : false;
17938
18169
  if (isYamlConfiguredPath && (!dirExists || isEmpty)) {
17939
18170
  if (!dirExists) {
17940
- await (0, import_promises30.mkdir)(configuredStaticPath, { recursive: true });
18171
+ await (0, import_promises31.mkdir)(configuredStaticPath, { recursive: true });
17941
18172
  }
17942
18173
  if (workspaceTemplate) {
17943
18174
  await copyDirectoryRecursive(workspaceTemplate, configuredStaticPath);
@@ -17982,14 +18213,14 @@ async function runEvaluation(options) {
17982
18213
  }
17983
18214
  } else if (suiteWorkspace?.hooks || suiteWorkspace?.repos?.length && !isPerTestIsolation) {
17984
18215
  sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
17985
- await (0, import_promises30.mkdir)(sharedWorkspacePath, { recursive: true });
18216
+ await (0, import_promises31.mkdir)(sharedWorkspacePath, { recursive: true });
17986
18217
  setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
17987
18218
  }
17988
18219
  try {
17989
18220
  if (suiteWorkspaceFile && sharedWorkspacePath) {
17990
- const copiedWorkspaceFile = import_node_path45.default.join(sharedWorkspacePath, import_node_path45.default.basename(suiteWorkspaceFile));
18221
+ const copiedWorkspaceFile = import_node_path46.default.join(sharedWorkspacePath, import_node_path46.default.basename(suiteWorkspaceFile));
17991
18222
  try {
17992
- await (0, import_promises30.stat)(copiedWorkspaceFile);
18223
+ await (0, import_promises31.stat)(copiedWorkspaceFile);
17993
18224
  suiteWorkspaceFile = copiedWorkspaceFile;
17994
18225
  } catch {
17995
18226
  }
@@ -18569,9 +18800,9 @@ async function runEvalCase(options) {
18569
18800
  );
18570
18801
  }
18571
18802
  if (caseWorkspaceFile && workspacePath) {
18572
- const copiedFile = import_node_path45.default.join(workspacePath, import_node_path45.default.basename(caseWorkspaceFile));
18803
+ const copiedFile = import_node_path46.default.join(workspacePath, import_node_path46.default.basename(caseWorkspaceFile));
18573
18804
  try {
18574
- await (0, import_promises30.stat)(copiedFile);
18805
+ await (0, import_promises31.stat)(copiedFile);
18575
18806
  caseWorkspaceFile = copiedFile;
18576
18807
  } catch {
18577
18808
  }
@@ -18579,7 +18810,7 @@ async function runEvalCase(options) {
18579
18810
  }
18580
18811
  if (!workspacePath && (evalCase.workspace?.hooks || evalCase.workspace?.repos?.length) && evalRunId) {
18581
18812
  workspacePath = getWorkspacePath(evalRunId, evalCase.id);
18582
- await (0, import_promises30.mkdir)(workspacePath, { recursive: true });
18813
+ await (0, import_promises31.mkdir)(workspacePath, { recursive: true });
18583
18814
  }
18584
18815
  if (evalCase.workspace?.repos?.length && workspacePath) {
18585
18816
  const localPathErrors = RepoManager.validateLocalPaths(evalCase.workspace.repos);
@@ -18631,11 +18862,11 @@ async function runEvalCase(options) {
18631
18862
  const files = evalCase.metadata.agent_skills_files;
18632
18863
  if (baseDir && files.length > 0) {
18633
18864
  for (const relPath of files) {
18634
- const srcPath = import_node_path45.default.resolve(baseDir, relPath);
18635
- const destPath = import_node_path45.default.resolve(workspacePath, relPath);
18865
+ const srcPath = import_node_path46.default.resolve(baseDir, relPath);
18866
+ const destPath = import_node_path46.default.resolve(workspacePath, relPath);
18636
18867
  try {
18637
- await (0, import_promises30.mkdir)(import_node_path45.default.dirname(destPath), { recursive: true });
18638
- await (0, import_promises30.copyFile)(srcPath, destPath);
18868
+ await (0, import_promises31.mkdir)(import_node_path46.default.dirname(destPath), { recursive: true });
18869
+ await (0, import_promises31.copyFile)(srcPath, destPath);
18639
18870
  } catch (error) {
18640
18871
  const message = error instanceof Error ? error.message : String(error);
18641
18872
  return buildErrorResult(
@@ -19280,7 +19511,7 @@ async function runEvaluatorList(options) {
19280
19511
  fileChanges,
19281
19512
  workspacePath
19282
19513
  };
19283
- const evalFileDir = evalCase.file_paths[0] ? import_node_path45.default.dirname(evalCase.file_paths[0]) : process.cwd();
19514
+ const evalFileDir = evalCase.file_paths[0] ? import_node_path46.default.dirname(evalCase.file_paths[0]) : process.cwd();
19284
19515
  const dispatchContext = {
19285
19516
  graderProvider,
19286
19517
  targetResolver,
@@ -19510,7 +19741,7 @@ function extractProviderError(response) {
19510
19741
  return trimmed.length > 0 ? trimmed : void 0;
19511
19742
  }
19512
19743
  function createCacheKey(provider, target, evalCase, promptInputs) {
19513
- const hash = (0, import_node_crypto10.createHash)("sha256");
19744
+ const hash = (0, import_node_crypto11.createHash)("sha256");
19514
19745
  hash.update(provider.id);
19515
19746
  hash.update(target.name);
19516
19747
  hash.update(evalCase.id);
@@ -19613,8 +19844,8 @@ function computeWeightedMean(entries) {
19613
19844
  }
19614
19845
 
19615
19846
  // src/evaluation/evaluate.ts
19616
- var import_node_fs14 = require("fs");
19617
- var import_node_path46 = __toESM(require("path"), 1);
19847
+ var import_node_fs15 = require("fs");
19848
+ var import_node_path47 = __toESM(require("path"), 1);
19618
19849
 
19619
19850
  // src/evaluation/providers/function-provider.ts
19620
19851
  function createFunctionProvider(taskFn) {
@@ -19651,7 +19882,7 @@ async function evaluate(config) {
19651
19882
  }
19652
19883
  const gitRoot = await findGitRoot(process.cwd());
19653
19884
  const repoRoot = gitRoot ?? process.cwd();
19654
- const testFilePath = config.specFile ? import_node_path46.default.resolve(config.specFile) : import_node_path46.default.join(process.cwd(), "__programmatic__.yaml");
19885
+ const testFilePath = config.specFile ? import_node_path47.default.resolve(config.specFile) : import_node_path47.default.join(process.cwd(), "__programmatic__.yaml");
19655
19886
  await loadEnvHierarchy(repoRoot, testFilePath);
19656
19887
  let resolvedTarget;
19657
19888
  let taskProvider;
@@ -19772,11 +20003,11 @@ function computeSummary(results, durationMs) {
19772
20003
  var TARGET_FILE_CANDIDATES = [".agentv/targets.yaml", ".agentv/targets.yml"];
19773
20004
  async function discoverDefaultTarget(repoRoot) {
19774
20005
  const cwd = process.cwd();
19775
- const chain = buildDirectoryChain2(import_node_path46.default.join(cwd, "_placeholder"), repoRoot);
20006
+ const chain = buildDirectoryChain2(import_node_path47.default.join(cwd, "_placeholder"), repoRoot);
19776
20007
  for (const dir of chain) {
19777
20008
  for (const candidate of TARGET_FILE_CANDIDATES) {
19778
- const targetsPath = import_node_path46.default.join(dir, candidate);
19779
- if (!(0, import_node_fs14.existsSync)(targetsPath)) continue;
20009
+ const targetsPath = import_node_path47.default.join(dir, candidate);
20010
+ if (!(0, import_node_fs15.existsSync)(targetsPath)) continue;
19780
20011
  try {
19781
20012
  const definitions = await readTargetDefinitions(targetsPath);
19782
20013
  const defaultTarget = definitions.find((d) => d.name === "default");
@@ -19792,8 +20023,8 @@ async function loadEnvHierarchy(repoRoot, startPath) {
19792
20023
  const chain = buildDirectoryChain2(startPath, repoRoot);
19793
20024
  const envFiles = [];
19794
20025
  for (const dir of chain) {
19795
- const envPath = import_node_path46.default.join(dir, ".env");
19796
- if ((0, import_node_fs14.existsSync)(envPath)) envFiles.push(envPath);
20026
+ const envPath = import_node_path47.default.join(dir, ".env");
20027
+ if ((0, import_node_fs15.existsSync)(envPath)) envFiles.push(envPath);
19797
20028
  }
19798
20029
  for (let i = 0; i < envFiles.length; i++) {
19799
20030
  try {
@@ -19973,8 +20204,8 @@ function buildPrompt(criteria, question, referenceAnswer) {
19973
20204
  }
19974
20205
 
19975
20206
  // src/evaluation/cache/response-cache.ts
19976
- var import_promises31 = require("fs/promises");
19977
- var import_node_path47 = __toESM(require("path"), 1);
20207
+ var import_promises32 = require("fs/promises");
20208
+ var import_node_path48 = __toESM(require("path"), 1);
19978
20209
  var DEFAULT_CACHE_PATH = ".agentv/cache";
19979
20210
  var ResponseCache = class {
19980
20211
  cachePath;
@@ -19984,7 +20215,7 @@ var ResponseCache = class {
19984
20215
  async get(key) {
19985
20216
  const filePath = this.keyToPath(key);
19986
20217
  try {
19987
- const data = await (0, import_promises31.readFile)(filePath, "utf8");
20218
+ const data = await (0, import_promises32.readFile)(filePath, "utf8");
19988
20219
  return JSON.parse(data);
19989
20220
  } catch {
19990
20221
  return void 0;
@@ -19992,13 +20223,13 @@ var ResponseCache = class {
19992
20223
  }
19993
20224
  async set(key, value) {
19994
20225
  const filePath = this.keyToPath(key);
19995
- const dir = import_node_path47.default.dirname(filePath);
19996
- await (0, import_promises31.mkdir)(dir, { recursive: true });
19997
- await (0, import_promises31.writeFile)(filePath, JSON.stringify(value, null, 2), "utf8");
20226
+ const dir = import_node_path48.default.dirname(filePath);
20227
+ await (0, import_promises32.mkdir)(dir, { recursive: true });
20228
+ await (0, import_promises32.writeFile)(filePath, JSON.stringify(value, null, 2), "utf8");
19998
20229
  }
19999
20230
  keyToPath(key) {
20000
20231
  const prefix = key.slice(0, 2);
20001
- return import_node_path47.default.join(this.cachePath, prefix, `${key}.json`);
20232
+ return import_node_path48.default.join(this.cachePath, prefix, `${key}.json`);
20002
20233
  }
20003
20234
  };
20004
20235
  function shouldEnableCache(params) {
@@ -20196,6 +20427,17 @@ var OtelTraceExporter = class {
20196
20427
  if (result.durationMs != null)
20197
20428
  rootSpan.setAttribute("agentv.trace.duration_ms", result.durationMs);
20198
20429
  if (result.costUsd != null) rootSpan.setAttribute("agentv.trace.cost_usd", result.costUsd);
20430
+ if (result.tokenUsage) {
20431
+ if (result.tokenUsage.input != null) {
20432
+ rootSpan.setAttribute("agentv.trace.token_input", result.tokenUsage.input);
20433
+ }
20434
+ if (result.tokenUsage.output != null) {
20435
+ rootSpan.setAttribute("agentv.trace.token_output", result.tokenUsage.output);
20436
+ }
20437
+ if (result.tokenUsage.cached != null) {
20438
+ rootSpan.setAttribute("agentv.trace.token_cached", result.tokenUsage.cached);
20439
+ }
20440
+ }
20199
20441
  if (result.trace) {
20200
20442
  const t = result.trace;
20201
20443
  rootSpan.setAttribute("agentv.trace.event_count", t.eventCount);
@@ -20298,6 +20540,7 @@ var OtelTraceExporter = class {
20298
20540
  tracer.startActiveSpan(
20299
20541
  spanName,
20300
20542
  { startTime: startHr },
20543
+ parentCtx,
20301
20544
  (span) => {
20302
20545
  if (isAssistant) {
20303
20546
  span.setAttribute("gen_ai.operation.name", "chat");
@@ -20330,6 +20573,7 @@ var OtelTraceExporter = class {
20330
20573
  tracer.startActiveSpan(
20331
20574
  `execute_tool ${tc.tool}`,
20332
20575
  {},
20576
+ msgCtx,
20333
20577
  (toolSpan) => {
20334
20578
  toolSpan.setAttribute("gen_ai.tool.name", tc.tool);
20335
20579
  if (tc.id) toolSpan.setAttribute("gen_ai.tool.call.id", tc.id);
@@ -20370,8 +20614,12 @@ var OtelStreamingObserver = class {
20370
20614
  rootSpan = null;
20371
20615
  // biome-ignore lint/suspicious/noExplicitAny: OTel context loaded dynamically
20372
20616
  rootCtx = null;
20617
+ observedChildSpans = false;
20618
+ pendingMetrics = null;
20373
20619
  /** Create root eval span immediately (visible in backend right away) */
20374
20620
  startEvalCase(testId, target, evalSet) {
20621
+ this.pendingMetrics = null;
20622
+ this.observedChildSpans = false;
20375
20623
  const ctx = this.parentCtx ?? this.api.context.active();
20376
20624
  this.rootSpan = this.tracer.startSpan("agentv.eval", void 0, ctx);
20377
20625
  this.rootSpan.setAttribute("gen_ai.operation.name", "evaluate");
@@ -20384,8 +20632,9 @@ var OtelStreamingObserver = class {
20384
20632
  /** Create and immediately export a tool span */
20385
20633
  onToolCall(name, input, output, _durationMs, toolCallId) {
20386
20634
  if (!this.rootCtx) return;
20635
+ this.observedChildSpans = true;
20387
20636
  this.api.context.with(this.rootCtx, () => {
20388
- const span = this.tracer.startSpan(`execute_tool ${name}`);
20637
+ const span = this.tracer.startSpan(`execute_tool ${name}`, void 0, this.rootCtx);
20389
20638
  span.setAttribute("gen_ai.tool.name", name);
20390
20639
  if (toolCallId) span.setAttribute("gen_ai.tool.call.id", toolCallId);
20391
20640
  if (this.captureContent) {
@@ -20406,8 +20655,9 @@ var OtelStreamingObserver = class {
20406
20655
  /** Create and immediately export an LLM span */
20407
20656
  onLlmCall(model, tokenUsage) {
20408
20657
  if (!this.rootCtx) return;
20658
+ this.observedChildSpans = true;
20409
20659
  this.api.context.with(this.rootCtx, () => {
20410
- const span = this.tracer.startSpan(`chat ${model}`);
20660
+ const span = this.tracer.startSpan(`chat ${model}`, void 0, this.rootCtx);
20411
20661
  span.setAttribute("gen_ai.operation.name", "chat");
20412
20662
  span.setAttribute("gen_ai.request.model", model);
20413
20663
  span.setAttribute("gen_ai.response.model", model);
@@ -20422,10 +20672,53 @@ var OtelStreamingObserver = class {
20422
20672
  span.end();
20423
20673
  });
20424
20674
  }
20675
+ /** Record final execution metrics before the root span is finalized. */
20676
+ recordEvalMetrics(result) {
20677
+ this.pendingMetrics = result;
20678
+ }
20425
20679
  /** Finalize root span with score/verdict after evaluation completes */
20426
20680
  finalizeEvalCase(score, error) {
20427
20681
  if (!this.rootSpan) return;
20428
20682
  this.rootSpan.setAttribute("agentv.score", score);
20683
+ if (this.pendingMetrics?.durationMs != null) {
20684
+ this.rootSpan.setAttribute("agentv.trace.duration_ms", this.pendingMetrics.durationMs);
20685
+ }
20686
+ if (this.pendingMetrics?.costUsd != null) {
20687
+ this.rootSpan.setAttribute("agentv.trace.cost_usd", this.pendingMetrics.costUsd);
20688
+ }
20689
+ if (this.pendingMetrics?.tokenUsage) {
20690
+ if (this.pendingMetrics.tokenUsage.input != null) {
20691
+ this.rootSpan.setAttribute(
20692
+ "agentv.trace.token_input",
20693
+ this.pendingMetrics.tokenUsage.input
20694
+ );
20695
+ }
20696
+ if (this.pendingMetrics.tokenUsage.output != null) {
20697
+ this.rootSpan.setAttribute(
20698
+ "agentv.trace.token_output",
20699
+ this.pendingMetrics.tokenUsage.output
20700
+ );
20701
+ }
20702
+ if (this.pendingMetrics.tokenUsage.cached != null) {
20703
+ this.rootSpan.setAttribute(
20704
+ "agentv.trace.token_cached",
20705
+ this.pendingMetrics.tokenUsage.cached
20706
+ );
20707
+ }
20708
+ }
20709
+ if (this.pendingMetrics?.trace) {
20710
+ this.rootSpan.setAttribute("agentv.trace.event_count", this.pendingMetrics.trace.eventCount);
20711
+ this.rootSpan.setAttribute(
20712
+ "agentv.trace.tool_names",
20713
+ Object.keys(this.pendingMetrics.trace.toolCalls).sort().join(",")
20714
+ );
20715
+ if (this.pendingMetrics.trace.llmCallCount != null) {
20716
+ this.rootSpan.setAttribute(
20717
+ "agentv.trace.llm_call_count",
20718
+ this.pendingMetrics.trace.llmCallCount
20719
+ );
20720
+ }
20721
+ }
20429
20722
  if (error) {
20430
20723
  this.rootSpan.setStatus({ code: this.api.SpanStatusCode.ERROR, message: error });
20431
20724
  } else {
@@ -20434,6 +20727,33 @@ var OtelStreamingObserver = class {
20434
20727
  this.rootSpan.end();
20435
20728
  this.rootSpan = null;
20436
20729
  this.rootCtx = null;
20730
+ this.observedChildSpans = false;
20731
+ this.pendingMetrics = null;
20732
+ }
20733
+ /** Backfill child spans from the completed result when the provider emitted no live callbacks. */
20734
+ completeFromResult(result) {
20735
+ this.recordEvalMetrics({
20736
+ durationMs: result.durationMs,
20737
+ costUsd: result.costUsd,
20738
+ tokenUsage: result.tokenUsage,
20739
+ trace: result.trace
20740
+ });
20741
+ if (this.observedChildSpans || !this.rootCtx) {
20742
+ return;
20743
+ }
20744
+ const model = result.output.find((msg) => msg.role === "assistant")?.metadata?.model ?? result.target ?? "unknown";
20745
+ this.onLlmCall(String(model), result.tokenUsage);
20746
+ for (const message of result.output) {
20747
+ for (const toolCall of message.toolCalls ?? []) {
20748
+ this.onToolCall(
20749
+ toolCall.tool,
20750
+ toolCall.input,
20751
+ toolCall.output,
20752
+ toolCall.durationMs ?? 0,
20753
+ toolCall.id
20754
+ );
20755
+ }
20756
+ }
20437
20757
  }
20438
20758
  /** Return the active eval span's trace ID and span ID for Braintrust trace bridging */
20439
20759
  getActiveSpanIds() {