@agentv/core 3.11.0 → 3.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -19,7 +19,7 @@ import {
19
19
  readTextFile,
20
20
  resolveFileReference,
21
21
  resolveTargetDefinition
22
- } from "./chunk-AVTN5AB7.js";
22
+ } from "./chunk-4XWPXNQM.js";
23
23
  import {
24
24
  AgentvProvider
25
25
  } from "./chunk-W5YDZWT4.js";
@@ -28,7 +28,7 @@ import {
28
28
  } from "./chunk-HFSYZHGF.js";
29
29
  import {
30
30
  SimpleTraceFileExporter
31
- } from "./chunk-HMXZ2AX4.js";
31
+ } from "./chunk-3G2KXH7N.js";
32
32
 
33
33
  // src/evaluation/trace.ts
34
34
  function computeTraceSummary(messages) {
@@ -6793,265 +6793,7 @@ var MockProvider = class {
6793
6793
  }
6794
6794
  };
6795
6795
 
6796
- // src/evaluation/providers/pi-utils.ts
6797
- function extractPiTextContent(content) {
6798
- if (typeof content === "string") {
6799
- return content;
6800
- }
6801
- if (!Array.isArray(content)) {
6802
- return void 0;
6803
- }
6804
- const textParts = [];
6805
- for (const part of content) {
6806
- if (!part || typeof part !== "object") {
6807
- continue;
6808
- }
6809
- const p = part;
6810
- if (p.type === "text" && typeof p.text === "string") {
6811
- textParts.push(p.text);
6812
- }
6813
- }
6814
- return textParts.length > 0 ? textParts.join("\n") : void 0;
6815
- }
6816
- function toFiniteNumber(value) {
6817
- if (typeof value === "number" && Number.isFinite(value)) return value;
6818
- return void 0;
6819
- }
6820
-
6821
- // src/evaluation/providers/pi-agent-sdk.ts
6822
- var piAgentModule = null;
6823
- var piAiModule = null;
6824
- async function loadPiModules() {
6825
- if (!piAgentModule || !piAiModule) {
6826
- try {
6827
- [piAgentModule, piAiModule] = await Promise.all([
6828
- import("@mariozechner/pi-agent-core"),
6829
- import("@mariozechner/pi-ai")
6830
- ]);
6831
- } catch (error) {
6832
- throw new Error(
6833
- `Failed to load pi-agent-sdk dependencies. Please install them:
6834
- npm install @mariozechner/pi-agent-core @mariozechner/pi-ai
6835
-
6836
- Original error: ${error instanceof Error ? error.message : String(error)}`
6837
- );
6838
- }
6839
- }
6840
- return {
6841
- Agent: piAgentModule.Agent,
6842
- getModel: piAiModule.getModel,
6843
- getEnvApiKey: piAiModule.getEnvApiKey
6844
- };
6845
- }
6846
- var PiAgentSdkProvider = class {
6847
- id;
6848
- kind = "pi-agent-sdk";
6849
- targetName;
6850
- supportsBatch = false;
6851
- config;
6852
- constructor(targetName, config) {
6853
- this.id = `pi-agent-sdk:${targetName}`;
6854
- this.targetName = targetName;
6855
- this.config = config;
6856
- }
6857
- async invoke(request) {
6858
- if (request.signal?.aborted) {
6859
- throw new Error("Pi agent SDK request was aborted before execution");
6860
- }
6861
- const { Agent, getModel, getEnvApiKey } = await loadPiModules();
6862
- const startTimeIso = (/* @__PURE__ */ new Date()).toISOString();
6863
- const startMs = Date.now();
6864
- const providerName = this.config.subprovider ?? "anthropic";
6865
- const modelId = this.config.model ?? "claude-sonnet-4-20250514";
6866
- const model = getModel(providerName, modelId);
6867
- const systemPrompt = this.config.systemPrompt ?? "Answer directly and concisely.";
6868
- const agent = new Agent({
6869
- initialState: {
6870
- systemPrompt,
6871
- model,
6872
- tools: [],
6873
- // No tools for simple Q&A
6874
- messages: []
6875
- },
6876
- getApiKey: async (provider) => {
6877
- return this.config.apiKey ?? getEnvApiKey(provider) ?? void 0;
6878
- }
6879
- });
6880
- let tokenUsage;
6881
- let costUsd;
6882
- const toolTrackers = /* @__PURE__ */ new Map();
6883
- const completedToolResults = /* @__PURE__ */ new Map();
6884
- const unsubscribe = agent.subscribe((event) => {
6885
- switch (event.type) {
6886
- case "message_end": {
6887
- const msg = event.message;
6888
- if (msg && typeof msg === "object" && "role" in msg && msg.role === "assistant" && "usage" in msg) {
6889
- const usage = msg.usage;
6890
- if (usage && typeof usage === "object") {
6891
- const u = usage;
6892
- const input = toFiniteNumber(u.input);
6893
- const output = toFiniteNumber(u.output);
6894
- const cached = toFiniteNumber(u.cacheRead);
6895
- let callDelta;
6896
- if (input !== void 0 || output !== void 0) {
6897
- callDelta = {
6898
- input: input ?? 0,
6899
- output: output ?? 0,
6900
- ...cached !== void 0 ? { cached } : {}
6901
- };
6902
- tokenUsage = {
6903
- input: (tokenUsage?.input ?? 0) + callDelta.input,
6904
- output: (tokenUsage?.output ?? 0) + callDelta.output,
6905
- ...cached !== void 0 ? { cached: (tokenUsage?.cached ?? 0) + cached } : tokenUsage?.cached !== void 0 ? { cached: tokenUsage.cached } : {}
6906
- };
6907
- }
6908
- const cost = u.cost;
6909
- if (cost && typeof cost === "object") {
6910
- const total = toFiniteNumber(cost.total);
6911
- if (total !== void 0) {
6912
- costUsd = (costUsd ?? 0) + total;
6913
- }
6914
- }
6915
- request.streamCallbacks?.onLlmCallEnd?.(modelId, callDelta);
6916
- }
6917
- }
6918
- break;
6919
- }
6920
- case "tool_execution_start": {
6921
- toolTrackers.set(event.toolCallId, {
6922
- toolCallId: event.toolCallId,
6923
- toolName: event.toolName,
6924
- args: event.args,
6925
- startMs: Date.now(),
6926
- startTime: (/* @__PURE__ */ new Date()).toISOString()
6927
- });
6928
- request.streamCallbacks?.onToolCallStart?.(event.toolName, event.toolCallId);
6929
- break;
6930
- }
6931
- case "tool_execution_end": {
6932
- const tracker = toolTrackers.get(event.toolCallId);
6933
- const durationMs = tracker ? Date.now() - tracker.startMs : 0;
6934
- completedToolResults.set(event.toolCallId, {
6935
- output: event.result,
6936
- durationMs
6937
- });
6938
- request.streamCallbacks?.onToolCallEnd?.(
6939
- event.toolName,
6940
- tracker?.args,
6941
- event.result,
6942
- durationMs,
6943
- event.toolCallId
6944
- );
6945
- toolTrackers.delete(event.toolCallId);
6946
- break;
6947
- }
6948
- }
6949
- });
6950
- try {
6951
- if (this.config.timeoutMs) {
6952
- const timeoutMs = this.config.timeoutMs;
6953
- const timeoutPromise = new Promise((_, reject) => {
6954
- setTimeout(
6955
- () => reject(new Error(`Pi agent SDK timed out after ${timeoutMs}ms`)),
6956
- timeoutMs
6957
- );
6958
- });
6959
- await Promise.race([agent.prompt(request.question), timeoutPromise]);
6960
- } else {
6961
- await agent.prompt(request.question);
6962
- }
6963
- await agent.waitForIdle();
6964
- const agentMessages = agent.state.messages;
6965
- const output = [];
6966
- for (const msg of agentMessages) {
6967
- output.push(convertAgentMessage(msg, toolTrackers, completedToolResults));
6968
- }
6969
- const endTimeIso = (/* @__PURE__ */ new Date()).toISOString();
6970
- const durationMs = Date.now() - startMs;
6971
- return {
6972
- raw: {
6973
- messages: agentMessages,
6974
- systemPrompt,
6975
- model: this.config.model,
6976
- subprovider: this.config.subprovider
6977
- },
6978
- output,
6979
- tokenUsage,
6980
- costUsd,
6981
- durationMs,
6982
- startTime: startTimeIso,
6983
- endTime: endTimeIso
6984
- };
6985
- } finally {
6986
- unsubscribe();
6987
- }
6988
- }
6989
- };
6990
- function convertAgentMessage(message, toolTrackers, completedToolResults) {
6991
- if (!message || typeof message !== "object") {
6992
- return { role: "unknown", content: String(message) };
6993
- }
6994
- const msg = message;
6995
- const role = typeof msg.role === "string" ? msg.role : "unknown";
6996
- const content = extractPiTextContent(msg.content);
6997
- const toolCalls = extractToolCalls3(msg.content, toolTrackers, completedToolResults);
6998
- const startTime = typeof msg.timestamp === "number" ? new Date(msg.timestamp).toISOString() : typeof msg.timestamp === "string" ? msg.timestamp : void 0;
6999
- let msgTokenUsage;
7000
- if (msg.usage && typeof msg.usage === "object") {
7001
- const u = msg.usage;
7002
- const input = toFiniteNumber(u.input);
7003
- const output = toFiniteNumber(u.output);
7004
- if (input !== void 0 || output !== void 0) {
7005
- msgTokenUsage = {
7006
- input: input ?? 0,
7007
- output: output ?? 0,
7008
- ...toFiniteNumber(u.cacheRead) !== void 0 ? { cached: toFiniteNumber(u.cacheRead) } : {}
7009
- };
7010
- }
7011
- }
7012
- const metadata = {};
7013
- if (msg.api) metadata.api = msg.api;
7014
- if (msg.provider) metadata.provider = msg.provider;
7015
- if (msg.model) metadata.model = msg.model;
7016
- if (msg.stopReason) metadata.stopReason = msg.stopReason;
7017
- return {
7018
- role,
7019
- content,
7020
- toolCalls: toolCalls.length > 0 ? toolCalls : void 0,
7021
- startTime,
7022
- metadata: Object.keys(metadata).length > 0 ? metadata : void 0,
7023
- tokenUsage: msgTokenUsage
7024
- };
7025
- }
7026
- function extractToolCalls3(content, toolTrackers, completedToolResults) {
7027
- if (!Array.isArray(content)) {
7028
- return [];
7029
- }
7030
- const toolCalls = [];
7031
- for (const part of content) {
7032
- if (!part || typeof part !== "object") {
7033
- continue;
7034
- }
7035
- const p = part;
7036
- if (p.type === "toolCall" && typeof p.name === "string") {
7037
- const id = typeof p.id === "string" ? p.id : void 0;
7038
- const tracker = id ? toolTrackers.get(id) : void 0;
7039
- const completed = id ? completedToolResults.get(id) : void 0;
7040
- toolCalls.push({
7041
- tool: p.name,
7042
- input: p.arguments,
7043
- id,
7044
- output: completed?.output,
7045
- durationMs: completed?.durationMs,
7046
- startTime: tracker?.startTime,
7047
- endTime: tracker?.startTime && completed?.durationMs !== void 0 ? new Date(new Date(tracker.startTime).getTime() + completed.durationMs).toISOString() : void 0
7048
- });
7049
- }
7050
- }
7051
- return toolCalls;
7052
- }
7053
-
7054
- // src/evaluation/providers/pi-coding-agent.ts
6796
+ // src/evaluation/providers/pi-cli.ts
7055
6797
  import { spawn as spawn3 } from "node:child_process";
7056
6798
  import { randomUUID as randomUUID7 } from "node:crypto";
7057
6799
  import { createWriteStream as createWriteStream5 } from "node:fs";
@@ -7112,25 +6854,50 @@ function subscribeToPiLogEntries(listener) {
7112
6854
  };
7113
6855
  }
7114
6856
 
7115
- // src/evaluation/providers/pi-coding-agent.ts
6857
+ // src/evaluation/providers/pi-utils.ts
6858
+ function extractPiTextContent(content) {
6859
+ if (typeof content === "string") {
6860
+ return content;
6861
+ }
6862
+ if (!Array.isArray(content)) {
6863
+ return void 0;
6864
+ }
6865
+ const textParts = [];
6866
+ for (const part of content) {
6867
+ if (!part || typeof part !== "object") {
6868
+ continue;
6869
+ }
6870
+ const p = part;
6871
+ if (p.type === "text" && typeof p.text === "string") {
6872
+ textParts.push(p.text);
6873
+ }
6874
+ }
6875
+ return textParts.length > 0 ? textParts.join("\n") : void 0;
6876
+ }
6877
+ function toFiniteNumber(value) {
6878
+ if (typeof value === "number" && Number.isFinite(value)) return value;
6879
+ return void 0;
6880
+ }
6881
+
6882
+ // src/evaluation/providers/pi-cli.ts
7116
6883
  var WORKSPACE_PREFIX = "agentv-pi-";
7117
6884
  var PROMPT_FILENAME = "prompt.md";
7118
- var PiCodingAgentProvider = class {
6885
+ var PiCliProvider = class {
7119
6886
  id;
7120
- kind = "pi-coding-agent";
6887
+ kind = "pi-cli";
7121
6888
  targetName;
7122
6889
  supportsBatch = false;
7123
6890
  config;
7124
6891
  runPi;
7125
6892
  constructor(targetName, config, runner = defaultPiRunner) {
7126
- this.id = `pi-coding-agent:${targetName}`;
6893
+ this.id = `pi-cli:${targetName}`;
7127
6894
  this.targetName = targetName;
7128
6895
  this.config = config;
7129
6896
  this.runPi = runner;
7130
6897
  }
7131
6898
  async invoke(request) {
7132
6899
  if (request.signal?.aborted) {
7133
- throw new Error("Pi coding agent request was aborted before execution");
6900
+ throw new Error("Pi CLI request was aborted before execution");
7134
6901
  }
7135
6902
  const inputFiles = normalizeInputFiles(request.inputFiles);
7136
6903
  const startTime = (/* @__PURE__ */ new Date()).toISOString();
@@ -7140,17 +6907,17 @@ var PiCodingAgentProvider = class {
7140
6907
  try {
7141
6908
  const promptFile = path17.join(workspaceRoot, PROMPT_FILENAME);
7142
6909
  await writeFile(promptFile, request.question, "utf8");
7143
- const args = this.buildPiArgs(request.question, inputFiles, request.captureFileChanges);
6910
+ const args = this.buildPiArgs(request.question, inputFiles);
7144
6911
  const cwd = this.resolveCwd(workspaceRoot, request.cwd);
7145
6912
  const result = await this.executePi(args, cwd, request.signal, logger);
7146
6913
  if (result.timedOut) {
7147
6914
  throw new Error(
7148
- `Pi coding agent timed out${formatTimeoutSuffix3(this.config.timeoutMs ?? void 0)}`
6915
+ `Pi CLI timed out${formatTimeoutSuffix3(this.config.timeoutMs ?? void 0)}`
7149
6916
  );
7150
6917
  }
7151
6918
  if (result.exitCode !== 0) {
7152
6919
  const detail = pickDetail(result.stderr, result.stdout);
7153
- const prefix = `Pi coding agent exited with code ${result.exitCode}`;
6920
+ const prefix = `Pi CLI exited with code ${result.exitCode}`;
7154
6921
  throw new Error(detail ? `${prefix}: ${detail}` : prefix);
7155
6922
  }
7156
6923
  const parsed = parsePiJsonl(result.stdout);
@@ -7207,7 +6974,7 @@ var PiCodingAgentProvider = class {
7207
6974
  }
7208
6975
  return path17.resolve(this.config.cwd);
7209
6976
  }
7210
- buildPiArgs(prompt, inputFiles, _captureFileChanges) {
6977
+ buildPiArgs(prompt, inputFiles) {
7211
6978
  const args = [];
7212
6979
  if (this.config.subprovider) {
7213
6980
  args.push("--provider", this.config.subprovider);
@@ -7259,7 +7026,7 @@ ${prompt}` : prompt;
7259
7026
  const err = error;
7260
7027
  if (err.code === "ENOENT") {
7261
7028
  throw new Error(
7262
- `Pi coding agent executable '${this.config.executable}' was not found. Update the target settings.executable or add it to PATH.`
7029
+ `Pi CLI executable '${this.config.executable}' was not found. Update the target executable or add it to PATH.`
7263
7030
  );
7264
7031
  }
7265
7032
  throw error;
@@ -7269,26 +7036,18 @@ ${prompt}` : prompt;
7269
7036
  const env = { ...process.env };
7270
7037
  if (this.config.apiKey) {
7271
7038
  const provider = this.config.subprovider?.toLowerCase() ?? "google";
7272
- switch (provider) {
7273
- case "google":
7274
- case "gemini":
7275
- env.GEMINI_API_KEY = this.config.apiKey;
7276
- break;
7277
- case "anthropic":
7278
- env.ANTHROPIC_API_KEY = this.config.apiKey;
7279
- break;
7280
- case "openai":
7281
- env.OPENAI_API_KEY = this.config.apiKey;
7282
- break;
7283
- case "groq":
7284
- env.GROQ_API_KEY = this.config.apiKey;
7285
- break;
7286
- case "xai":
7287
- env.XAI_API_KEY = this.config.apiKey;
7288
- break;
7289
- case "openrouter":
7290
- env.OPENROUTER_API_KEY = this.config.apiKey;
7291
- break;
7039
+ const ENV_KEY_MAP = {
7040
+ google: "GEMINI_API_KEY",
7041
+ gemini: "GEMINI_API_KEY",
7042
+ anthropic: "ANTHROPIC_API_KEY",
7043
+ openai: "OPENAI_API_KEY",
7044
+ groq: "GROQ_API_KEY",
7045
+ xai: "XAI_API_KEY",
7046
+ openrouter: "OPENROUTER_API_KEY"
7047
+ };
7048
+ const envKey = ENV_KEY_MAP[provider];
7049
+ if (envKey) {
7050
+ env[envKey] = this.config.apiKey;
7292
7051
  }
7293
7052
  }
7294
7053
  return env;
@@ -7306,7 +7065,7 @@ ${prompt}` : prompt;
7306
7065
  if (this.config.logDir) {
7307
7066
  return path17.resolve(this.config.logDir);
7308
7067
  }
7309
- return path17.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
7068
+ return path17.join(process.cwd(), ".agentv", "logs", "pi-cli");
7310
7069
  }
7311
7070
  async createStreamLogger(request) {
7312
7071
  const logDir = this.resolveLogDirectory();
@@ -7358,7 +7117,7 @@ var PiStreamLogger = class _PiStreamLogger {
7358
7117
  static async create(options) {
7359
7118
  const logger = new _PiStreamLogger(options.filePath, options.format);
7360
7119
  const header = [
7361
- "# Pi Coding Agent stream log",
7120
+ "# Pi CLI stream log",
7362
7121
  `# target: ${options.targetName}`,
7363
7122
  options.evalCaseId ? `# eval: ${options.evalCaseId}` : void 0,
7364
7123
  options.attempt !== void 0 ? `# attempt: ${options.attempt + 1}` : void 0,
@@ -7507,10 +7266,10 @@ function summarizePiEvent(event) {
7507
7266
  return `${type}: ${role}`;
7508
7267
  }
7509
7268
  case "message_update": {
7510
- const event2 = record.assistantMessageEvent;
7511
- const eventType = event2?.type;
7269
+ const evt = record.assistantMessageEvent;
7270
+ const eventType = evt?.type;
7512
7271
  if (eventType === "text_delta") {
7513
- const delta = event2?.delta;
7272
+ const delta = evt?.delta;
7514
7273
  if (typeof delta === "string") {
7515
7274
  const preview = delta.length > 50 ? `${delta.slice(0, 50)}...` : delta;
7516
7275
  return `text_delta: ${preview}`;
@@ -7532,7 +7291,7 @@ function tryParseJsonValue(rawLine) {
7532
7291
  function parsePiJsonl(output) {
7533
7292
  const trimmed = output.trim();
7534
7293
  if (trimmed.length === 0) {
7535
- throw new Error("Pi coding agent produced no output");
7294
+ throw new Error("Pi CLI produced no output");
7536
7295
  }
7537
7296
  const lines = trimmed.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.length > 0);
7538
7297
  const parsed = [];
@@ -7543,38 +7302,27 @@ function parsePiJsonl(output) {
7543
7302
  }
7544
7303
  }
7545
7304
  if (parsed.length === 0) {
7546
- throw new Error("Pi coding agent produced no valid JSON output");
7305
+ throw new Error("Pi CLI produced no valid JSON output");
7547
7306
  }
7548
7307
  return parsed;
7549
7308
  }
7550
7309
  function extractMessages(events) {
7551
7310
  for (let i = events.length - 1; i >= 0; i--) {
7552
7311
  const event = events[i];
7553
- if (!event || typeof event !== "object") {
7554
- continue;
7555
- }
7312
+ if (!event || typeof event !== "object") continue;
7556
7313
  const record = event;
7557
- if (record.type !== "agent_end") {
7558
- continue;
7559
- }
7314
+ if (record.type !== "agent_end") continue;
7560
7315
  const messages = record.messages;
7561
- if (!Array.isArray(messages)) {
7562
- continue;
7563
- }
7316
+ if (!Array.isArray(messages)) continue;
7564
7317
  return messages.map(convertPiMessage).filter((m) => m !== void 0);
7565
7318
  }
7566
7319
  const output = [];
7567
7320
  for (const event of events) {
7568
- if (!event || typeof event !== "object") {
7569
- continue;
7570
- }
7321
+ if (!event || typeof event !== "object") continue;
7571
7322
  const record = event;
7572
7323
  if (record.type === "turn_end") {
7573
- const message = record.message;
7574
- const converted = convertPiMessage(message);
7575
- if (converted) {
7576
- output.push(converted);
7577
- }
7324
+ const converted = convertPiMessage(record.message);
7325
+ if (converted) output.push(converted);
7578
7326
  }
7579
7327
  }
7580
7328
  return output;
@@ -7591,10 +7339,7 @@ function extractTokenUsage(events) {
7591
7339
  const input = toFiniteNumber(u.input_tokens ?? u.inputTokens ?? u.input);
7592
7340
  const output = toFiniteNumber(u.output_tokens ?? u.outputTokens ?? u.output);
7593
7341
  if (input !== void 0 || output !== void 0) {
7594
- const result = {
7595
- input: input ?? 0,
7596
- output: output ?? 0
7597
- };
7342
+ const result = { input: input ?? 0, output: output ?? 0 };
7598
7343
  const cached = toFiniteNumber(u.cache_read_input_tokens ?? u.cached ?? u.cachedTokens);
7599
7344
  const reasoning = toFiniteNumber(u.reasoning_tokens ?? u.reasoningTokens ?? u.reasoning);
7600
7345
  return {
@@ -7634,40 +7379,577 @@ function aggregateUsageFromMessages(messages) {
7634
7379
  }
7635
7380
  }
7636
7381
  }
7637
- if (!found) return void 0;
7638
- const result = { input: totalInput, output: totalOutput };
7639
- if (totalCached !== void 0) {
7640
- return { ...result, cached: totalCached };
7382
+ if (!found) return void 0;
7383
+ const result = { input: totalInput, output: totalOutput };
7384
+ if (totalCached !== void 0) {
7385
+ return { ...result, cached: totalCached };
7386
+ }
7387
+ return result;
7388
+ }
7389
+ function convertPiMessage(message) {
7390
+ if (!message || typeof message !== "object") return void 0;
7391
+ const msg = message;
7392
+ const role = msg.role;
7393
+ if (typeof role !== "string") return void 0;
7394
+ const content = extractPiTextContent(msg.content);
7395
+ const toolCalls = extractToolCalls3(msg.content);
7396
+ const startTime = typeof msg.timestamp === "number" ? new Date(msg.timestamp).toISOString() : typeof msg.timestamp === "string" ? msg.timestamp : void 0;
7397
+ const metadata = {};
7398
+ if (msg.api) metadata.api = msg.api;
7399
+ if (msg.provider) metadata.provider = msg.provider;
7400
+ if (msg.model) metadata.model = msg.model;
7401
+ if (msg.usage) metadata.usage = msg.usage;
7402
+ if (msg.stopReason) metadata.stopReason = msg.stopReason;
7403
+ return {
7404
+ role,
7405
+ content,
7406
+ toolCalls: toolCalls.length > 0 ? toolCalls : void 0,
7407
+ startTime,
7408
+ metadata: Object.keys(metadata).length > 0 ? metadata : void 0
7409
+ };
7410
+ }
7411
+ function extractToolCalls3(content) {
7412
+ if (!Array.isArray(content)) return [];
7413
+ const toolCalls = [];
7414
+ for (const part of content) {
7415
+ if (!part || typeof part !== "object") continue;
7416
+ const p = part;
7417
+ if (p.type === "tool_use" && typeof p.name === "string") {
7418
+ toolCalls.push({
7419
+ tool: p.name,
7420
+ input: p.input,
7421
+ id: typeof p.id === "string" ? p.id : void 0
7422
+ });
7423
+ }
7424
+ if (p.type === "toolCall" && typeof p.name === "string") {
7425
+ toolCalls.push({
7426
+ tool: p.name,
7427
+ input: p.arguments,
7428
+ id: typeof p.id === "string" ? p.id : void 0
7429
+ });
7430
+ }
7431
+ if (p.type === "tool_result" && typeof p.tool_use_id === "string") {
7432
+ const existing = toolCalls.find((tc) => tc.id === p.tool_use_id);
7433
+ if (existing) {
7434
+ const idx = toolCalls.indexOf(existing);
7435
+ toolCalls[idx] = { ...existing, output: p.content };
7436
+ }
7437
+ }
7438
+ }
7439
+ return toolCalls;
7440
+ }
7441
+ function escapeAtSymbols(prompt) {
7442
+ return prompt.replace(/@\[([^\]]+)\]:/g, "[[$1]]:");
7443
+ }
7444
+ function pickDetail(stderr, stdout) {
7445
+ const errorText = stderr.trim();
7446
+ if (errorText.length > 0) return errorText;
7447
+ const stdoutText = stdout.trim();
7448
+ return stdoutText.length > 0 ? stdoutText : void 0;
7449
+ }
7450
+ function formatTimeoutSuffix3(timeoutMs) {
7451
+ if (!timeoutMs || timeoutMs <= 0) return "";
7452
+ return ` after ${Math.ceil(timeoutMs / 1e3)}s`;
7453
+ }
7454
+ async function defaultPiRunner(options) {
7455
+ return await new Promise((resolve, reject) => {
7456
+ const parts = options.executable.split(/\s+/);
7457
+ const executable = parts[0];
7458
+ const executableArgs = parts.slice(1);
7459
+ const allArgs = [...executableArgs, ...options.args];
7460
+ const child = spawn3(executable, allArgs, {
7461
+ cwd: options.cwd,
7462
+ env: options.env,
7463
+ stdio: ["pipe", "pipe", "pipe"],
7464
+ shell: false
7465
+ });
7466
+ let stdout = "";
7467
+ let stderr = "";
7468
+ let timedOut = false;
7469
+ const onAbort = () => {
7470
+ child.kill("SIGTERM");
7471
+ };
7472
+ if (options.signal) {
7473
+ if (options.signal.aborted) {
7474
+ onAbort();
7475
+ } else {
7476
+ options.signal.addEventListener("abort", onAbort, { once: true });
7477
+ }
7478
+ }
7479
+ let timeoutHandle;
7480
+ if (options.timeoutMs && options.timeoutMs > 0) {
7481
+ timeoutHandle = setTimeout(() => {
7482
+ timedOut = true;
7483
+ child.kill("SIGTERM");
7484
+ }, options.timeoutMs);
7485
+ timeoutHandle.unref?.();
7486
+ }
7487
+ child.stdout.setEncoding("utf8");
7488
+ child.stdout.on("data", (chunk) => {
7489
+ stdout += chunk;
7490
+ options.onStdoutChunk?.(chunk);
7491
+ });
7492
+ child.stderr.setEncoding("utf8");
7493
+ child.stderr.on("data", (chunk) => {
7494
+ stderr += chunk;
7495
+ options.onStderrChunk?.(chunk);
7496
+ });
7497
+ child.stdin.end();
7498
+ const cleanup = () => {
7499
+ if (timeoutHandle) clearTimeout(timeoutHandle);
7500
+ if (options.signal) options.signal.removeEventListener("abort", onAbort);
7501
+ };
7502
+ child.on("error", (error) => {
7503
+ cleanup();
7504
+ reject(error);
7505
+ });
7506
+ child.on("close", (code) => {
7507
+ cleanup();
7508
+ resolve({
7509
+ stdout,
7510
+ stderr,
7511
+ exitCode: typeof code === "number" ? code : -1,
7512
+ timedOut
7513
+ });
7514
+ });
7515
+ });
7516
+ }
7517
+
7518
+ // src/evaluation/providers/pi-coding-agent.ts
7519
+ import { execSync } from "node:child_process";
7520
+ import { randomUUID as randomUUID8 } from "node:crypto";
7521
+ import { createWriteStream as createWriteStream6 } from "node:fs";
7522
+ import { mkdir as mkdir7 } from "node:fs/promises";
7523
+ import path18 from "node:path";
7524
+ import { createInterface } from "node:readline";
7525
+ var piCodingAgentModule = null;
7526
+ var piAiModule = null;
7527
+ async function promptInstall() {
7528
+ if (!process.stdout.isTTY) return false;
7529
+ const rl = createInterface({ input: process.stdin, output: process.stderr });
7530
+ try {
7531
+ return await new Promise((resolve) => {
7532
+ rl.question(
7533
+ "@mariozechner/pi-coding-agent is not installed. Install it now? (y/N) ",
7534
+ (answer) => resolve(answer.trim().toLowerCase() === "y")
7535
+ );
7536
+ });
7537
+ } finally {
7538
+ rl.close();
7539
+ }
7540
+ }
7541
+ async function loadSdkModules() {
7542
+ if (!piCodingAgentModule || !piAiModule) {
7543
+ try {
7544
+ [piCodingAgentModule, piAiModule] = await Promise.all([
7545
+ import("@mariozechner/pi-coding-agent"),
7546
+ import("@mariozechner/pi-ai")
7547
+ ]);
7548
+ } catch {
7549
+ if (await promptInstall()) {
7550
+ console.error("Installing @mariozechner/pi-coding-agent...");
7551
+ execSync("bun add @mariozechner/pi-coding-agent", { stdio: "inherit" });
7552
+ [piCodingAgentModule, piAiModule] = await Promise.all([
7553
+ import("@mariozechner/pi-coding-agent"),
7554
+ import("@mariozechner/pi-ai")
7555
+ ]);
7556
+ } else {
7557
+ throw new Error(
7558
+ "pi-coding-agent SDK is not installed. Install it with:\n bun add @mariozechner/pi-coding-agent"
7559
+ );
7560
+ }
7561
+ }
7562
+ }
7563
+ const toolMap = {
7564
+ read: piCodingAgentModule.readTool,
7565
+ bash: piCodingAgentModule.bashTool,
7566
+ edit: piCodingAgentModule.editTool,
7567
+ write: piCodingAgentModule.writeTool,
7568
+ grep: piCodingAgentModule.grepTool,
7569
+ find: piCodingAgentModule.findTool,
7570
+ ls: piCodingAgentModule.lsTool
7571
+ };
7572
+ return {
7573
+ createAgentSession: piCodingAgentModule.createAgentSession,
7574
+ codingTools: piCodingAgentModule.codingTools,
7575
+ toolMap,
7576
+ SessionManager: piCodingAgentModule.SessionManager,
7577
+ getModel: piAiModule.getModel
7578
+ };
7579
+ }
7580
+ var PiCodingAgentProvider = class {
7581
+ id;
7582
+ kind = "pi-coding-agent";
7583
+ targetName;
7584
+ supportsBatch = false;
7585
+ config;
7586
+ constructor(targetName, config) {
7587
+ this.id = `pi-coding-agent:${targetName}`;
7588
+ this.targetName = targetName;
7589
+ this.config = config;
7590
+ }
7591
+ async invoke(request) {
7592
+ if (request.signal?.aborted) {
7593
+ throw new Error("Pi coding agent request was aborted before execution");
7594
+ }
7595
+ const inputFiles = normalizeInputFiles(request.inputFiles);
7596
+ const startTime = (/* @__PURE__ */ new Date()).toISOString();
7597
+ const startMs = Date.now();
7598
+ const sdk = await loadSdkModules();
7599
+ const logger = await this.createStreamLogger(request).catch(() => void 0);
7600
+ try {
7601
+ const cwd = this.resolveCwd(request.cwd);
7602
+ const providerName = this.config.subprovider ?? "google";
7603
+ const modelId = this.config.model ?? "gemini-2.5-flash";
7604
+ this.setApiKeyEnv(providerName);
7605
+ const model = sdk.getModel(providerName, modelId);
7606
+ const tools = this.resolveTools(sdk);
7607
+ const { session } = await sdk.createAgentSession({
7608
+ cwd,
7609
+ model,
7610
+ tools,
7611
+ thinkingLevel: this.config.thinking,
7612
+ sessionManager: sdk.SessionManager.inMemory(cwd)
7613
+ });
7614
+ let tokenUsage;
7615
+ let costUsd;
7616
+ const toolTrackers = /* @__PURE__ */ new Map();
7617
+ const completedToolResults = /* @__PURE__ */ new Map();
7618
+ const unsubscribe = session.subscribe((event) => {
7619
+ logger?.handleEvent(event);
7620
+ switch (event.type) {
7621
+ case "message_end": {
7622
+ const msg = event.message;
7623
+ if (msg && typeof msg === "object" && "role" in msg && msg.role === "assistant" && "usage" in msg) {
7624
+ const usage = msg.usage;
7625
+ if (usage && typeof usage === "object") {
7626
+ const u = usage;
7627
+ const input = toFiniteNumber(u.input);
7628
+ const output = toFiniteNumber(u.output);
7629
+ const cached = toFiniteNumber(u.cacheRead);
7630
+ let callDelta;
7631
+ if (input !== void 0 || output !== void 0) {
7632
+ callDelta = {
7633
+ input: input ?? 0,
7634
+ output: output ?? 0,
7635
+ ...cached !== void 0 ? { cached } : {}
7636
+ };
7637
+ tokenUsage = {
7638
+ input: (tokenUsage?.input ?? 0) + callDelta.input,
7639
+ output: (tokenUsage?.output ?? 0) + callDelta.output,
7640
+ ...cached !== void 0 ? { cached: (tokenUsage?.cached ?? 0) + cached } : tokenUsage?.cached !== void 0 ? { cached: tokenUsage.cached } : {}
7641
+ };
7642
+ }
7643
+ const cost = u.cost;
7644
+ if (cost && typeof cost === "object") {
7645
+ const total = toFiniteNumber(cost.total);
7646
+ if (total !== void 0) {
7647
+ costUsd = (costUsd ?? 0) + total;
7648
+ }
7649
+ }
7650
+ request.streamCallbacks?.onLlmCallEnd?.(modelId, callDelta);
7651
+ }
7652
+ }
7653
+ break;
7654
+ }
7655
+ case "tool_execution_start": {
7656
+ toolTrackers.set(event.toolCallId, {
7657
+ toolCallId: event.toolCallId,
7658
+ toolName: event.toolName,
7659
+ args: event.args,
7660
+ startMs: Date.now(),
7661
+ startTime: (/* @__PURE__ */ new Date()).toISOString()
7662
+ });
7663
+ request.streamCallbacks?.onToolCallStart?.(event.toolName, event.toolCallId);
7664
+ break;
7665
+ }
7666
+ case "tool_execution_end": {
7667
+ const tracker = toolTrackers.get(event.toolCallId);
7668
+ const durationMs = tracker ? Date.now() - tracker.startMs : 0;
7669
+ completedToolResults.set(event.toolCallId, {
7670
+ output: event.result,
7671
+ durationMs
7672
+ });
7673
+ request.streamCallbacks?.onToolCallEnd?.(
7674
+ event.toolName,
7675
+ tracker?.args,
7676
+ event.result,
7677
+ durationMs,
7678
+ event.toolCallId
7679
+ );
7680
+ toolTrackers.delete(event.toolCallId);
7681
+ break;
7682
+ }
7683
+ }
7684
+ });
7685
+ try {
7686
+ const systemPrompt = this.config.systemPrompt;
7687
+ let prompt = request.question;
7688
+ if (systemPrompt) {
7689
+ prompt = `${systemPrompt}
7690
+
7691
+ ${prompt}`;
7692
+ }
7693
+ if (inputFiles && inputFiles.length > 0) {
7694
+ const fileList = inputFiles.map((f) => `@${f}`).join("\n");
7695
+ prompt = `${prompt}
7696
+
7697
+ Files:
7698
+ ${fileList}`;
7699
+ }
7700
+ if (this.config.timeoutMs) {
7701
+ const timeoutMs = this.config.timeoutMs;
7702
+ let timeoutId;
7703
+ const timeoutPromise = new Promise((_, reject) => {
7704
+ timeoutId = setTimeout(
7705
+ () => reject(
7706
+ new Error(`Pi coding agent timed out after ${Math.ceil(timeoutMs / 1e3)}s`)
7707
+ ),
7708
+ timeoutMs
7709
+ );
7710
+ });
7711
+ try {
7712
+ await Promise.race([session.prompt(prompt), timeoutPromise]);
7713
+ } finally {
7714
+ if (timeoutId !== void 0) clearTimeout(timeoutId);
7715
+ }
7716
+ } else {
7717
+ await session.prompt(prompt);
7718
+ }
7719
+ const agentMessages = session.agent.state.messages;
7720
+ const output = [];
7721
+ for (const msg of agentMessages) {
7722
+ output.push(convertAgentMessage(msg, toolTrackers, completedToolResults));
7723
+ }
7724
+ const endTime = (/* @__PURE__ */ new Date()).toISOString();
7725
+ const durationMs = Date.now() - startMs;
7726
+ return {
7727
+ raw: {
7728
+ messages: agentMessages,
7729
+ model: this.config.model,
7730
+ provider: this.config.subprovider
7731
+ },
7732
+ output,
7733
+ tokenUsage,
7734
+ costUsd,
7735
+ durationMs,
7736
+ startTime,
7737
+ endTime
7738
+ };
7739
+ } finally {
7740
+ unsubscribe();
7741
+ session.dispose();
7742
+ }
7743
+ } finally {
7744
+ await logger?.close();
7745
+ }
7746
+ }
7747
+ /** Maps config apiKey to the provider-specific env var the SDK reads. */
7748
+ setApiKeyEnv(providerName) {
7749
+ if (!this.config.apiKey) return;
7750
+ const ENV_KEY_MAP = {
7751
+ google: "GEMINI_API_KEY",
7752
+ gemini: "GEMINI_API_KEY",
7753
+ anthropic: "ANTHROPIC_API_KEY",
7754
+ openai: "OPENAI_API_KEY",
7755
+ groq: "GROQ_API_KEY",
7756
+ xai: "XAI_API_KEY",
7757
+ openrouter: "OPENROUTER_API_KEY"
7758
+ };
7759
+ const envKey = ENV_KEY_MAP[providerName.toLowerCase()];
7760
+ if (envKey) {
7761
+ process.env[envKey] = this.config.apiKey;
7762
+ }
7763
+ }
7764
+ resolveCwd(cwdOverride) {
7765
+ if (cwdOverride) {
7766
+ return path18.resolve(cwdOverride);
7767
+ }
7768
+ if (this.config.cwd) {
7769
+ return path18.resolve(this.config.cwd);
7770
+ }
7771
+ return process.cwd();
7772
+ }
7773
+ resolveTools(sdk) {
7774
+ if (!this.config.tools) {
7775
+ return sdk.codingTools;
7776
+ }
7777
+ const toolNames = this.config.tools.split(",").map((t) => t.trim().toLowerCase());
7778
+ const selected = [];
7779
+ for (const name of toolNames) {
7780
+ if (name in sdk.toolMap) {
7781
+ selected.push(sdk.toolMap[name]);
7782
+ }
7783
+ }
7784
+ return selected.length > 0 ? selected : sdk.codingTools;
7785
+ }
7786
+ resolveLogDirectory() {
7787
+ if (this.config.logDir) {
7788
+ return path18.resolve(this.config.logDir);
7789
+ }
7790
+ return path18.join(process.cwd(), ".agentv", "logs", "pi-coding-agent");
7791
+ }
7792
+ async createStreamLogger(request) {
7793
+ const logDir = this.resolveLogDirectory();
7794
+ if (!logDir) {
7795
+ return void 0;
7796
+ }
7797
+ try {
7798
+ await mkdir7(logDir, { recursive: true });
7799
+ } catch (error) {
7800
+ const message = error instanceof Error ? error.message : String(error);
7801
+ console.warn(`Skipping Pi stream logging (could not create ${logDir}): ${message}`);
7802
+ return void 0;
7803
+ }
7804
+ const filePath = path18.join(logDir, buildLogFilename6(request, this.targetName));
7805
+ try {
7806
+ const logger = await PiStreamLogger2.create({
7807
+ filePath,
7808
+ targetName: this.targetName,
7809
+ evalCaseId: request.evalCaseId,
7810
+ attempt: request.attempt,
7811
+ format: this.config.logFormat ?? "summary"
7812
+ });
7813
+ recordPiLogEntry({
7814
+ filePath,
7815
+ targetName: this.targetName,
7816
+ evalCaseId: request.evalCaseId,
7817
+ attempt: request.attempt
7818
+ });
7819
+ return logger;
7820
+ } catch (error) {
7821
+ const message = error instanceof Error ? error.message : String(error);
7822
+ console.warn(`Skipping Pi stream logging for ${filePath}: ${message}`);
7823
+ return void 0;
7824
+ }
7825
+ }
7826
+ };
7827
+ var PiStreamLogger2 = class _PiStreamLogger {
7828
+ filePath;
7829
+ stream;
7830
+ startedAt = Date.now();
7831
+ format;
7832
+ constructor(filePath, format) {
7833
+ this.filePath = filePath;
7834
+ this.format = format;
7835
+ this.stream = createWriteStream6(filePath, { flags: "a" });
7836
+ }
7837
+ static async create(options) {
7838
+ const logger = new _PiStreamLogger(options.filePath, options.format);
7839
+ const header = [
7840
+ "# Pi Coding Agent stream log",
7841
+ `# target: ${options.targetName}`,
7842
+ options.evalCaseId ? `# eval: ${options.evalCaseId}` : void 0,
7843
+ options.attempt !== void 0 ? `# attempt: ${options.attempt + 1}` : void 0,
7844
+ `# started: ${(/* @__PURE__ */ new Date()).toISOString()}`,
7845
+ ""
7846
+ ].filter((line) => Boolean(line));
7847
+ for (const line of header) {
7848
+ logger.stream.write(`${line}
7849
+ `);
7850
+ }
7851
+ return logger;
7852
+ }
7853
+ handleEvent(event) {
7854
+ if (!event || typeof event !== "object") return;
7855
+ const record = event;
7856
+ const type = typeof record.type === "string" ? record.type : void 0;
7857
+ if (!type) return;
7858
+ const message = this.format === "json" ? JSON.stringify(event, null, 2) : summarizeSdkEvent2(event);
7859
+ if (message) {
7860
+ this.stream.write(`[+${formatElapsed6(this.startedAt)}] ${message}
7861
+ `);
7862
+ }
7863
+ }
7864
+ async close() {
7865
+ await new Promise((resolve, reject) => {
7866
+ this.stream.once("error", reject);
7867
+ this.stream.end(() => resolve());
7868
+ });
7869
+ }
7870
+ };
7871
+ function summarizeSdkEvent2(event) {
7872
+ if (!event || typeof event !== "object") return void 0;
7873
+ const record = event;
7874
+ const type = typeof record.type === "string" ? record.type : void 0;
7875
+ if (!type) return void 0;
7876
+ switch (type) {
7877
+ case "agent_start":
7878
+ case "agent_end":
7879
+ case "turn_start":
7880
+ case "turn_end":
7881
+ return type;
7882
+ case "message_start":
7883
+ case "message_end": {
7884
+ const msg = record.message;
7885
+ return `${type}: ${msg?.role ?? "unknown"}`;
7886
+ }
7887
+ case "tool_execution_start":
7888
+ return `tool_start: ${record.toolName}`;
7889
+ case "tool_execution_end":
7890
+ return `tool_end: ${record.toolName}`;
7891
+ default:
7892
+ return type;
7893
+ }
7894
+ }
7895
+ function buildLogFilename6(request, targetName) {
7896
+ const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
7897
+ const evalId = sanitizeForFilename6(request.evalCaseId ?? "pi");
7898
+ const attemptSuffix = request.attempt !== void 0 ? `_attempt-${request.attempt + 1}` : "";
7899
+ const target = sanitizeForFilename6(targetName);
7900
+ return `${timestamp}_${target}_${evalId}${attemptSuffix}_${randomUUID8().slice(0, 8)}.log`;
7901
+ }
7902
+ function sanitizeForFilename6(value) {
7903
+ const sanitized = value.replace(/[^A-Za-z0-9._-]+/g, "_");
7904
+ return sanitized.length > 0 ? sanitized : "pi";
7905
+ }
7906
+ function formatElapsed6(startedAt) {
7907
+ const elapsedSeconds = Math.floor((Date.now() - startedAt) / 1e3);
7908
+ const hours = Math.floor(elapsedSeconds / 3600);
7909
+ const minutes = Math.floor(elapsedSeconds % 3600 / 60);
7910
+ const seconds = elapsedSeconds % 60;
7911
+ if (hours > 0) {
7912
+ return `${hours.toString().padStart(2, "0")}:${minutes.toString().padStart(2, "0")}:${seconds.toString().padStart(2, "0")}`;
7641
7913
  }
7642
- return result;
7914
+ return `${minutes.toString().padStart(2, "0")}:${seconds.toString().padStart(2, "0")}`;
7643
7915
  }
7644
- function convertPiMessage(message) {
7916
+ function convertAgentMessage(message, toolTrackers, completedToolResults) {
7645
7917
  if (!message || typeof message !== "object") {
7646
- return void 0;
7918
+ return { role: "unknown", content: String(message) };
7647
7919
  }
7648
7920
  const msg = message;
7649
- const role = msg.role;
7650
- if (typeof role !== "string") {
7651
- return void 0;
7652
- }
7921
+ const role = typeof msg.role === "string" ? msg.role : "unknown";
7653
7922
  const content = extractPiTextContent(msg.content);
7654
- const toolCalls = extractToolCalls4(msg.content);
7655
- const startTime = typeof msg.timestamp === "number" ? new Date(msg.timestamp).toISOString() : typeof msg.timestamp === "string" ? msg.timestamp : void 0;
7923
+ const toolCalls = extractToolCalls4(msg.content, toolTrackers, completedToolResults);
7924
+ const startTimeVal = typeof msg.timestamp === "number" ? new Date(msg.timestamp).toISOString() : typeof msg.timestamp === "string" ? msg.timestamp : void 0;
7925
+ let msgTokenUsage;
7926
+ if (msg.usage && typeof msg.usage === "object") {
7927
+ const u = msg.usage;
7928
+ const input = toFiniteNumber(u.input);
7929
+ const output = toFiniteNumber(u.output);
7930
+ if (input !== void 0 || output !== void 0) {
7931
+ msgTokenUsage = {
7932
+ input: input ?? 0,
7933
+ output: output ?? 0,
7934
+ ...toFiniteNumber(u.cacheRead) !== void 0 ? { cached: toFiniteNumber(u.cacheRead) } : {}
7935
+ };
7936
+ }
7937
+ }
7656
7938
  const metadata = {};
7657
7939
  if (msg.api) metadata.api = msg.api;
7658
7940
  if (msg.provider) metadata.provider = msg.provider;
7659
7941
  if (msg.model) metadata.model = msg.model;
7660
- if (msg.usage) metadata.usage = msg.usage;
7661
7942
  if (msg.stopReason) metadata.stopReason = msg.stopReason;
7662
7943
  return {
7663
7944
  role,
7664
7945
  content,
7665
7946
  toolCalls: toolCalls.length > 0 ? toolCalls : void 0,
7666
- startTime,
7667
- metadata: Object.keys(metadata).length > 0 ? metadata : void 0
7947
+ startTime: startTimeVal,
7948
+ metadata: Object.keys(metadata).length > 0 ? metadata : void 0,
7949
+ tokenUsage: msgTokenUsage
7668
7950
  };
7669
7951
  }
7670
- function extractToolCalls4(content) {
7952
+ function extractToolCalls4(content, toolTrackers, completedToolResults) {
7671
7953
  if (!Array.isArray(content)) {
7672
7954
  return [];
7673
7955
  }
@@ -7677,118 +7959,23 @@ function extractToolCalls4(content) {
7677
7959
  continue;
7678
7960
  }
7679
7961
  const p = part;
7680
- if (p.type === "tool_use" && typeof p.name === "string") {
7681
- toolCalls.push({
7682
- tool: p.name,
7683
- input: p.input,
7684
- id: typeof p.id === "string" ? p.id : void 0
7685
- });
7686
- }
7687
7962
  if (p.type === "toolCall" && typeof p.name === "string") {
7963
+ const id = typeof p.id === "string" ? p.id : void 0;
7964
+ const tracker = id ? toolTrackers.get(id) : void 0;
7965
+ const completed = id ? completedToolResults.get(id) : void 0;
7688
7966
  toolCalls.push({
7689
7967
  tool: p.name,
7690
7968
  input: p.arguments,
7691
- id: typeof p.id === "string" ? p.id : void 0
7969
+ id,
7970
+ output: completed?.output,
7971
+ durationMs: completed?.durationMs,
7972
+ startTime: tracker?.startTime,
7973
+ endTime: tracker?.startTime && completed?.durationMs !== void 0 ? new Date(new Date(tracker.startTime).getTime() + completed.durationMs).toISOString() : void 0
7692
7974
  });
7693
7975
  }
7694
- if (p.type === "tool_result" && typeof p.tool_use_id === "string") {
7695
- const existing = toolCalls.find((tc) => tc.id === p.tool_use_id);
7696
- if (existing) {
7697
- const idx = toolCalls.indexOf(existing);
7698
- toolCalls[idx] = {
7699
- ...existing,
7700
- output: p.content
7701
- };
7702
- }
7703
- }
7704
7976
  }
7705
7977
  return toolCalls;
7706
7978
  }
7707
- function escapeAtSymbols(prompt) {
7708
- return prompt.replace(/@\[([^\]]+)\]:/g, "[[$1]]:");
7709
- }
7710
- function pickDetail(stderr, stdout) {
7711
- const errorText = stderr.trim();
7712
- if (errorText.length > 0) {
7713
- return errorText;
7714
- }
7715
- const stdoutText = stdout.trim();
7716
- return stdoutText.length > 0 ? stdoutText : void 0;
7717
- }
7718
- function formatTimeoutSuffix3(timeoutMs) {
7719
- if (!timeoutMs || timeoutMs <= 0) {
7720
- return "";
7721
- }
7722
- const seconds = Math.ceil(timeoutMs / 1e3);
7723
- return ` after ${seconds}s`;
7724
- }
7725
- async function defaultPiRunner(options) {
7726
- return await new Promise((resolve, reject) => {
7727
- const parts = options.executable.split(/\s+/);
7728
- const executable = parts[0];
7729
- const executableArgs = parts.slice(1);
7730
- const allArgs = [...executableArgs, ...options.args];
7731
- const child = spawn3(executable, allArgs, {
7732
- cwd: options.cwd,
7733
- env: options.env,
7734
- stdio: ["pipe", "pipe", "pipe"],
7735
- shell: false
7736
- });
7737
- let stdout = "";
7738
- let stderr = "";
7739
- let timedOut = false;
7740
- const onAbort = () => {
7741
- child.kill("SIGTERM");
7742
- };
7743
- if (options.signal) {
7744
- if (options.signal.aborted) {
7745
- onAbort();
7746
- } else {
7747
- options.signal.addEventListener("abort", onAbort, { once: true });
7748
- }
7749
- }
7750
- let timeoutHandle;
7751
- if (options.timeoutMs && options.timeoutMs > 0) {
7752
- timeoutHandle = setTimeout(() => {
7753
- timedOut = true;
7754
- child.kill("SIGTERM");
7755
- }, options.timeoutMs);
7756
- timeoutHandle.unref?.();
7757
- }
7758
- child.stdout.setEncoding("utf8");
7759
- child.stdout.on("data", (chunk) => {
7760
- stdout += chunk;
7761
- options.onStdoutChunk?.(chunk);
7762
- });
7763
- child.stderr.setEncoding("utf8");
7764
- child.stderr.on("data", (chunk) => {
7765
- stderr += chunk;
7766
- options.onStderrChunk?.(chunk);
7767
- });
7768
- child.stdin.end();
7769
- const cleanup = () => {
7770
- if (timeoutHandle) {
7771
- clearTimeout(timeoutHandle);
7772
- }
7773
- if (options.signal) {
7774
- options.signal.removeEventListener("abort", onAbort);
7775
- }
7776
- };
7777
- child.on("error", (error) => {
7778
- cleanup();
7779
- reject(error);
7780
- });
7781
- child.on("close", (code) => {
7782
- cleanup();
7783
- resolve({
7784
- stdout,
7785
- stderr,
7786
- exitCode: typeof code === "number" ? code : -1,
7787
- timedOut
7788
- });
7789
- });
7790
- });
7791
- }
7792
7979
 
7793
7980
  // src/evaluation/providers/provider-registry.ts
7794
7981
  var ProviderRegistry = class {
@@ -7828,17 +8015,17 @@ var ProviderRegistry = class {
7828
8015
  // src/evaluation/providers/vscode-provider.ts
7829
8016
  import { exec as exec2 } from "node:child_process";
7830
8017
  import { constants as constants3, access as access3, stat as stat4 } from "node:fs/promises";
7831
- import path29 from "node:path";
8018
+ import path30 from "node:path";
7832
8019
  import { promisify as promisify3 } from "node:util";
7833
8020
 
7834
8021
  // src/evaluation/providers/vscode/dispatch/agentDispatch.ts
7835
8022
  import { stat as stat3, writeFile as writeFile4 } from "node:fs/promises";
7836
- import path27 from "node:path";
8023
+ import path28 from "node:path";
7837
8024
 
7838
8025
  // src/evaluation/providers/vscode/utils/fs.ts
7839
8026
  import { constants as constants2 } from "node:fs";
7840
- import { access as access2, mkdir as mkdir7, readdir, rm as rm2, stat } from "node:fs/promises";
7841
- import path18 from "node:path";
8027
+ import { access as access2, mkdir as mkdir8, readdir, rm as rm2, stat } from "node:fs/promises";
8028
+ import path19 from "node:path";
7842
8029
  async function pathExists(target) {
7843
8030
  try {
7844
8031
  await access2(target, constants2.F_OK);
@@ -7848,13 +8035,13 @@ async function pathExists(target) {
7848
8035
  }
7849
8036
  }
7850
8037
  async function ensureDir(target) {
7851
- await mkdir7(target, { recursive: true });
8038
+ await mkdir8(target, { recursive: true });
7852
8039
  }
7853
8040
  async function readDirEntries(target) {
7854
8041
  const entries = await readdir(target, { withFileTypes: true });
7855
8042
  return entries.map((entry) => ({
7856
8043
  name: entry.name,
7857
- absolutePath: path18.join(target, entry.name),
8044
+ absolutePath: path19.join(target, entry.name),
7858
8045
  isDirectory: entry.isDirectory()
7859
8046
  }));
7860
8047
  }
@@ -7869,9 +8056,9 @@ async function removeIfExists(target) {
7869
8056
  }
7870
8057
 
7871
8058
  // src/evaluation/providers/vscode/utils/path.ts
7872
- import path19 from "node:path";
8059
+ import path20 from "node:path";
7873
8060
  function pathToFileUri2(filePath) {
7874
- const absolutePath = path19.isAbsolute(filePath) ? filePath : path19.resolve(filePath);
8061
+ const absolutePath = path20.isAbsolute(filePath) ? filePath : path20.resolve(filePath);
7875
8062
  const normalizedPath = absolutePath.replace(/\\/g, "/");
7876
8063
  if (/^[a-zA-Z]:\//.test(normalizedPath)) {
7877
8064
  return `file:///${normalizedPath}`;
@@ -7880,7 +8067,7 @@ function pathToFileUri2(filePath) {
7880
8067
  }
7881
8068
 
7882
8069
  // src/evaluation/providers/vscode/dispatch/promptBuilder.ts
7883
- import path20 from "node:path";
8070
+ import path21 from "node:path";
7884
8071
 
7885
8072
  // src/evaluation/providers/vscode/utils/template.ts
7886
8073
  function renderTemplate2(content, variables) {
@@ -7972,8 +8159,8 @@ function createBatchRequestPrompt(userQuery, responseFileTmp, responseFileFinal,
7972
8159
  });
7973
8160
  }
7974
8161
  function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateContent) {
7975
- const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${path20.basename(file)}`).join("\n");
7976
- const responseList = responseFiles.map((file) => `"${path20.basename(file)}"`).join(", ");
8162
+ const requestLines = requestFiles.map((file, index) => `${index + 1}. messages/${path21.basename(file)}`).join("\n");
8163
+ const responseList = responseFiles.map((file) => `"${path21.basename(file)}"`).join(", ");
7977
8164
  return renderTemplate2(templateContent, {
7978
8165
  requestFiles: requestLines,
7979
8166
  responseList
@@ -7982,7 +8169,7 @@ function createBatchOrchestratorPrompt(requestFiles, responseFiles, templateCont
7982
8169
 
7983
8170
  // src/evaluation/providers/vscode/dispatch/responseWaiter.ts
7984
8171
  import { readFile as readFile7 } from "node:fs/promises";
7985
- import path21 from "node:path";
8172
+ import path22 from "node:path";
7986
8173
 
7987
8174
  // src/evaluation/providers/vscode/utils/time.ts
7988
8175
  function sleep2(ms) {
@@ -8041,7 +8228,7 @@ async function waitForResponseOutput(responseFileFinal, pollInterval = 1e3, sile
8041
8228
  }
8042
8229
  async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, silent = false, timeoutMs = DEFAULT_TIMEOUT_MS) {
8043
8230
  if (!silent) {
8044
- const fileList = responseFilesFinal.map((file) => path21.basename(file)).join(", ");
8231
+ const fileList = responseFilesFinal.map((file) => path22.basename(file)).join(", ");
8045
8232
  console.error(`waiting for ${responseFilesFinal.length} batch response(s): ${fileList}`);
8046
8233
  }
8047
8234
  const deadline = Date.now() + timeoutMs;
@@ -8050,7 +8237,7 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
8050
8237
  while (pending.size > 0) {
8051
8238
  if (Date.now() >= deadline) {
8052
8239
  if (!silent) {
8053
- const remaining = [...pending].map((f) => path21.basename(f)).join(", ");
8240
+ const remaining = [...pending].map((f) => path22.basename(f)).join(", ");
8054
8241
  console.error(
8055
8242
  `error: timed out after ${Math.round(timeoutMs / 1e3)}s waiting for batch responses. Still pending: ${remaining}`
8056
8243
  );
@@ -8100,16 +8287,16 @@ async function waitForBatchResponses(responseFilesFinal, pollInterval = 1e3, sil
8100
8287
 
8101
8288
  // src/evaluation/providers/vscode/dispatch/vscodeProcess.ts
8102
8289
  import { exec, spawn as spawn4 } from "node:child_process";
8103
- import { mkdir as mkdir8, writeFile as writeFile2 } from "node:fs/promises";
8104
- import path24 from "node:path";
8290
+ import { mkdir as mkdir9, writeFile as writeFile2 } from "node:fs/promises";
8291
+ import path25 from "node:path";
8105
8292
  import { promisify as promisify2 } from "node:util";
8106
8293
 
8107
8294
  // src/evaluation/providers/vscode/dispatch/constants.ts
8108
- import path23 from "node:path";
8295
+ import path24 from "node:path";
8109
8296
 
8110
8297
  // src/paths.ts
8111
8298
  import os2 from "node:os";
8112
- import path22 from "node:path";
8299
+ import path23 from "node:path";
8113
8300
  var logged = false;
8114
8301
  function getAgentvHome() {
8115
8302
  const envHome = process.env.AGENTV_HOME;
@@ -8120,19 +8307,19 @@ function getAgentvHome() {
8120
8307
  }
8121
8308
  return envHome;
8122
8309
  }
8123
- return path22.join(os2.homedir(), ".agentv");
8310
+ return path23.join(os2.homedir(), ".agentv");
8124
8311
  }
8125
8312
  function getWorkspacesRoot() {
8126
- return path22.join(getAgentvHome(), "workspaces");
8313
+ return path23.join(getAgentvHome(), "workspaces");
8127
8314
  }
8128
8315
  function getSubagentsRoot() {
8129
- return path22.join(getAgentvHome(), "subagents");
8316
+ return path23.join(getAgentvHome(), "subagents");
8130
8317
  }
8131
8318
  function getTraceStateRoot() {
8132
- return path22.join(getAgentvHome(), "trace-state");
8319
+ return path23.join(getAgentvHome(), "trace-state");
8133
8320
  }
8134
8321
  function getWorkspacePoolRoot() {
8135
- return path22.join(getAgentvHome(), "workspace-pool");
8322
+ return path23.join(getAgentvHome(), "workspace-pool");
8136
8323
  }
8137
8324
 
8138
8325
  // src/evaluation/providers/vscode/dispatch/constants.ts
@@ -8140,7 +8327,7 @@ var DEFAULT_LOCK_NAME = "subagent.lock";
8140
8327
  var DEFAULT_ALIVE_FILENAME = ".alive";
8141
8328
  function getDefaultSubagentRoot(vscodeCmd = "code") {
8142
8329
  const folder = vscodeCmd === "code-insiders" ? "vscode-insiders-agents" : "vscode-agents";
8143
- return path23.join(getSubagentsRoot(), folder);
8330
+ return path24.join(getSubagentsRoot(), folder);
8144
8331
  }
8145
8332
  var DEFAULT_SUBAGENT_ROOT = getDefaultSubagentRoot();
8146
8333
 
@@ -8207,11 +8394,11 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
8207
8394
  await raceSpawnError(child);
8208
8395
  return true;
8209
8396
  }
8210
- const aliveFile = path24.join(subagentDir, DEFAULT_ALIVE_FILENAME);
8397
+ const aliveFile = path25.join(subagentDir, DEFAULT_ALIVE_FILENAME);
8211
8398
  await removeIfExists(aliveFile);
8212
- const githubAgentsDir = path24.join(subagentDir, ".github", "agents");
8213
- await mkdir8(githubAgentsDir, { recursive: true });
8214
- const wakeupDst = path24.join(githubAgentsDir, "wakeup.md");
8399
+ const githubAgentsDir = path25.join(subagentDir, ".github", "agents");
8400
+ await mkdir9(githubAgentsDir, { recursive: true });
8401
+ const wakeupDst = path25.join(githubAgentsDir, "wakeup.md");
8215
8402
  await writeFile2(wakeupDst, DEFAULT_WAKEUP_CONTENT, "utf8");
8216
8403
  const workspaceChild = spawnVsCode(vscodeCmd, [workspacePath], {
8217
8404
  label: "open-workspace"
@@ -8224,7 +8411,7 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
8224
8411
  "chat",
8225
8412
  "-m",
8226
8413
  wakeupChatId,
8227
- `create a file named .alive in the ${path24.basename(subagentDir)} folder`
8414
+ `create a file named .alive in the ${path25.basename(subagentDir)} folder`
8228
8415
  ];
8229
8416
  const wakeupChild = spawnVsCode(vscodeCmd, chatArgs, { label: "send-wakeup-chat" });
8230
8417
  await raceSpawnError(wakeupChild);
@@ -8239,10 +8426,10 @@ async function ensureWorkspaceFocused(workspacePath, workspaceName, subagentDir,
8239
8426
  return true;
8240
8427
  }
8241
8428
  async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, requestInstructions, timestamp, vscodeCmd) {
8242
- const workspacePath = path24.join(subagentDir, `${path24.basename(subagentDir)}.code-workspace`);
8243
- const messagesDir = path24.join(subagentDir, "messages");
8244
- await mkdir8(messagesDir, { recursive: true });
8245
- const reqFile = path24.join(messagesDir, `${timestamp}_req.md`);
8429
+ const workspacePath = path25.join(subagentDir, `${path25.basename(subagentDir)}.code-workspace`);
8430
+ const messagesDir = path25.join(subagentDir, "messages");
8431
+ await mkdir9(messagesDir, { recursive: true });
8432
+ const reqFile = path25.join(messagesDir, `${timestamp}_req.md`);
8246
8433
  await writeFile2(reqFile, requestInstructions, { encoding: "utf8" });
8247
8434
  const reqUri = pathToFileUri2(reqFile);
8248
8435
  const chatArgs = ["-r", "chat", "-m", chatId];
@@ -8250,16 +8437,16 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
8250
8437
  chatArgs.push("-a", attachment);
8251
8438
  }
8252
8439
  chatArgs.push("-a", reqFile);
8253
- chatArgs.push(`Follow instructions in [${path24.basename(reqFile)}](${reqUri})`);
8440
+ chatArgs.push(`Follow instructions in [${path25.basename(reqFile)}](${reqUri})`);
8254
8441
  const workspaceReady = await ensureWorkspaceFocused(
8255
8442
  workspacePath,
8256
- path24.basename(subagentDir),
8443
+ path25.basename(subagentDir),
8257
8444
  subagentDir,
8258
8445
  vscodeCmd
8259
8446
  );
8260
8447
  if (!workspaceReady) {
8261
8448
  throw new Error(
8262
- `VS Code workspace '${path24.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
8449
+ `VS Code workspace '${path25.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
8263
8450
  );
8264
8451
  }
8265
8452
  await sleep2(500);
@@ -8267,9 +8454,9 @@ async function launchVsCodeWithChat(subagentDir, chatId, attachmentPaths, reques
8267
8454
  await raceSpawnError(child);
8268
8455
  }
8269
8456
  async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, chatInstruction, vscodeCmd) {
8270
- const workspacePath = path24.join(subagentDir, `${path24.basename(subagentDir)}.code-workspace`);
8271
- const messagesDir = path24.join(subagentDir, "messages");
8272
- await mkdir8(messagesDir, { recursive: true });
8457
+ const workspacePath = path25.join(subagentDir, `${path25.basename(subagentDir)}.code-workspace`);
8458
+ const messagesDir = path25.join(subagentDir, "messages");
8459
+ await mkdir9(messagesDir, { recursive: true });
8273
8460
  const chatArgs = ["-r", "chat", "-m", chatId];
8274
8461
  for (const attachment of attachmentPaths) {
8275
8462
  chatArgs.push("-a", attachment);
@@ -8277,13 +8464,13 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
8277
8464
  chatArgs.push(chatInstruction);
8278
8465
  const workspaceReady = await ensureWorkspaceFocused(
8279
8466
  workspacePath,
8280
- path24.basename(subagentDir),
8467
+ path25.basename(subagentDir),
8281
8468
  subagentDir,
8282
8469
  vscodeCmd
8283
8470
  );
8284
8471
  if (!workspaceReady) {
8285
8472
  throw new Error(
8286
- `VS Code workspace '${path24.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
8473
+ `VS Code workspace '${path25.basename(subagentDir)}' failed to become ready within the timeout. Check that '${vscodeCmd}' can open workspaces.`
8287
8474
  );
8288
8475
  }
8289
8476
  await sleep2(500);
@@ -8292,11 +8479,11 @@ async function launchVsCodeWithBatchChat(subagentDir, chatId, attachmentPaths, c
8292
8479
  }
8293
8480
 
8294
8481
  // src/evaluation/providers/vscode/dispatch/workspaceManager.ts
8295
- import { copyFile, mkdir as mkdir9, readFile as readFile8, readdir as readdir2, stat as stat2, writeFile as writeFile3 } from "node:fs/promises";
8296
- import path26 from "node:path";
8482
+ import { copyFile, mkdir as mkdir10, readFile as readFile8, readdir as readdir2, stat as stat2, writeFile as writeFile3 } from "node:fs/promises";
8483
+ import path27 from "node:path";
8297
8484
 
8298
8485
  // src/evaluation/providers/vscode/utils/workspace.ts
8299
- import path25 from "node:path";
8486
+ import path26 from "node:path";
8300
8487
  import JSON5 from "json5";
8301
8488
  function transformWorkspacePaths(workspaceContent, templateDir) {
8302
8489
  let workspace;
@@ -8313,10 +8500,10 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
8313
8500
  }
8314
8501
  const transformedFolders = workspace.folders.map((folder) => {
8315
8502
  const folderPath = folder.path;
8316
- if (path25.isAbsolute(folderPath)) {
8503
+ if (path26.isAbsolute(folderPath)) {
8317
8504
  return folder;
8318
8505
  }
8319
- const absolutePath = path25.resolve(templateDir, folderPath);
8506
+ const absolutePath = path26.resolve(templateDir, folderPath);
8320
8507
  return {
8321
8508
  ...folder,
8322
8509
  path: absolutePath
@@ -8338,19 +8525,19 @@ function transformWorkspacePaths(workspaceContent, templateDir) {
8338
8525
  if (locationMap && typeof locationMap === "object") {
8339
8526
  const transformedMap = {};
8340
8527
  for (const [locationPath, value] of Object.entries(locationMap)) {
8341
- const isAbsolute = path25.isAbsolute(locationPath);
8528
+ const isAbsolute = path26.isAbsolute(locationPath);
8342
8529
  if (isAbsolute) {
8343
8530
  transformedMap[locationPath] = value;
8344
8531
  } else {
8345
8532
  const firstGlobIndex = locationPath.search(/[*]/);
8346
8533
  if (firstGlobIndex === -1) {
8347
- const resolvedPath = path25.resolve(templateDir, locationPath).replace(/\\/g, "/");
8534
+ const resolvedPath = path26.resolve(templateDir, locationPath).replace(/\\/g, "/");
8348
8535
  transformedMap[resolvedPath] = value;
8349
8536
  } else {
8350
8537
  const basePathEnd = locationPath.lastIndexOf("/", firstGlobIndex);
8351
8538
  const basePath = basePathEnd !== -1 ? locationPath.substring(0, basePathEnd) : ".";
8352
8539
  const patternPath = locationPath.substring(basePathEnd !== -1 ? basePathEnd : 0);
8353
- const resolvedPath = (path25.resolve(templateDir, basePath) + patternPath).replace(
8540
+ const resolvedPath = (path26.resolve(templateDir, basePath) + patternPath).replace(
8354
8541
  /\\/g,
8355
8542
  "/"
8356
8543
  );
@@ -8391,7 +8578,7 @@ async function findUnlockedSubagent(subagentRoot) {
8391
8578
  number: Number.parseInt(entry.name.split("-")[1] ?? "", 10)
8392
8579
  })).filter((entry) => Number.isInteger(entry.number)).sort((a, b) => a.number - b.number);
8393
8580
  for (const subagent of subagents) {
8394
- const lockFile = path26.join(subagent.absolutePath, DEFAULT_LOCK_NAME);
8581
+ const lockFile = path27.join(subagent.absolutePath, DEFAULT_LOCK_NAME);
8395
8582
  if (!await pathExists(lockFile)) {
8396
8583
  return subagent.absolutePath;
8397
8584
  }
@@ -8401,7 +8588,7 @@ async function findUnlockedSubagent(subagentRoot) {
8401
8588
  async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
8402
8589
  let workspaceContent;
8403
8590
  if (workspaceTemplate) {
8404
- const workspaceSrc = path26.resolve(workspaceTemplate);
8591
+ const workspaceSrc = path27.resolve(workspaceTemplate);
8405
8592
  if (!await pathExists(workspaceSrc)) {
8406
8593
  throw new Error(`workspace template not found: ${workspaceSrc}`);
8407
8594
  }
@@ -8414,13 +8601,13 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
8414
8601
  } else {
8415
8602
  workspaceContent = DEFAULT_WORKSPACE_TEMPLATE;
8416
8603
  }
8417
- const workspaceName = `${path26.basename(subagentDir)}.code-workspace`;
8418
- const workspaceDst = path26.join(subagentDir, workspaceName);
8419
- const templateDir = workspaceTemplate ? path26.dirname(path26.resolve(workspaceTemplate)) : subagentDir;
8604
+ const workspaceName = `${path27.basename(subagentDir)}.code-workspace`;
8605
+ const workspaceDst = path27.join(subagentDir, workspaceName);
8606
+ const templateDir = workspaceTemplate ? path27.dirname(path27.resolve(workspaceTemplate)) : subagentDir;
8420
8607
  const workspaceJson = JSON.stringify(workspaceContent, null, 2);
8421
8608
  let transformedContent = transformWorkspacePaths(workspaceJson, templateDir);
8422
8609
  if (cwd) {
8423
- const absCwd = path26.resolve(cwd);
8610
+ const absCwd = path27.resolve(cwd);
8424
8611
  const parsed = JSON.parse(transformedContent);
8425
8612
  const alreadyPresent = parsed.folders.some((f) => f.path === absCwd);
8426
8613
  if (!alreadyPresent) {
@@ -8429,35 +8616,35 @@ async function copyAgentConfig(subagentDir, workspaceTemplate, cwd) {
8429
8616
  }
8430
8617
  }
8431
8618
  await writeFile3(workspaceDst, transformedContent, "utf8");
8432
- const messagesDir = path26.join(subagentDir, "messages");
8433
- await mkdir9(messagesDir, { recursive: true });
8619
+ const messagesDir = path27.join(subagentDir, "messages");
8620
+ await mkdir10(messagesDir, { recursive: true });
8434
8621
  return { workspace: workspaceDst, messagesDir };
8435
8622
  }
8436
8623
  async function createSubagentLock(subagentDir) {
8437
- const messagesDir = path26.join(subagentDir, "messages");
8624
+ const messagesDir = path27.join(subagentDir, "messages");
8438
8625
  if (await pathExists(messagesDir)) {
8439
8626
  const files = await readdir2(messagesDir);
8440
8627
  await Promise.all(
8441
8628
  files.map(async (file) => {
8442
- const target = path26.join(messagesDir, file);
8629
+ const target = path27.join(messagesDir, file);
8443
8630
  await removeIfExists(target);
8444
8631
  })
8445
8632
  );
8446
8633
  }
8447
- const githubAgentsDir = path26.join(subagentDir, ".github", "agents");
8634
+ const githubAgentsDir = path27.join(subagentDir, ".github", "agents");
8448
8635
  if (await pathExists(githubAgentsDir)) {
8449
8636
  const agentFiles = await readdir2(githubAgentsDir);
8450
8637
  const preservedFiles = /* @__PURE__ */ new Set(["wakeup.md", "subagent.md"]);
8451
8638
  await Promise.all(
8452
- agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(path26.join(githubAgentsDir, file)))
8639
+ agentFiles.filter((file) => file.endsWith(".md") && !preservedFiles.has(file)).map((file) => removeIfExists(path27.join(githubAgentsDir, file)))
8453
8640
  );
8454
8641
  }
8455
- const lockFile = path26.join(subagentDir, DEFAULT_LOCK_NAME);
8642
+ const lockFile = path27.join(subagentDir, DEFAULT_LOCK_NAME);
8456
8643
  await writeFile3(lockFile, "", { encoding: "utf8" });
8457
8644
  return lockFile;
8458
8645
  }
8459
8646
  async function removeSubagentLock(subagentDir) {
8460
- const lockFile = path26.join(subagentDir, DEFAULT_LOCK_NAME);
8647
+ const lockFile = path27.join(subagentDir, DEFAULT_LOCK_NAME);
8461
8648
  await removeIfExists(lockFile);
8462
8649
  }
8463
8650
  async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspaceTemplate, dryRun, cwd) {
@@ -8477,9 +8664,9 @@ async function prepareSubagentDirectory(subagentDir, promptFile, chatId, workspa
8477
8664
  return 1;
8478
8665
  }
8479
8666
  if (promptFile) {
8480
- const githubAgentsDir = path26.join(subagentDir, ".github", "agents");
8481
- await mkdir9(githubAgentsDir, { recursive: true });
8482
- const agentFile = path26.join(githubAgentsDir, `${chatId}.md`);
8667
+ const githubAgentsDir = path27.join(subagentDir, ".github", "agents");
8668
+ await mkdir10(githubAgentsDir, { recursive: true });
8669
+ const agentFile = path27.join(githubAgentsDir, `${chatId}.md`);
8483
8670
  try {
8484
8671
  await copyFile(promptFile, agentFile);
8485
8672
  } catch (error) {
@@ -8498,7 +8685,7 @@ async function resolvePromptFile(promptFile) {
8498
8685
  if (!promptFile) {
8499
8686
  return void 0;
8500
8687
  }
8501
- const resolvedPrompt = path27.resolve(promptFile);
8688
+ const resolvedPrompt = path28.resolve(promptFile);
8502
8689
  if (!await pathExists(resolvedPrompt)) {
8503
8690
  throw new Error(`Prompt file not found: ${resolvedPrompt}`);
8504
8691
  }
@@ -8514,7 +8701,7 @@ async function resolveAttachments(extraAttachments) {
8514
8701
  }
8515
8702
  const resolved = [];
8516
8703
  for (const attachment of extraAttachments) {
8517
- const resolvedPath = path27.resolve(attachment);
8704
+ const resolvedPath = path28.resolve(attachment);
8518
8705
  if (!await pathExists(resolvedPath)) {
8519
8706
  throw new Error(`Attachment not found: ${resolvedPath}`);
8520
8707
  }
@@ -8556,7 +8743,7 @@ async function dispatchAgentSession(options) {
8556
8743
  error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
8557
8744
  };
8558
8745
  }
8559
- const subagentName = path27.basename(subagentDir);
8746
+ const subagentName = path28.basename(subagentDir);
8560
8747
  const chatId = Math.random().toString(16).slice(2, 10);
8561
8748
  const preparationResult = await prepareSubagentDirectory(
8562
8749
  subagentDir,
@@ -8584,9 +8771,9 @@ async function dispatchAgentSession(options) {
8584
8771
  };
8585
8772
  }
8586
8773
  const timestamp = generateTimestamp();
8587
- const messagesDir = path27.join(subagentDir, "messages");
8588
- const responseFileTmp = path27.join(messagesDir, `${timestamp}_res.tmp.md`);
8589
- const responseFileFinal = path27.join(messagesDir, `${timestamp}_res.md`);
8774
+ const messagesDir = path28.join(subagentDir, "messages");
8775
+ const responseFileTmp = path28.join(messagesDir, `${timestamp}_res.tmp.md`);
8776
+ const responseFileFinal = path28.join(messagesDir, `${timestamp}_res.md`);
8590
8777
  const requestInstructions = createRequestPrompt(
8591
8778
  userQuery,
8592
8779
  responseFileTmp,
@@ -8691,7 +8878,7 @@ async function dispatchBatchAgent(options) {
8691
8878
  error: "No unlocked subagents available. Provision additional subagents with: subagent code provision --subagents <desired_total>"
8692
8879
  };
8693
8880
  }
8694
- subagentName = path27.basename(subagentDir);
8881
+ subagentName = path28.basename(subagentDir);
8695
8882
  const chatId = Math.random().toString(16).slice(2, 10);
8696
8883
  const preparationResult = await prepareSubagentDirectory(
8697
8884
  subagentDir,
@@ -8722,17 +8909,17 @@ async function dispatchBatchAgent(options) {
8722
8909
  };
8723
8910
  }
8724
8911
  const timestamp = generateTimestamp();
8725
- const messagesDir = path27.join(subagentDir, "messages");
8912
+ const messagesDir = path28.join(subagentDir, "messages");
8726
8913
  requestFiles = userQueries.map(
8727
- (_, index) => path27.join(messagesDir, `${timestamp}_${index}_req.md`)
8914
+ (_, index) => path28.join(messagesDir, `${timestamp}_${index}_req.md`)
8728
8915
  );
8729
8916
  const responseTmpFiles = userQueries.map(
8730
- (_, index) => path27.join(messagesDir, `${timestamp}_${index}_res.tmp.md`)
8917
+ (_, index) => path28.join(messagesDir, `${timestamp}_${index}_res.tmp.md`)
8731
8918
  );
8732
8919
  responseFilesFinal = userQueries.map(
8733
- (_, index) => path27.join(messagesDir, `${timestamp}_${index}_res.md`)
8920
+ (_, index) => path28.join(messagesDir, `${timestamp}_${index}_res.md`)
8734
8921
  );
8735
- const orchestratorFile = path27.join(messagesDir, `${timestamp}_orchestrator.md`);
8922
+ const orchestratorFile = path28.join(messagesDir, `${timestamp}_orchestrator.md`);
8736
8923
  if (!dryRun) {
8737
8924
  await Promise.all(
8738
8925
  userQueries.map((query, index) => {
@@ -8818,7 +9005,7 @@ async function dispatchBatchAgent(options) {
8818
9005
 
8819
9006
  // src/evaluation/providers/vscode/dispatch/provision.ts
8820
9007
  import { writeFile as writeFile5 } from "node:fs/promises";
8821
- import path28 from "node:path";
9008
+ import path29 from "node:path";
8822
9009
  var DEFAULT_WORKSPACE_TEMPLATE2 = {
8823
9010
  folders: [
8824
9011
  {
@@ -8849,7 +9036,7 @@ async function provisionSubagents(options) {
8849
9036
  if (!Number.isInteger(subagents) || subagents < 1) {
8850
9037
  throw new Error("subagents must be a positive integer");
8851
9038
  }
8852
- const targetPath = path28.resolve(targetRoot);
9039
+ const targetPath = path29.resolve(targetRoot);
8853
9040
  if (!dryRun) {
8854
9041
  await ensureDir(targetPath);
8855
9042
  }
@@ -8869,7 +9056,7 @@ async function provisionSubagents(options) {
8869
9056
  continue;
8870
9057
  }
8871
9058
  highestNumber = Math.max(highestNumber, parsed);
8872
- const lockFile = path28.join(entry.absolutePath, lockName);
9059
+ const lockFile = path29.join(entry.absolutePath, lockName);
8873
9060
  const locked = await pathExists(lockFile);
8874
9061
  if (locked) {
8875
9062
  lockedSubagents.add(entry.absolutePath);
@@ -8886,10 +9073,10 @@ async function provisionSubagents(options) {
8886
9073
  break;
8887
9074
  }
8888
9075
  const subagentDir = subagent.absolutePath;
8889
- const githubAgentsDir = path28.join(subagentDir, ".github", "agents");
8890
- const lockFile = path28.join(subagentDir, lockName);
8891
- const workspaceDst = path28.join(subagentDir, `${path28.basename(subagentDir)}.code-workspace`);
8892
- const wakeupDst = path28.join(githubAgentsDir, "wakeup.md");
9076
+ const githubAgentsDir = path29.join(subagentDir, ".github", "agents");
9077
+ const lockFile = path29.join(subagentDir, lockName);
9078
+ const workspaceDst = path29.join(subagentDir, `${path29.basename(subagentDir)}.code-workspace`);
9079
+ const wakeupDst = path29.join(githubAgentsDir, "wakeup.md");
8893
9080
  const isLocked = await pathExists(lockFile);
8894
9081
  if (isLocked && !force) {
8895
9082
  continue;
@@ -8927,10 +9114,10 @@ async function provisionSubagents(options) {
8927
9114
  let nextIndex = highestNumber;
8928
9115
  while (subagentsProvisioned < subagents) {
8929
9116
  nextIndex += 1;
8930
- const subagentDir = path28.join(targetPath, `subagent-${nextIndex}`);
8931
- const githubAgentsDir = path28.join(subagentDir, ".github", "agents");
8932
- const workspaceDst = path28.join(subagentDir, `${path28.basename(subagentDir)}.code-workspace`);
8933
- const wakeupDst = path28.join(githubAgentsDir, "wakeup.md");
9117
+ const subagentDir = path29.join(targetPath, `subagent-${nextIndex}`);
9118
+ const githubAgentsDir = path29.join(subagentDir, ".github", "agents");
9119
+ const workspaceDst = path29.join(subagentDir, `${path29.basename(subagentDir)}.code-workspace`);
9120
+ const wakeupDst = path29.join(githubAgentsDir, "wakeup.md");
8934
9121
  if (!dryRun) {
8935
9122
  await ensureDir(subagentDir);
8936
9123
  await ensureDir(githubAgentsDir);
@@ -9120,7 +9307,7 @@ var VSCodeProvider = class {
9120
9307
  async function locateVSCodeExecutable(candidate) {
9121
9308
  const includesPathSeparator = candidate.includes("/") || candidate.includes("\\");
9122
9309
  if (includesPathSeparator) {
9123
- const resolved = path29.isAbsolute(candidate) ? candidate : path29.resolve(candidate);
9310
+ const resolved = path30.isAbsolute(candidate) ? candidate : path30.resolve(candidate);
9124
9311
  try {
9125
9312
  await access3(resolved, constants3.F_OK);
9126
9313
  return resolved;
@@ -9149,7 +9336,7 @@ async function resolveWorkspaceTemplateFile(template) {
9149
9336
  return void 0;
9150
9337
  }
9151
9338
  try {
9152
- const stats = await stat4(path29.resolve(template));
9339
+ const stats = await stat4(path30.resolve(template));
9153
9340
  return stats.isFile() ? template : void 0;
9154
9341
  } catch {
9155
9342
  return template;
@@ -9173,7 +9360,7 @@ function buildMandatoryPrereadBlock2(attachmentFiles) {
9173
9360
  return "";
9174
9361
  }
9175
9362
  const buildList = (files) => files.map((absolutePath) => {
9176
- const fileName = path29.basename(absolutePath);
9363
+ const fileName = path30.basename(absolutePath);
9177
9364
  const fileUri = pathToFileUri3(absolutePath);
9178
9365
  return `* [${fileName}](${fileUri})`;
9179
9366
  });
@@ -9194,7 +9381,7 @@ function collectAttachmentFiles(attachments) {
9194
9381
  }
9195
9382
  const unique = /* @__PURE__ */ new Map();
9196
9383
  for (const attachment of attachments) {
9197
- const absolutePath = path29.resolve(attachment);
9384
+ const absolutePath = path30.resolve(attachment);
9198
9385
  if (!unique.has(absolutePath)) {
9199
9386
  unique.set(absolutePath, absolutePath);
9200
9387
  }
@@ -9202,7 +9389,7 @@ function collectAttachmentFiles(attachments) {
9202
9389
  return Array.from(unique.values());
9203
9390
  }
9204
9391
  function pathToFileUri3(filePath) {
9205
- const absolutePath = path29.isAbsolute(filePath) ? filePath : path29.resolve(filePath);
9392
+ const absolutePath = path30.isAbsolute(filePath) ? filePath : path30.resolve(filePath);
9206
9393
  const normalizedPath = absolutePath.replace(/\\/g, "/");
9207
9394
  if (/^[a-zA-Z]:\//.test(normalizedPath)) {
9208
9395
  return `file:///${normalizedPath}`;
@@ -9215,7 +9402,7 @@ function normalizeAttachments(attachments) {
9215
9402
  }
9216
9403
  const deduped = /* @__PURE__ */ new Set();
9217
9404
  for (const attachment of attachments) {
9218
- deduped.add(path29.resolve(attachment));
9405
+ deduped.add(path30.resolve(attachment));
9219
9406
  }
9220
9407
  return Array.from(deduped);
9221
9408
  }
@@ -9224,7 +9411,7 @@ function mergeAttachments(all) {
9224
9411
  for (const list of all) {
9225
9412
  if (!list) continue;
9226
9413
  for (const inputFile of list) {
9227
- deduped.add(path29.resolve(inputFile));
9414
+ deduped.add(path30.resolve(inputFile));
9228
9415
  }
9229
9416
  }
9230
9417
  return deduped.size > 0 ? Array.from(deduped) : void 0;
@@ -9273,7 +9460,7 @@ total unlocked subagents available: ${result.created.length + result.skippedExis
9273
9460
  // src/evaluation/providers/targets-file.ts
9274
9461
  import { constants as constants4 } from "node:fs";
9275
9462
  import { access as access4, readFile as readFile9 } from "node:fs/promises";
9276
- import path30 from "node:path";
9463
+ import path31 from "node:path";
9277
9464
  import { parse as parse4 } from "yaml";
9278
9465
  function isRecord(value) {
9279
9466
  return typeof value === "object" && value !== null && !Array.isArray(value);
@@ -9310,7 +9497,7 @@ async function fileExists3(filePath) {
9310
9497
  }
9311
9498
  }
9312
9499
  async function readTargetDefinitions(filePath) {
9313
- const absolutePath = path30.resolve(filePath);
9500
+ const absolutePath = path31.resolve(filePath);
9314
9501
  if (!await fileExists3(absolutePath)) {
9315
9502
  throw new Error(`targets.yaml not found at ${absolutePath}`);
9316
9503
  }
@@ -9330,16 +9517,16 @@ function listTargetNames(definitions) {
9330
9517
  }
9331
9518
 
9332
9519
  // src/evaluation/providers/provider-discovery.ts
9333
- import path31 from "node:path";
9520
+ import path32 from "node:path";
9334
9521
  import fg from "fast-glob";
9335
9522
  async function discoverProviders(registry, baseDir) {
9336
9523
  const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
9337
9524
  const candidateDirs = [];
9338
- let dir = path31.resolve(baseDir);
9339
- const root = path31.parse(dir).root;
9525
+ let dir = path32.resolve(baseDir);
9526
+ const root = path32.parse(dir).root;
9340
9527
  while (dir !== root) {
9341
- candidateDirs.push(path31.join(dir, ".agentv", "providers"));
9342
- dir = path31.dirname(dir);
9528
+ candidateDirs.push(path32.join(dir, ".agentv", "providers"));
9529
+ dir = path32.dirname(dir);
9343
9530
  }
9344
9531
  let files = [];
9345
9532
  for (const providersDir of candidateDirs) {
@@ -9355,7 +9542,7 @@ async function discoverProviders(registry, baseDir) {
9355
9542
  }
9356
9543
  const discoveredKinds = [];
9357
9544
  for (const filePath of files) {
9358
- const basename = path31.basename(filePath);
9545
+ const basename = path32.basename(filePath);
9359
9546
  const kindName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
9360
9547
  if (registry.has(kindName)) {
9361
9548
  continue;
@@ -9373,7 +9560,7 @@ async function discoverProviders(registry, baseDir) {
9373
9560
  // src/evaluation/providers/index.ts
9374
9561
  function createBuiltinProviderRegistry() {
9375
9562
  const registry = new ProviderRegistry();
9376
- registry.register("openai", (t) => new OpenAIProvider(t.name, t.config)).register("openrouter", (t) => new OpenRouterProvider(t.name, t.config)).register("azure", (t) => new AzureProvider(t.name, t.config)).register("anthropic", (t) => new AnthropicProvider(t.name, t.config)).register("gemini", (t) => new GeminiProvider(t.name, t.config)).register("cli", (t) => new CliProvider(t.name, t.config)).register("codex", (t) => new CodexProvider(t.name, t.config)).register("copilot-sdk", (t) => new CopilotSdkProvider(t.name, t.config)).register("copilot-cli", (t) => new CopilotCliProvider(t.name, t.config)).register("pi-coding-agent", (t) => new PiCodingAgentProvider(t.name, t.config)).register("pi-agent-sdk", (t) => new PiAgentSdkProvider(t.name, t.config)).register("claude-cli", (t) => new ClaudeCliProvider(t.name, t.config)).register("claude", (t) => new ClaudeCliProvider(t.name, t.config)).register("claude-sdk", (t) => new ClaudeSdkProvider(t.name, t.config)).register("mock", (t) => new MockProvider(t.name, t.config)).register("agentv", (t) => new AgentvProvider(t.name, t.config)).register("vscode", (t) => new VSCodeProvider(t.name, t.config, "vscode")).register(
9563
+ registry.register("openai", (t) => new OpenAIProvider(t.name, t.config)).register("openrouter", (t) => new OpenRouterProvider(t.name, t.config)).register("azure", (t) => new AzureProvider(t.name, t.config)).register("anthropic", (t) => new AnthropicProvider(t.name, t.config)).register("gemini", (t) => new GeminiProvider(t.name, t.config)).register("cli", (t) => new CliProvider(t.name, t.config)).register("codex", (t) => new CodexProvider(t.name, t.config)).register("copilot-sdk", (t) => new CopilotSdkProvider(t.name, t.config)).register("copilot-cli", (t) => new CopilotCliProvider(t.name, t.config)).register("pi-coding-agent", (t) => new PiCodingAgentProvider(t.name, t.config)).register("pi-cli", (t) => new PiCliProvider(t.name, t.config)).register("claude-cli", (t) => new ClaudeCliProvider(t.name, t.config)).register("claude", (t) => new ClaudeCliProvider(t.name, t.config)).register("claude-sdk", (t) => new ClaudeSdkProvider(t.name, t.config)).register("mock", (t) => new MockProvider(t.name, t.config)).register("agentv", (t) => new AgentvProvider(t.name, t.config)).register("vscode", (t) => new VSCodeProvider(t.name, t.config, "vscode")).register(
9377
9564
  "vscode-insiders",
9378
9565
  (t) => new VSCodeProvider(t.name, t.config, "vscode-insiders")
9379
9566
  );
@@ -9564,15 +9751,15 @@ async function execFileWithStdinNode(argv, stdinPayload, options) {
9564
9751
  });
9565
9752
  }
9566
9753
  async function execShellWithStdin(command, stdinPayload, options = {}) {
9567
- const { mkdir: mkdir15, readFile: readFile12, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
9754
+ const { mkdir: mkdir16, readFile: readFile12, rm: rm6, writeFile: writeFile9 } = await import("node:fs/promises");
9568
9755
  const { tmpdir: tmpdir3 } = await import("node:os");
9569
- const path44 = await import("node:path");
9570
- const { randomUUID: randomUUID9 } = await import("node:crypto");
9571
- const dir = path44.join(tmpdir3(), `agentv-exec-${randomUUID9()}`);
9572
- await mkdir15(dir, { recursive: true });
9573
- const stdinPath = path44.join(dir, "stdin.txt");
9574
- const stdoutPath = path44.join(dir, "stdout.txt");
9575
- const stderrPath = path44.join(dir, "stderr.txt");
9756
+ const path45 = await import("node:path");
9757
+ const { randomUUID: randomUUID10 } = await import("node:crypto");
9758
+ const dir = path45.join(tmpdir3(), `agentv-exec-${randomUUID10()}`);
9759
+ await mkdir16(dir, { recursive: true });
9760
+ const stdinPath = path45.join(dir, "stdin.txt");
9761
+ const stdoutPath = path45.join(dir, "stdout.txt");
9762
+ const stderrPath = path45.join(dir, "stderr.txt");
9576
9763
  await writeFile9(stdinPath, stdinPayload, "utf8");
9577
9764
  const wrappedCommand = process.platform === "win32" ? `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}` : `(${command}) < ${shellEscapePath(stdinPath)} > ${shellEscapePath(stdoutPath)} 2> ${shellEscapePath(stderrPath)}`;
9578
9765
  const { spawn: spawn5 } = await import("node:child_process");
@@ -10051,7 +10238,7 @@ import { generateText as generateText3 } from "ai";
10051
10238
 
10052
10239
  // src/evaluation/evaluators/llm-grader.ts
10053
10240
  import fs2 from "node:fs/promises";
10054
- import path32 from "node:path";
10241
+ import path33 from "node:path";
10055
10242
  import { generateText as generateText2, stepCountIs, tool } from "ai";
10056
10243
  import { z as z3 } from "zod";
10057
10244
  var DEFAULT_MAX_STEPS = 10;
@@ -10240,7 +10427,7 @@ ${context.fileChanges}`;
10240
10427
  async evaluateWithRubrics(context, graderProvider, rubrics) {
10241
10428
  if (!rubrics || rubrics.length === 0) {
10242
10429
  throw new Error(
10243
- `No rubrics found for evaluator "${context.evaluator?.name ?? "llm-grader"}". Run "agentv generate rubrics" first.`
10430
+ `No rubrics found for evaluator "${context.evaluator?.name ?? "llm-grader"}". Add rubric criteria under assertions or use the agentv-eval-writer skill for authoring help.`
10244
10431
  );
10245
10432
  }
10246
10433
  const hasScoreRanges = rubrics.some((r) => r.score_ranges && r.score_ranges.length > 0);
@@ -10906,8 +11093,8 @@ function calculateScoreRangeResult(result, rubrics) {
10906
11093
  };
10907
11094
  }
10908
11095
  function resolveSandboxed(basePath, relativePath) {
10909
- const resolved = path32.resolve(basePath, relativePath);
10910
- if (!resolved.startsWith(basePath + path32.sep) && resolved !== basePath) {
11096
+ const resolved = path33.resolve(basePath, relativePath);
11097
+ if (!resolved.startsWith(basePath + path33.sep) && resolved !== basePath) {
10911
11098
  throw new Error(`Path '${relativePath}' is outside the workspace`);
10912
11099
  }
10913
11100
  return resolved;
@@ -10997,11 +11184,11 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
10997
11184
  for (const entry of entries) {
10998
11185
  if (matches.length >= MAX_SEARCH_MATCHES) return;
10999
11186
  if (SEARCH_SKIP_DIRS.has(entry.name)) continue;
11000
- const fullPath = path32.join(dirPath, entry.name);
11187
+ const fullPath = path33.join(dirPath, entry.name);
11001
11188
  if (entry.isDirectory()) {
11002
11189
  await searchDirectory(fullPath, workspacePath, regex, matches);
11003
11190
  } else if (entry.isFile()) {
11004
- const ext = path32.extname(entry.name).toLowerCase();
11191
+ const ext = path33.extname(entry.name).toLowerCase();
11005
11192
  if (BINARY_EXTENSIONS.has(ext)) continue;
11006
11193
  try {
11007
11194
  const stat8 = await fs2.stat(fullPath);
@@ -11013,7 +11200,7 @@ async function searchDirectory(dirPath, workspacePath, regex, matches) {
11013
11200
  regex.lastIndex = 0;
11014
11201
  if (regex.test(lines[i])) {
11015
11202
  matches.push({
11016
- file: path32.relative(workspacePath, fullPath),
11203
+ file: path33.relative(workspacePath, fullPath),
11017
11204
  line: i + 1,
11018
11205
  text: lines[i].substring(0, 200)
11019
11206
  });
@@ -11648,115 +11835,115 @@ var FieldAccuracyEvaluator = class {
11648
11835
  * Evaluate a single field against the expected value.
11649
11836
  */
11650
11837
  evaluateField(fieldConfig, candidateData, expectedData) {
11651
- const { path: path44, match, required = true, weight = 1 } = fieldConfig;
11652
- const candidateValue = resolvePath(candidateData, path44);
11653
- const expectedValue = resolvePath(expectedData, path44);
11838
+ const { path: path45, match, required = true, weight = 1 } = fieldConfig;
11839
+ const candidateValue = resolvePath(candidateData, path45);
11840
+ const expectedValue = resolvePath(expectedData, path45);
11654
11841
  if (expectedValue === void 0) {
11655
11842
  return {
11656
- path: path44,
11843
+ path: path45,
11657
11844
  score: 1,
11658
11845
  // No expected value means no comparison needed
11659
11846
  weight,
11660
11847
  hit: true,
11661
- message: `${path44}: no expected value`
11848
+ message: `${path45}: no expected value`
11662
11849
  };
11663
11850
  }
11664
11851
  if (candidateValue === void 0) {
11665
11852
  if (required) {
11666
11853
  return {
11667
- path: path44,
11854
+ path: path45,
11668
11855
  score: 0,
11669
11856
  weight,
11670
11857
  hit: false,
11671
- message: `${path44} (required, missing)`
11858
+ message: `${path45} (required, missing)`
11672
11859
  };
11673
11860
  }
11674
11861
  return {
11675
- path: path44,
11862
+ path: path45,
11676
11863
  score: 1,
11677
11864
  // Don't penalize missing optional fields
11678
11865
  weight: 0,
11679
11866
  // Zero weight means it won't affect the score
11680
11867
  hit: true,
11681
- message: `${path44}: optional field missing`
11868
+ message: `${path45}: optional field missing`
11682
11869
  };
11683
11870
  }
11684
11871
  switch (match) {
11685
11872
  case "exact":
11686
- return this.compareExact(path44, candidateValue, expectedValue, weight);
11873
+ return this.compareExact(path45, candidateValue, expectedValue, weight);
11687
11874
  case "numeric_tolerance":
11688
11875
  return this.compareNumericTolerance(
11689
- path44,
11876
+ path45,
11690
11877
  candidateValue,
11691
11878
  expectedValue,
11692
11879
  fieldConfig,
11693
11880
  weight
11694
11881
  );
11695
11882
  case "date":
11696
- return this.compareDate(path44, candidateValue, expectedValue, fieldConfig, weight);
11883
+ return this.compareDate(path45, candidateValue, expectedValue, fieldConfig, weight);
11697
11884
  default:
11698
11885
  return {
11699
- path: path44,
11886
+ path: path45,
11700
11887
  score: 0,
11701
11888
  weight,
11702
11889
  hit: false,
11703
- message: `${path44}: unknown match type "${match}"`
11890
+ message: `${path45}: unknown match type "${match}"`
11704
11891
  };
11705
11892
  }
11706
11893
  }
11707
11894
  /**
11708
11895
  * Exact equality comparison.
11709
11896
  */
11710
- compareExact(path44, candidateValue, expectedValue, weight) {
11897
+ compareExact(path45, candidateValue, expectedValue, weight) {
11711
11898
  if (deepEqual(candidateValue, expectedValue)) {
11712
11899
  return {
11713
- path: path44,
11900
+ path: path45,
11714
11901
  score: 1,
11715
11902
  weight,
11716
11903
  hit: true,
11717
- message: path44
11904
+ message: path45
11718
11905
  };
11719
11906
  }
11720
11907
  if (typeof candidateValue !== typeof expectedValue) {
11721
11908
  return {
11722
- path: path44,
11909
+ path: path45,
11723
11910
  score: 0,
11724
11911
  weight,
11725
11912
  hit: false,
11726
- message: `${path44} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
11913
+ message: `${path45} (type mismatch: got ${typeof candidateValue}, expected ${typeof expectedValue})`
11727
11914
  };
11728
11915
  }
11729
11916
  return {
11730
- path: path44,
11917
+ path: path45,
11731
11918
  score: 0,
11732
11919
  weight,
11733
11920
  hit: false,
11734
- message: `${path44} (value mismatch)`
11921
+ message: `${path45} (value mismatch)`
11735
11922
  };
11736
11923
  }
11737
11924
  /**
11738
11925
  * Numeric comparison with absolute or relative tolerance.
11739
11926
  */
11740
- compareNumericTolerance(path44, candidateValue, expectedValue, fieldConfig, weight) {
11927
+ compareNumericTolerance(path45, candidateValue, expectedValue, fieldConfig, weight) {
11741
11928
  const { tolerance = 0, relative = false } = fieldConfig;
11742
11929
  const candidateNum = toNumber(candidateValue);
11743
11930
  const expectedNum = toNumber(expectedValue);
11744
11931
  if (candidateNum === null || expectedNum === null) {
11745
11932
  return {
11746
- path: path44,
11933
+ path: path45,
11747
11934
  score: 0,
11748
11935
  weight,
11749
11936
  hit: false,
11750
- message: `${path44} (non-numeric value)`
11937
+ message: `${path45} (non-numeric value)`
11751
11938
  };
11752
11939
  }
11753
11940
  if (!Number.isFinite(candidateNum) || !Number.isFinite(expectedNum)) {
11754
11941
  return {
11755
- path: path44,
11942
+ path: path45,
11756
11943
  score: 0,
11757
11944
  weight,
11758
11945
  hit: false,
11759
- message: `${path44} (invalid numeric value)`
11946
+ message: `${path45} (invalid numeric value)`
11760
11947
  };
11761
11948
  }
11762
11949
  const diff = Math.abs(candidateNum - expectedNum);
@@ -11769,61 +11956,61 @@ var FieldAccuracyEvaluator = class {
11769
11956
  }
11770
11957
  if (withinTolerance) {
11771
11958
  return {
11772
- path: path44,
11959
+ path: path45,
11773
11960
  score: 1,
11774
11961
  weight,
11775
11962
  hit: true,
11776
- message: `${path44} (within tolerance: diff=${diff.toFixed(2)})`
11963
+ message: `${path45} (within tolerance: diff=${diff.toFixed(2)})`
11777
11964
  };
11778
11965
  }
11779
11966
  return {
11780
- path: path44,
11967
+ path: path45,
11781
11968
  score: 0,
11782
11969
  weight,
11783
11970
  hit: false,
11784
- message: `${path44} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
11971
+ message: `${path45} (outside tolerance: diff=${diff.toFixed(2)}, tolerance=${tolerance})`
11785
11972
  };
11786
11973
  }
11787
11974
  /**
11788
11975
  * Date comparison with format normalization.
11789
11976
  */
11790
- compareDate(path44, candidateValue, expectedValue, fieldConfig, weight) {
11977
+ compareDate(path45, candidateValue, expectedValue, fieldConfig, weight) {
11791
11978
  const formats = fieldConfig.formats ?? DEFAULT_DATE_FORMATS;
11792
11979
  const candidateDate = parseDate(String(candidateValue), formats);
11793
11980
  const expectedDate = parseDate(String(expectedValue), formats);
11794
11981
  if (candidateDate === null) {
11795
11982
  return {
11796
- path: path44,
11983
+ path: path45,
11797
11984
  score: 0,
11798
11985
  weight,
11799
11986
  hit: false,
11800
- message: `${path44} (unparseable candidate date)`
11987
+ message: `${path45} (unparseable candidate date)`
11801
11988
  };
11802
11989
  }
11803
11990
  if (expectedDate === null) {
11804
11991
  return {
11805
- path: path44,
11992
+ path: path45,
11806
11993
  score: 0,
11807
11994
  weight,
11808
11995
  hit: false,
11809
- message: `${path44} (unparseable expected date)`
11996
+ message: `${path45} (unparseable expected date)`
11810
11997
  };
11811
11998
  }
11812
11999
  if (candidateDate.getFullYear() === expectedDate.getFullYear() && candidateDate.getMonth() === expectedDate.getMonth() && candidateDate.getDate() === expectedDate.getDate()) {
11813
12000
  return {
11814
- path: path44,
12001
+ path: path45,
11815
12002
  score: 1,
11816
12003
  weight,
11817
12004
  hit: true,
11818
- message: path44
12005
+ message: path45
11819
12006
  };
11820
12007
  }
11821
12008
  return {
11822
- path: path44,
12009
+ path: path45,
11823
12010
  score: 0,
11824
12011
  weight,
11825
12012
  hit: false,
11826
- message: `${path44} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
12013
+ message: `${path45} (date mismatch: got ${formatDateISO(candidateDate)}, expected ${formatDateISO(expectedDate)})`
11827
12014
  };
11828
12015
  }
11829
12016
  /**
@@ -11856,11 +12043,11 @@ var FieldAccuracyEvaluator = class {
11856
12043
  };
11857
12044
  }
11858
12045
  };
11859
- function resolvePath(obj, path44) {
11860
- if (!path44 || !obj) {
12046
+ function resolvePath(obj, path45) {
12047
+ if (!path45 || !obj) {
11861
12048
  return void 0;
11862
12049
  }
11863
- const parts = path44.split(/\.|\[|\]/).filter((p) => p.length > 0);
12050
+ const parts = path45.split(/\.|\[|\]/).filter((p) => p.length > 0);
11864
12051
  let current = obj;
11865
12052
  for (const part of parts) {
11866
12053
  if (current === null || current === void 0) {
@@ -12026,9 +12213,7 @@ var PROVIDER_TOOL_SEMANTICS = {
12026
12213
  "claude-sdk": CLAUDE_MATCHER,
12027
12214
  codex: CODEX_MATCHER,
12028
12215
  "pi-coding-agent": PI_CODING_AGENT_MATCHER,
12029
- // pi-agent-sdk has no tools, so skill detection is a no-op. Kept for completeness.
12030
- // TODO: consider removing pi-agent-sdk provider entirely.
12031
- "pi-agent-sdk": PI_CODING_AGENT_MATCHER,
12216
+ "pi-cli": PI_CODING_AGENT_MATCHER,
12032
12217
  "copilot-cli": COPILOT_MATCHER,
12033
12218
  "copilot-sdk": COPILOT_MATCHER,
12034
12219
  vscode: COPILOT_MATCHER,
@@ -12343,8 +12528,8 @@ var TokenUsageEvaluator = class {
12343
12528
  };
12344
12529
 
12345
12530
  // src/evaluation/evaluators/tool-trajectory.ts
12346
- function getNestedValue(obj, path44) {
12347
- const parts = path44.split(".");
12531
+ function getNestedValue(obj, path45) {
12532
+ const parts = path45.split(".");
12348
12533
  let current = obj;
12349
12534
  for (const part of parts) {
12350
12535
  if (current === null || current === void 0 || typeof current !== "object") {
@@ -12964,9 +13149,9 @@ function runEqualsAssertion(output, value) {
12964
13149
  }
12965
13150
 
12966
13151
  // src/evaluation/orchestrator.ts
12967
- import { createHash as createHash2, randomUUID as randomUUID8 } from "node:crypto";
12968
- import { copyFile as copyFile2, mkdir as mkdir13, readdir as readdir6, stat as stat7 } from "node:fs/promises";
12969
- import path41 from "node:path";
13152
+ import { createHash as createHash2, randomUUID as randomUUID9 } from "node:crypto";
13153
+ import { copyFile as copyFile2, mkdir as mkdir14, readdir as readdir6, stat as stat7 } from "node:fs/promises";
13154
+ import path42 from "node:path";
12970
13155
  import micromatch3 from "micromatch";
12971
13156
 
12972
13157
  // ../../node_modules/.bun/yocto-queue@1.2.2/node_modules/yocto-queue/index.js
@@ -13180,7 +13365,7 @@ var InlineAssertEvaluator = class {
13180
13365
  };
13181
13366
 
13182
13367
  // src/evaluation/evaluators/prompt-resolution.ts
13183
- import path33 from "node:path";
13368
+ import path34 from "node:path";
13184
13369
  async function resolveCustomPrompt(promptConfig, context, timeoutMs) {
13185
13370
  if (promptConfig.resolvedPromptScript && promptConfig.resolvedPromptScript.length > 0) {
13186
13371
  if (!context) {
@@ -13226,7 +13411,7 @@ async function executePromptTemplate(script, context, config, timeoutMs) {
13226
13411
  };
13227
13412
  const inputJson = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
13228
13413
  const scriptPath = script[script.length - 1];
13229
- const cwd = path33.dirname(scriptPath);
13414
+ const cwd = path34.dirname(scriptPath);
13230
13415
  try {
13231
13416
  const stdout = await executeScript(script, inputJson, timeoutMs, cwd);
13232
13417
  const prompt = stdout.trim();
@@ -13498,16 +13683,16 @@ function createBuiltinRegistry() {
13498
13683
  }
13499
13684
 
13500
13685
  // src/evaluation/registry/assertion-discovery.ts
13501
- import path34 from "node:path";
13686
+ import path35 from "node:path";
13502
13687
  import fg2 from "fast-glob";
13503
13688
  async function discoverAssertions(registry, baseDir) {
13504
13689
  const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
13505
13690
  const candidateDirs = [];
13506
- let dir = path34.resolve(baseDir);
13507
- const root = path34.parse(dir).root;
13691
+ let dir = path35.resolve(baseDir);
13692
+ const root = path35.parse(dir).root;
13508
13693
  while (dir !== root) {
13509
- candidateDirs.push(path34.join(dir, ".agentv", "assertions"));
13510
- dir = path34.dirname(dir);
13694
+ candidateDirs.push(path35.join(dir, ".agentv", "assertions"));
13695
+ dir = path35.dirname(dir);
13511
13696
  }
13512
13697
  let files = [];
13513
13698
  for (const assertionsDir of candidateDirs) {
@@ -13523,7 +13708,7 @@ async function discoverAssertions(registry, baseDir) {
13523
13708
  }
13524
13709
  const discoveredTypes = [];
13525
13710
  for (const filePath of files) {
13526
- const basename = path34.basename(filePath);
13711
+ const basename = path35.basename(filePath);
13527
13712
  const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
13528
13713
  if (registry.has(typeName)) {
13529
13714
  continue;
@@ -13541,17 +13726,17 @@ async function discoverAssertions(registry, baseDir) {
13541
13726
  }
13542
13727
 
13543
13728
  // src/evaluation/registry/grader-discovery.ts
13544
- import path35 from "node:path";
13729
+ import path36 from "node:path";
13545
13730
  import fg3 from "fast-glob";
13546
13731
  async function discoverGraders(registry, baseDir) {
13547
13732
  const patterns = ["*.ts", "*.js", "*.mts", "*.mjs"];
13548
13733
  const candidateDirs = [];
13549
- let dir = path35.resolve(baseDir);
13550
- const root = path35.parse(dir).root;
13734
+ let dir = path36.resolve(baseDir);
13735
+ const root = path36.parse(dir).root;
13551
13736
  while (dir !== root) {
13552
- candidateDirs.push(path35.join(dir, ".agentv", "graders"));
13553
- candidateDirs.push(path35.join(dir, ".agentv", "judges"));
13554
- dir = path35.dirname(dir);
13737
+ candidateDirs.push(path36.join(dir, ".agentv", "graders"));
13738
+ candidateDirs.push(path36.join(dir, ".agentv", "judges"));
13739
+ dir = path36.dirname(dir);
13555
13740
  }
13556
13741
  let files = [];
13557
13742
  for (const gradersDir of candidateDirs) {
@@ -13567,7 +13752,7 @@ async function discoverGraders(registry, baseDir) {
13567
13752
  }
13568
13753
  const discoveredTypes = [];
13569
13754
  for (const filePath of files) {
13570
- const basename = path35.basename(filePath);
13755
+ const basename = path36.basename(filePath);
13571
13756
  const typeName = basename.replace(/\.(ts|js|mts|mjs)$/, "");
13572
13757
  if (registry.has(typeName)) {
13573
13758
  continue;
@@ -13727,7 +13912,7 @@ function getTCritical(df) {
13727
13912
  // src/evaluation/workspace/file-changes.ts
13728
13913
  import { exec as execCallback } from "node:child_process";
13729
13914
  import { readdirSync as readdirSync2, statSync } from "node:fs";
13730
- import path36 from "node:path";
13915
+ import path37 from "node:path";
13731
13916
  import { promisify as promisify4 } from "node:util";
13732
13917
  var execAsync4 = promisify4(execCallback);
13733
13918
  function gitExecOpts(workspacePath) {
@@ -13761,10 +13946,10 @@ async function stageNestedRepoChanges(workspacePath) {
13761
13946
  }
13762
13947
  for (const entry of entries) {
13763
13948
  if (entry === ".git" || entry === "node_modules") continue;
13764
- const childPath = path36.join(workspacePath, entry);
13949
+ const childPath = path37.join(workspacePath, entry);
13765
13950
  try {
13766
13951
  if (!statSync(childPath).isDirectory()) continue;
13767
- if (!statSync(path36.join(childPath, ".git")).isDirectory()) continue;
13952
+ if (!statSync(path37.join(childPath, ".git")).isDirectory()) continue;
13768
13953
  } catch {
13769
13954
  continue;
13770
13955
  }
@@ -13774,8 +13959,8 @@ async function stageNestedRepoChanges(workspacePath) {
13774
13959
  }
13775
13960
 
13776
13961
  // src/evaluation/workspace/manager.ts
13777
- import { cp, mkdir as mkdir11, readdir as readdir3, rm as rm4, stat as stat5 } from "node:fs/promises";
13778
- import path37 from "node:path";
13962
+ import { cp, mkdir as mkdir12, readdir as readdir3, rm as rm4, stat as stat5 } from "node:fs/promises";
13963
+ import path38 from "node:path";
13779
13964
  var TemplateNotFoundError = class extends Error {
13780
13965
  constructor(templatePath) {
13781
13966
  super(`Workspace template not found: ${templatePath}`);
@@ -13805,14 +13990,14 @@ async function isDirectory(filePath) {
13805
13990
  }
13806
13991
  function getWorkspacePath(evalRunId, caseId, workspaceRoot) {
13807
13992
  const root = workspaceRoot ?? getWorkspacesRoot();
13808
- return path37.join(root, evalRunId, caseId);
13993
+ return path38.join(root, evalRunId, caseId);
13809
13994
  }
13810
13995
  async function copyDirectoryRecursive(src, dest) {
13811
- await mkdir11(dest, { recursive: true });
13996
+ await mkdir12(dest, { recursive: true });
13812
13997
  const entries = await readdir3(src, { withFileTypes: true });
13813
13998
  for (const entry of entries) {
13814
- const srcPath = path37.join(src, entry.name);
13815
- const destPath = path37.join(dest, entry.name);
13999
+ const srcPath = path38.join(src, entry.name);
14000
+ const destPath = path38.join(dest, entry.name);
13816
14001
  if (entry.name === ".git") {
13817
14002
  continue;
13818
14003
  }
@@ -13824,7 +14009,7 @@ async function copyDirectoryRecursive(src, dest) {
13824
14009
  }
13825
14010
  }
13826
14011
  async function createTempWorkspace(templatePath, evalRunId, caseId, workspaceRoot) {
13827
- const resolvedTemplatePath = path37.resolve(templatePath);
14012
+ const resolvedTemplatePath = path38.resolve(templatePath);
13828
14013
  if (!await fileExists(resolvedTemplatePath)) {
13829
14014
  throw new TemplateNotFoundError(resolvedTemplatePath);
13830
14015
  }
@@ -13873,7 +14058,7 @@ async function cleanupWorkspace(workspacePath) {
13873
14058
  }
13874
14059
  async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
13875
14060
  const root = workspaceRoot ?? getWorkspacesRoot();
13876
- const evalDir = path37.join(root, evalRunId);
14061
+ const evalDir = path38.join(root, evalRunId);
13877
14062
  if (await fileExists(evalDir)) {
13878
14063
  await rm4(evalDir, { recursive: true, force: true });
13879
14064
  }
@@ -13883,8 +14068,8 @@ async function cleanupEvalWorkspaces(evalRunId, workspaceRoot) {
13883
14068
  import { execFile } from "node:child_process";
13884
14069
  import { createHash } from "node:crypto";
13885
14070
  import { existsSync as existsSync2 } from "node:fs";
13886
- import { cp as cp2, mkdir as mkdir12, readFile as readFile10, readdir as readdir4, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
13887
- import path38 from "node:path";
14071
+ import { cp as cp2, mkdir as mkdir13, readFile as readFile10, readdir as readdir4, rm as rm5, unlink, writeFile as writeFile7 } from "node:fs/promises";
14072
+ import path39 from "node:path";
13888
14073
  import { promisify as promisify5 } from "node:util";
13889
14074
  var execFileAsync = promisify5(execFile);
13890
14075
  function gitEnv() {
@@ -13935,11 +14120,11 @@ function computeWorkspaceFingerprint(repos) {
13935
14120
  return createHash("sha256").update(JSON.stringify(canonical)).digest("hex");
13936
14121
  }
13937
14122
  async function copyDirectoryRecursive2(src, dest, skipDirs) {
13938
- await mkdir12(dest, { recursive: true });
14123
+ await mkdir13(dest, { recursive: true });
13939
14124
  const entries = await readdir4(src, { withFileTypes: true });
13940
14125
  for (const entry of entries) {
13941
- const srcPath = path38.join(src, entry.name);
13942
- const destPath = path38.join(dest, entry.name);
14126
+ const srcPath = path39.join(src, entry.name);
14127
+ const destPath = path39.join(dest, entry.name);
13943
14128
  if (entry.name === ".git") {
13944
14129
  continue;
13945
14130
  }
@@ -13972,8 +14157,8 @@ var WorkspacePoolManager = class {
13972
14157
  async acquireWorkspace(options) {
13973
14158
  const { templatePath, repos, maxSlots, repoManager, poolReset } = options;
13974
14159
  const fingerprint = computeWorkspaceFingerprint(repos);
13975
- const poolDir = path38.join(this.poolRoot, fingerprint);
13976
- await mkdir12(poolDir, { recursive: true });
14160
+ const poolDir = path39.join(this.poolRoot, fingerprint);
14161
+ await mkdir13(poolDir, { recursive: true });
13977
14162
  const drifted = await this.checkDrift(poolDir, fingerprint);
13978
14163
  if (drifted) {
13979
14164
  console.warn(
@@ -13982,7 +14167,7 @@ var WorkspacePoolManager = class {
13982
14167
  await this.removeAllSlots(poolDir);
13983
14168
  }
13984
14169
  for (let i = 0; i < maxSlots; i++) {
13985
- const slotPath = path38.join(poolDir, `slot-${i}`);
14170
+ const slotPath = path39.join(poolDir, `slot-${i}`);
13986
14171
  const lockPath = `${slotPath}.lock`;
13987
14172
  const locked = await this.tryLock(lockPath);
13988
14173
  if (!locked) {
@@ -14000,7 +14185,7 @@ var WorkspacePoolManager = class {
14000
14185
  poolDir
14001
14186
  };
14002
14187
  }
14003
- await mkdir12(slotPath, { recursive: true });
14188
+ await mkdir13(slotPath, { recursive: true });
14004
14189
  if (templatePath) {
14005
14190
  await copyDirectoryRecursive2(templatePath, slotPath);
14006
14191
  }
@@ -14069,7 +14254,7 @@ var WorkspacePoolManager = class {
14069
14254
  * Returns false (no drift) if metadata.json doesn't exist (first use).
14070
14255
  */
14071
14256
  async checkDrift(poolDir, fingerprint) {
14072
- const metadataPath = path38.join(poolDir, "metadata.json");
14257
+ const metadataPath = path39.join(poolDir, "metadata.json");
14073
14258
  try {
14074
14259
  const raw = await readFile10(metadataPath, "utf-8");
14075
14260
  const metadata = JSON.parse(raw);
@@ -14086,14 +14271,14 @@ var WorkspacePoolManager = class {
14086
14271
  repos,
14087
14272
  createdAt: (/* @__PURE__ */ new Date()).toISOString()
14088
14273
  };
14089
- await writeFile7(path38.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
14274
+ await writeFile7(path39.join(poolDir, "metadata.json"), JSON.stringify(metadata, null, 2));
14090
14275
  }
14091
14276
  /** Remove all slot directories and their lock files from a pool directory. */
14092
14277
  async removeAllSlots(poolDir) {
14093
14278
  const entries = await readdir4(poolDir);
14094
14279
  for (const entry of entries) {
14095
14280
  if (entry.startsWith("slot-") && !entry.endsWith(".lock")) {
14096
- const lockPath = path38.join(poolDir, `${entry}.lock`);
14281
+ const lockPath = path39.join(poolDir, `${entry}.lock`);
14097
14282
  if (existsSync2(lockPath)) {
14098
14283
  try {
14099
14284
  const pidStr = await readFile10(lockPath, "utf-8");
@@ -14109,12 +14294,12 @@ var WorkspacePoolManager = class {
14109
14294
  } catch {
14110
14295
  }
14111
14296
  }
14112
- await rm5(path38.join(poolDir, entry), { recursive: true, force: true });
14297
+ await rm5(path39.join(poolDir, entry), { recursive: true, force: true });
14113
14298
  await rm5(lockPath, { force: true }).catch(() => {
14114
14299
  });
14115
14300
  }
14116
14301
  }
14117
- await rm5(path38.join(poolDir, "metadata.json"), { force: true }).catch(() => {
14302
+ await rm5(path39.join(poolDir, "metadata.json"), { force: true }).catch(() => {
14118
14303
  });
14119
14304
  }
14120
14305
  /**
@@ -14124,7 +14309,7 @@ var WorkspacePoolManager = class {
14124
14309
  */
14125
14310
  async resetSlot(slotPath, templatePath, repos, poolReset = "fast") {
14126
14311
  for (const repo of repos) {
14127
- const repoDir = path38.join(slotPath, repo.path);
14312
+ const repoDir = path39.join(slotPath, repo.path);
14128
14313
  if (!existsSync2(repoDir)) {
14129
14314
  continue;
14130
14315
  }
@@ -14151,7 +14336,7 @@ var WorkspacePoolManager = class {
14151
14336
  // src/evaluation/workspace/repo-manager.ts
14152
14337
  import { execFile as execFile2 } from "node:child_process";
14153
14338
  import { existsSync as existsSync3 } from "node:fs";
14154
- import path39 from "node:path";
14339
+ import path40 from "node:path";
14155
14340
  import { promisify as promisify6 } from "node:util";
14156
14341
  var execFileAsync2 = promisify6(execFile2);
14157
14342
  var DEFAULT_TIMEOUT_MS2 = 3e5;
@@ -14251,7 +14436,7 @@ ${lines.join("\n")}`;
14251
14436
  * Handles checkout, ref resolution, ancestor walking, shallow clone, sparse checkout.
14252
14437
  */
14253
14438
  async materialize(repo, workspacePath) {
14254
- const targetDir = path39.join(workspacePath, repo.path);
14439
+ const targetDir = path40.join(workspacePath, repo.path);
14255
14440
  const sourceUrl = getSourceUrl(repo.source);
14256
14441
  const startedAt = Date.now();
14257
14442
  if (this.verbose) {
@@ -14342,7 +14527,7 @@ ${lines.join("\n")}`;
14342
14527
  async reset(repos, workspacePath, reset) {
14343
14528
  const cleanFlag = reset === "strict" ? "-fdx" : "-fd";
14344
14529
  for (const repo of repos) {
14345
- const targetDir = path39.join(workspacePath, repo.path);
14530
+ const targetDir = path40.join(workspacePath, repo.path);
14346
14531
  await this.runGit(["reset", "--hard", "HEAD"], { cwd: targetDir });
14347
14532
  await this.runGit(["clean", cleanFlag], { cwd: targetDir });
14348
14533
  }
@@ -14351,16 +14536,16 @@ ${lines.join("\n")}`;
14351
14536
 
14352
14537
  // src/evaluation/workspace/resolve.ts
14353
14538
  import { readdir as readdir5, stat as stat6 } from "node:fs/promises";
14354
- import path40 from "node:path";
14539
+ import path41 from "node:path";
14355
14540
  async function resolveWorkspaceTemplate(templatePath) {
14356
14541
  if (!templatePath) {
14357
14542
  return void 0;
14358
14543
  }
14359
- const resolved = path40.resolve(templatePath);
14544
+ const resolved = path41.resolve(templatePath);
14360
14545
  const stats = await stat6(resolved);
14361
14546
  if (stats.isFile()) {
14362
14547
  return {
14363
- dir: path40.dirname(resolved),
14548
+ dir: path41.dirname(resolved),
14364
14549
  workspaceFile: resolved
14365
14550
  };
14366
14551
  }
@@ -14372,14 +14557,14 @@ async function resolveWorkspaceTemplate(templatePath) {
14372
14557
  if (workspaceFiles.length === 1) {
14373
14558
  return {
14374
14559
  dir: resolved,
14375
- workspaceFile: path40.join(resolved, workspaceFiles[0])
14560
+ workspaceFile: path41.join(resolved, workspaceFiles[0])
14376
14561
  };
14377
14562
  }
14378
14563
  if (workspaceFiles.length > 1) {
14379
14564
  const conventionFile = workspaceFiles.find((f) => f === "template.code-workspace");
14380
14565
  return {
14381
14566
  dir: resolved,
14382
- workspaceFile: conventionFile ? path40.join(resolved, conventionFile) : void 0
14567
+ workspaceFile: conventionFile ? path41.join(resolved, conventionFile) : void 0
14383
14568
  };
14384
14569
  }
14385
14570
  return { dir: resolved };
@@ -14516,7 +14701,7 @@ async function runEvaluation(options) {
14516
14701
  );
14517
14702
  useCache = false;
14518
14703
  }
14519
- const evalRunId = randomUUID8();
14704
+ const evalRunId = randomUUID9();
14520
14705
  const evalCases = preloadedEvalCases ?? await loadTests(evalFilePath, repoRoot, { verbose, filter });
14521
14706
  const filteredEvalCases = filterEvalCases(evalCases, filter);
14522
14707
  if (filteredEvalCases.length === 0) {
@@ -14595,7 +14780,7 @@ async function runEvaluation(options) {
14595
14780
  ];
14596
14781
  const evaluatorRegistry = buildEvaluatorRegistry(evaluators, resolveGraderProvider);
14597
14782
  const typeRegistry = createBuiltinRegistry();
14598
- const discoveryBaseDir = evalFilePath ? path41.dirname(path41.resolve(evalFilePath)) : process.cwd();
14783
+ const discoveryBaseDir = evalFilePath ? path42.dirname(path42.resolve(evalFilePath)) : process.cwd();
14599
14784
  const evalDir = discoveryBaseDir;
14600
14785
  await discoverAssertions(typeRegistry, discoveryBaseDir);
14601
14786
  await discoverGraders(typeRegistry, discoveryBaseDir);
@@ -14742,7 +14927,7 @@ async function runEvaluation(options) {
14742
14927
  const isEmpty = dirExists ? (await readdir6(configuredStaticPath)).length === 0 : false;
14743
14928
  if (isYamlConfiguredPath && (!dirExists || isEmpty)) {
14744
14929
  if (!dirExists) {
14745
- await mkdir13(configuredStaticPath, { recursive: true });
14930
+ await mkdir14(configuredStaticPath, { recursive: true });
14746
14931
  }
14747
14932
  if (workspaceTemplate) {
14748
14933
  await copyDirectoryRecursive(workspaceTemplate, configuredStaticPath);
@@ -14787,12 +14972,12 @@ async function runEvaluation(options) {
14787
14972
  }
14788
14973
  } else if (suiteWorkspace?.hooks || suiteWorkspace?.repos?.length && !isPerTestIsolation) {
14789
14974
  sharedWorkspacePath = getWorkspacePath(evalRunId, "shared");
14790
- await mkdir13(sharedWorkspacePath, { recursive: true });
14975
+ await mkdir14(sharedWorkspacePath, { recursive: true });
14791
14976
  setupLog(`created empty shared workspace at: ${sharedWorkspacePath}`);
14792
14977
  }
14793
14978
  try {
14794
14979
  if (suiteWorkspaceFile && sharedWorkspacePath) {
14795
- const copiedWorkspaceFile = path41.join(sharedWorkspacePath, path41.basename(suiteWorkspaceFile));
14980
+ const copiedWorkspaceFile = path42.join(sharedWorkspacePath, path42.basename(suiteWorkspaceFile));
14796
14981
  try {
14797
14982
  await stat7(copiedWorkspaceFile);
14798
14983
  suiteWorkspaceFile = copiedWorkspaceFile;
@@ -15374,7 +15559,7 @@ async function runEvalCase(options) {
15374
15559
  );
15375
15560
  }
15376
15561
  if (caseWorkspaceFile && workspacePath) {
15377
- const copiedFile = path41.join(workspacePath, path41.basename(caseWorkspaceFile));
15562
+ const copiedFile = path42.join(workspacePath, path42.basename(caseWorkspaceFile));
15378
15563
  try {
15379
15564
  await stat7(copiedFile);
15380
15565
  caseWorkspaceFile = copiedFile;
@@ -15384,7 +15569,7 @@ async function runEvalCase(options) {
15384
15569
  }
15385
15570
  if (!workspacePath && (evalCase.workspace?.hooks || evalCase.workspace?.repos?.length) && evalRunId) {
15386
15571
  workspacePath = getWorkspacePath(evalRunId, evalCase.id);
15387
- await mkdir13(workspacePath, { recursive: true });
15572
+ await mkdir14(workspacePath, { recursive: true });
15388
15573
  }
15389
15574
  if (evalCase.workspace?.repos?.length && workspacePath) {
15390
15575
  const localPathErrors = RepoManager.validateLocalPaths(evalCase.workspace.repos);
@@ -15436,10 +15621,10 @@ async function runEvalCase(options) {
15436
15621
  const files = evalCase.metadata.agent_skills_files;
15437
15622
  if (baseDir && files.length > 0) {
15438
15623
  for (const relPath of files) {
15439
- const srcPath = path41.resolve(baseDir, relPath);
15440
- const destPath = path41.resolve(workspacePath, relPath);
15624
+ const srcPath = path42.resolve(baseDir, relPath);
15625
+ const destPath = path42.resolve(workspacePath, relPath);
15441
15626
  try {
15442
- await mkdir13(path41.dirname(destPath), { recursive: true });
15627
+ await mkdir14(path42.dirname(destPath), { recursive: true });
15443
15628
  await copyFile2(srcPath, destPath);
15444
15629
  } catch (error) {
15445
15630
  const message = error instanceof Error ? error.message : String(error);
@@ -16085,7 +16270,7 @@ async function runEvaluatorList(options) {
16085
16270
  fileChanges,
16086
16271
  workspacePath
16087
16272
  };
16088
- const evalFileDir = evalCase.file_paths[0] ? path41.dirname(evalCase.file_paths[0]) : process.cwd();
16273
+ const evalFileDir = evalCase.file_paths[0] ? path42.dirname(evalCase.file_paths[0]) : process.cwd();
16089
16274
  const dispatchContext = {
16090
16275
  graderProvider,
16091
16276
  targetResolver,
@@ -16419,7 +16604,7 @@ function computeWeightedMean(entries) {
16419
16604
 
16420
16605
  // src/evaluation/evaluate.ts
16421
16606
  import { existsSync as existsSync4 } from "node:fs";
16422
- import path42 from "node:path";
16607
+ import path43 from "node:path";
16423
16608
 
16424
16609
  // src/evaluation/providers/function-provider.ts
16425
16610
  function createFunctionProvider(taskFn) {
@@ -16456,7 +16641,7 @@ async function evaluate(config) {
16456
16641
  }
16457
16642
  const gitRoot = await findGitRoot(process.cwd());
16458
16643
  const repoRoot = gitRoot ?? process.cwd();
16459
- const testFilePath = config.specFile ? path42.resolve(config.specFile) : path42.join(process.cwd(), "__programmatic__.yaml");
16644
+ const testFilePath = config.specFile ? path43.resolve(config.specFile) : path43.join(process.cwd(), "__programmatic__.yaml");
16460
16645
  await loadEnvHierarchy(repoRoot, testFilePath);
16461
16646
  let resolvedTarget;
16462
16647
  let taskProvider;
@@ -16577,10 +16762,10 @@ function computeSummary(results, durationMs) {
16577
16762
  var TARGET_FILE_CANDIDATES = [".agentv/targets.yaml", ".agentv/targets.yml"];
16578
16763
  async function discoverDefaultTarget(repoRoot) {
16579
16764
  const cwd = process.cwd();
16580
- const chain = buildDirectoryChain(path42.join(cwd, "_placeholder"), repoRoot);
16765
+ const chain = buildDirectoryChain(path43.join(cwd, "_placeholder"), repoRoot);
16581
16766
  for (const dir of chain) {
16582
16767
  for (const candidate of TARGET_FILE_CANDIDATES) {
16583
- const targetsPath = path42.join(dir, candidate);
16768
+ const targetsPath = path43.join(dir, candidate);
16584
16769
  if (!existsSync4(targetsPath)) continue;
16585
16770
  try {
16586
16771
  const definitions = await readTargetDefinitions(targetsPath);
@@ -16597,7 +16782,7 @@ async function loadEnvHierarchy(repoRoot, startPath) {
16597
16782
  const chain = buildDirectoryChain(startPath, repoRoot);
16598
16783
  const envFiles = [];
16599
16784
  for (const dir of chain) {
16600
- const envPath = path42.join(dir, ".env");
16785
+ const envPath = path43.join(dir, ".env");
16601
16786
  if (existsSync4(envPath)) envFiles.push(envPath);
16602
16787
  }
16603
16788
  for (let i = 0; i < envFiles.length; i++) {
@@ -16778,8 +16963,8 @@ function buildPrompt(criteria, question, referenceAnswer) {
16778
16963
  }
16779
16964
 
16780
16965
  // src/evaluation/cache/response-cache.ts
16781
- import { mkdir as mkdir14, readFile as readFile11, writeFile as writeFile8 } from "node:fs/promises";
16782
- import path43 from "node:path";
16966
+ import { mkdir as mkdir15, readFile as readFile11, writeFile as writeFile8 } from "node:fs/promises";
16967
+ import path44 from "node:path";
16783
16968
  var DEFAULT_CACHE_PATH = ".agentv/cache";
16784
16969
  var ResponseCache = class {
16785
16970
  cachePath;
@@ -16797,13 +16982,13 @@ var ResponseCache = class {
16797
16982
  }
16798
16983
  async set(key, value) {
16799
16984
  const filePath = this.keyToPath(key);
16800
- const dir = path43.dirname(filePath);
16801
- await mkdir14(dir, { recursive: true });
16985
+ const dir = path44.dirname(filePath);
16986
+ await mkdir15(dir, { recursive: true });
16802
16987
  await writeFile8(filePath, JSON.stringify(value, null, 2), "utf8");
16803
16988
  }
16804
16989
  keyToPath(key) {
16805
16990
  const prefix = key.slice(0, 2);
16806
- return path43.join(this.cachePath, prefix, `${key}.json`);
16991
+ return path44.join(this.cachePath, prefix, `${key}.json`);
16807
16992
  }
16808
16993
  };
16809
16994
  function shouldEnableCache(params) {
@@ -16937,7 +17122,7 @@ var OtelTraceExporter = class {
16937
17122
  );
16938
17123
  }
16939
17124
  if (this.options.traceFilePath) {
16940
- const { SimpleTraceFileExporter: SimpleTraceFileExporter2 } = await import("./simple-trace-file-exporter-S76DMABU.js");
17125
+ const { SimpleTraceFileExporter: SimpleTraceFileExporter2 } = await import("./simple-trace-file-exporter-CRIO5HDZ.js");
16941
17126
  processors.push(
16942
17127
  new SimpleSpanProcessor(new SimpleTraceFileExporter2(this.options.traceFilePath))
16943
17128
  );
@@ -17001,6 +17186,17 @@ var OtelTraceExporter = class {
17001
17186
  if (result.durationMs != null)
17002
17187
  rootSpan.setAttribute("agentv.trace.duration_ms", result.durationMs);
17003
17188
  if (result.costUsd != null) rootSpan.setAttribute("agentv.trace.cost_usd", result.costUsd);
17189
+ if (result.tokenUsage) {
17190
+ if (result.tokenUsage.input != null) {
17191
+ rootSpan.setAttribute("agentv.trace.token_input", result.tokenUsage.input);
17192
+ }
17193
+ if (result.tokenUsage.output != null) {
17194
+ rootSpan.setAttribute("agentv.trace.token_output", result.tokenUsage.output);
17195
+ }
17196
+ if (result.tokenUsage.cached != null) {
17197
+ rootSpan.setAttribute("agentv.trace.token_cached", result.tokenUsage.cached);
17198
+ }
17199
+ }
17004
17200
  if (result.trace) {
17005
17201
  const t = result.trace;
17006
17202
  rootSpan.setAttribute("agentv.trace.event_count", t.eventCount);
@@ -17103,6 +17299,7 @@ var OtelTraceExporter = class {
17103
17299
  tracer.startActiveSpan(
17104
17300
  spanName,
17105
17301
  { startTime: startHr },
17302
+ parentCtx,
17106
17303
  (span) => {
17107
17304
  if (isAssistant) {
17108
17305
  span.setAttribute("gen_ai.operation.name", "chat");
@@ -17135,6 +17332,7 @@ var OtelTraceExporter = class {
17135
17332
  tracer.startActiveSpan(
17136
17333
  `execute_tool ${tc.tool}`,
17137
17334
  {},
17335
+ msgCtx,
17138
17336
  (toolSpan) => {
17139
17337
  toolSpan.setAttribute("gen_ai.tool.name", tc.tool);
17140
17338
  if (tc.id) toolSpan.setAttribute("gen_ai.tool.call.id", tc.id);
@@ -17175,8 +17373,12 @@ var OtelStreamingObserver = class {
17175
17373
  rootSpan = null;
17176
17374
  // biome-ignore lint/suspicious/noExplicitAny: OTel context loaded dynamically
17177
17375
  rootCtx = null;
17376
+ observedChildSpans = false;
17377
+ pendingMetrics = null;
17178
17378
  /** Create root eval span immediately (visible in backend right away) */
17179
17379
  startEvalCase(testId, target, evalSet) {
17380
+ this.pendingMetrics = null;
17381
+ this.observedChildSpans = false;
17180
17382
  const ctx = this.parentCtx ?? this.api.context.active();
17181
17383
  this.rootSpan = this.tracer.startSpan("agentv.eval", void 0, ctx);
17182
17384
  this.rootSpan.setAttribute("gen_ai.operation.name", "evaluate");
@@ -17189,8 +17391,9 @@ var OtelStreamingObserver = class {
17189
17391
  /** Create and immediately export a tool span */
17190
17392
  onToolCall(name, input, output, _durationMs, toolCallId) {
17191
17393
  if (!this.rootCtx) return;
17394
+ this.observedChildSpans = true;
17192
17395
  this.api.context.with(this.rootCtx, () => {
17193
- const span = this.tracer.startSpan(`execute_tool ${name}`);
17396
+ const span = this.tracer.startSpan(`execute_tool ${name}`, void 0, this.rootCtx);
17194
17397
  span.setAttribute("gen_ai.tool.name", name);
17195
17398
  if (toolCallId) span.setAttribute("gen_ai.tool.call.id", toolCallId);
17196
17399
  if (this.captureContent) {
@@ -17211,8 +17414,9 @@ var OtelStreamingObserver = class {
17211
17414
  /** Create and immediately export an LLM span */
17212
17415
  onLlmCall(model, tokenUsage) {
17213
17416
  if (!this.rootCtx) return;
17417
+ this.observedChildSpans = true;
17214
17418
  this.api.context.with(this.rootCtx, () => {
17215
- const span = this.tracer.startSpan(`chat ${model}`);
17419
+ const span = this.tracer.startSpan(`chat ${model}`, void 0, this.rootCtx);
17216
17420
  span.setAttribute("gen_ai.operation.name", "chat");
17217
17421
  span.setAttribute("gen_ai.request.model", model);
17218
17422
  span.setAttribute("gen_ai.response.model", model);
@@ -17227,10 +17431,53 @@ var OtelStreamingObserver = class {
17227
17431
  span.end();
17228
17432
  });
17229
17433
  }
17434
+ /** Record final execution metrics before the root span is finalized. */
17435
+ recordEvalMetrics(result) {
17436
+ this.pendingMetrics = result;
17437
+ }
17230
17438
  /** Finalize root span with score/verdict after evaluation completes */
17231
17439
  finalizeEvalCase(score, error) {
17232
17440
  if (!this.rootSpan) return;
17233
17441
  this.rootSpan.setAttribute("agentv.score", score);
17442
+ if (this.pendingMetrics?.durationMs != null) {
17443
+ this.rootSpan.setAttribute("agentv.trace.duration_ms", this.pendingMetrics.durationMs);
17444
+ }
17445
+ if (this.pendingMetrics?.costUsd != null) {
17446
+ this.rootSpan.setAttribute("agentv.trace.cost_usd", this.pendingMetrics.costUsd);
17447
+ }
17448
+ if (this.pendingMetrics?.tokenUsage) {
17449
+ if (this.pendingMetrics.tokenUsage.input != null) {
17450
+ this.rootSpan.setAttribute(
17451
+ "agentv.trace.token_input",
17452
+ this.pendingMetrics.tokenUsage.input
17453
+ );
17454
+ }
17455
+ if (this.pendingMetrics.tokenUsage.output != null) {
17456
+ this.rootSpan.setAttribute(
17457
+ "agentv.trace.token_output",
17458
+ this.pendingMetrics.tokenUsage.output
17459
+ );
17460
+ }
17461
+ if (this.pendingMetrics.tokenUsage.cached != null) {
17462
+ this.rootSpan.setAttribute(
17463
+ "agentv.trace.token_cached",
17464
+ this.pendingMetrics.tokenUsage.cached
17465
+ );
17466
+ }
17467
+ }
17468
+ if (this.pendingMetrics?.trace) {
17469
+ this.rootSpan.setAttribute("agentv.trace.event_count", this.pendingMetrics.trace.eventCount);
17470
+ this.rootSpan.setAttribute(
17471
+ "agentv.trace.tool_names",
17472
+ Object.keys(this.pendingMetrics.trace.toolCalls).sort().join(",")
17473
+ );
17474
+ if (this.pendingMetrics.trace.llmCallCount != null) {
17475
+ this.rootSpan.setAttribute(
17476
+ "agentv.trace.llm_call_count",
17477
+ this.pendingMetrics.trace.llmCallCount
17478
+ );
17479
+ }
17480
+ }
17234
17481
  if (error) {
17235
17482
  this.rootSpan.setStatus({ code: this.api.SpanStatusCode.ERROR, message: error });
17236
17483
  } else {
@@ -17239,6 +17486,33 @@ var OtelStreamingObserver = class {
17239
17486
  this.rootSpan.end();
17240
17487
  this.rootSpan = null;
17241
17488
  this.rootCtx = null;
17489
+ this.observedChildSpans = false;
17490
+ this.pendingMetrics = null;
17491
+ }
17492
+ /** Backfill child spans from the completed result when the provider emitted no live callbacks. */
17493
+ completeFromResult(result) {
17494
+ this.recordEvalMetrics({
17495
+ durationMs: result.durationMs,
17496
+ costUsd: result.costUsd,
17497
+ tokenUsage: result.tokenUsage,
17498
+ trace: result.trace
17499
+ });
17500
+ if (this.observedChildSpans || !this.rootCtx) {
17501
+ return;
17502
+ }
17503
+ const model = result.output.find((msg) => msg.role === "assistant")?.metadata?.model ?? result.target ?? "unknown";
17504
+ this.onLlmCall(String(model), result.tokenUsage);
17505
+ for (const message of result.output) {
17506
+ for (const toolCall of message.toolCalls ?? []) {
17507
+ this.onToolCall(
17508
+ toolCall.tool,
17509
+ toolCall.input,
17510
+ toolCall.output,
17511
+ toolCall.durationMs ?? 0,
17512
+ toolCall.id
17513
+ );
17514
+ }
17515
+ }
17242
17516
  }
17243
17517
  /** Return the active eval span's trace ID and span ID for Braintrust trace bridging */
17244
17518
  getActiveSpanIds() {