agentv 3.2.5 → 3.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -301,7 +301,7 @@ var require_dist = __commonJS({
301
301
  }
302
302
  });
303
303
 
304
- // ../../packages/core/dist/chunk-DVFNM65P.js
304
+ // ../../packages/core/dist/chunk-JO4HIAEF.js
305
305
  import { constants } from "node:fs";
306
306
  import { access, readFile } from "node:fs/promises";
307
307
  import path from "node:path";
@@ -419,7 +419,7 @@ __export(external_exports2, {
419
419
  void: () => voidType
420
420
  });
421
421
 
422
- // ../../packages/core/dist/chunk-DVFNM65P.js
422
+ // ../../packages/core/dist/chunk-JO4HIAEF.js
423
423
  var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
424
424
  var TEST_MESSAGE_ROLES = TEST_MESSAGE_ROLE_VALUES;
425
425
  var TEST_MESSAGE_ROLE_SET = new Set(TEST_MESSAGE_ROLE_VALUES);
@@ -1810,6 +1810,7 @@ var AGENT_PROVIDER_KINDS = [
1810
1810
  "copilot-sdk",
1811
1811
  "copilot-cli",
1812
1812
  "pi-coding-agent",
1813
+ "pi-agent-sdk",
1813
1814
  "claude",
1814
1815
  "claude-cli",
1815
1816
  "claude-sdk",
@@ -14657,19 +14658,26 @@ function logWarning(message) {
14657
14658
  console.warn(`${ANSI_YELLOW2}Warning: ${message}${ANSI_RESET3}`);
14658
14659
  }
14659
14660
  var TEMPLATE_VARIABLES = {
14661
+ /** @deprecated Use OUTPUT_TEXT instead */
14660
14662
  ANSWER: "answer",
14661
14663
  EXPECTED_OUTPUT: "expected_output",
14664
+ /** @deprecated Use INPUT_TEXT instead */
14662
14665
  QUESTION: "question",
14663
14666
  CRITERIA: "criteria",
14667
+ /** @deprecated Use EXPECTED_OUTPUT_TEXT instead */
14664
14668
  REFERENCE_ANSWER: "reference_answer",
14665
14669
  INPUT: "input",
14666
14670
  OUTPUT: "output",
14667
- FILE_CHANGES: "file_changes"
14671
+ FILE_CHANGES: "file_changes",
14672
+ INPUT_TEXT: "input_text",
14673
+ OUTPUT_TEXT: "output_text",
14674
+ EXPECTED_OUTPUT_TEXT: "expected_output_text"
14668
14675
  };
14669
14676
  var VALID_TEMPLATE_VARIABLES = new Set(Object.values(TEMPLATE_VARIABLES));
14670
14677
  var REQUIRED_TEMPLATE_VARIABLES = /* @__PURE__ */ new Set([
14671
14678
  TEMPLATE_VARIABLES.ANSWER,
14672
- TEMPLATE_VARIABLES.EXPECTED_OUTPUT
14679
+ TEMPLATE_VARIABLES.EXPECTED_OUTPUT,
14680
+ TEMPLATE_VARIABLES.OUTPUT_TEXT
14673
14681
  ]);
14674
14682
  var ANSI_YELLOW3 = "\x1B[33m";
14675
14683
  var ANSI_RESET4 = "\x1B[0m";
@@ -14690,13 +14698,13 @@ function validateTemplateVariables(content, source) {
14690
14698
  }
14691
14699
  match = variablePattern.exec(content);
14692
14700
  }
14693
- const hasCandidateAnswer = foundVariables.has(TEMPLATE_VARIABLES.ANSWER);
14701
+ const hasCandidateAnswer = foundVariables.has(TEMPLATE_VARIABLES.ANSWER) || foundVariables.has(TEMPLATE_VARIABLES.OUTPUT_TEXT);
14694
14702
  const hasExpectedOutput = foundVariables.has(TEMPLATE_VARIABLES.EXPECTED_OUTPUT);
14695
14703
  const hasRequiredFields = hasCandidateAnswer || hasExpectedOutput;
14696
14704
  if (!hasRequiredFields) {
14697
14705
  throw new Error(
14698
14706
  `Missing required fields. Must include at least one of:
14699
- - {{ ${TEMPLATE_VARIABLES.ANSWER} }}
14707
+ - {{ ${TEMPLATE_VARIABLES.ANSWER} }} or {{ ${TEMPLATE_VARIABLES.OUTPUT_TEXT} }}
14700
14708
  - {{ ${TEMPLATE_VARIABLES.EXPECTED_OUTPUT} }}`
14701
14709
  );
14702
14710
  }
@@ -20905,6 +20913,29 @@ var MockProvider = class {
20905
20913
  return this.delayMs;
20906
20914
  }
20907
20915
  };
20916
+ function extractPiTextContent(content) {
20917
+ if (typeof content === "string") {
20918
+ return content;
20919
+ }
20920
+ if (!Array.isArray(content)) {
20921
+ return void 0;
20922
+ }
20923
+ const textParts = [];
20924
+ for (const part of content) {
20925
+ if (!part || typeof part !== "object") {
20926
+ continue;
20927
+ }
20928
+ const p = part;
20929
+ if (p.type === "text" && typeof p.text === "string") {
20930
+ textParts.push(p.text);
20931
+ }
20932
+ }
20933
+ return textParts.length > 0 ? textParts.join("\n") : void 0;
20934
+ }
20935
+ function toFiniteNumber(value) {
20936
+ if (typeof value === "number" && Number.isFinite(value)) return value;
20937
+ return void 0;
20938
+ }
20908
20939
  var piAgentModule = null;
20909
20940
  var piAiModule = null;
20910
20941
  async function loadPiModules() {
@@ -20945,7 +20976,8 @@ var PiAgentSdkProvider = class {
20945
20976
  throw new Error("Pi agent SDK request was aborted before execution");
20946
20977
  }
20947
20978
  const { Agent, getModel, getEnvApiKey } = await loadPiModules();
20948
- const startTime = Date.now();
20979
+ const startTimeIso = (/* @__PURE__ */ new Date()).toISOString();
20980
+ const startMs = Date.now();
20949
20981
  const providerName = this.config.provider ?? "anthropic";
20950
20982
  const modelId = this.config.model ?? "claude-sonnet-4-20250514";
20951
20983
  const model = getModel(providerName, modelId);
@@ -20962,16 +20994,73 @@ var PiAgentSdkProvider = class {
20962
20994
  return this.config.apiKey ?? getEnvApiKey(provider) ?? void 0;
20963
20995
  }
20964
20996
  });
20965
- const output = [];
20966
- let finalAssistantContent = "";
20997
+ let tokenUsage;
20998
+ let costUsd;
20999
+ const toolTrackers = /* @__PURE__ */ new Map();
21000
+ const completedToolResults = /* @__PURE__ */ new Map();
20967
21001
  const unsubscribe = agent.subscribe((event) => {
20968
- if (event.type === "message_end") {
20969
- const msg = event.message;
20970
- if (msg.role === "assistant") {
20971
- const content = extractTextContent3(msg.content);
20972
- if (content) {
20973
- finalAssistantContent = content;
21002
+ switch (event.type) {
21003
+ case "message_end": {
21004
+ const msg = event.message;
21005
+ if (msg && typeof msg === "object" && "role" in msg && msg.role === "assistant" && "usage" in msg) {
21006
+ const usage = msg.usage;
21007
+ if (usage && typeof usage === "object") {
21008
+ const u = usage;
21009
+ const input = toFiniteNumber(u.input);
21010
+ const output = toFiniteNumber(u.output);
21011
+ const cached = toFiniteNumber(u.cacheRead);
21012
+ let callDelta;
21013
+ if (input !== void 0 || output !== void 0) {
21014
+ callDelta = {
21015
+ input: input ?? 0,
21016
+ output: output ?? 0,
21017
+ ...cached !== void 0 ? { cached } : {}
21018
+ };
21019
+ tokenUsage = {
21020
+ input: (tokenUsage?.input ?? 0) + callDelta.input,
21021
+ output: (tokenUsage?.output ?? 0) + callDelta.output,
21022
+ ...cached !== void 0 ? { cached: (tokenUsage?.cached ?? 0) + cached } : tokenUsage?.cached !== void 0 ? { cached: tokenUsage.cached } : {}
21023
+ };
21024
+ }
21025
+ const cost = u.cost;
21026
+ if (cost && typeof cost === "object") {
21027
+ const total = toFiniteNumber(cost.total);
21028
+ if (total !== void 0) {
21029
+ costUsd = (costUsd ?? 0) + total;
21030
+ }
21031
+ }
21032
+ request.streamCallbacks?.onLlmCallEnd?.(modelId, callDelta);
21033
+ }
20974
21034
  }
21035
+ break;
21036
+ }
21037
+ case "tool_execution_start": {
21038
+ toolTrackers.set(event.toolCallId, {
21039
+ toolCallId: event.toolCallId,
21040
+ toolName: event.toolName,
21041
+ args: event.args,
21042
+ startMs: Date.now(),
21043
+ startTime: (/* @__PURE__ */ new Date()).toISOString()
21044
+ });
21045
+ request.streamCallbacks?.onToolCallStart?.(event.toolName, event.toolCallId);
21046
+ break;
21047
+ }
21048
+ case "tool_execution_end": {
21049
+ const tracker = toolTrackers.get(event.toolCallId);
21050
+ const durationMs = tracker ? Date.now() - tracker.startMs : 0;
21051
+ completedToolResults.set(event.toolCallId, {
21052
+ output: event.result,
21053
+ durationMs
21054
+ });
21055
+ request.streamCallbacks?.onToolCallEnd?.(
21056
+ event.toolName,
21057
+ tracker?.args,
21058
+ event.result,
21059
+ durationMs,
21060
+ event.toolCallId
21061
+ );
21062
+ toolTrackers.delete(event.toolCallId);
21063
+ break;
20975
21064
  }
20976
21065
  }
20977
21066
  });
@@ -20990,10 +21079,12 @@ var PiAgentSdkProvider = class {
20990
21079
  }
20991
21080
  await agent.waitForIdle();
20992
21081
  const agentMessages = agent.state.messages;
21082
+ const output = [];
20993
21083
  for (const msg of agentMessages) {
20994
- output.push(convertAgentMessage(msg));
21084
+ output.push(convertAgentMessage(msg, toolTrackers, completedToolResults));
20995
21085
  }
20996
- const durationMs = Date.now() - startTime;
21086
+ const endTimeIso = (/* @__PURE__ */ new Date()).toISOString();
21087
+ const durationMs = Date.now() - startMs;
20997
21088
  return {
20998
21089
  raw: {
20999
21090
  messages: agentMessages,
@@ -21002,49 +21093,54 @@ var PiAgentSdkProvider = class {
21002
21093
  provider: this.config.provider
21003
21094
  },
21004
21095
  output,
21005
- durationMs
21096
+ tokenUsage,
21097
+ costUsd,
21098
+ durationMs,
21099
+ startTime: startTimeIso,
21100
+ endTime: endTimeIso
21006
21101
  };
21007
21102
  } finally {
21008
21103
  unsubscribe();
21009
21104
  }
21010
21105
  }
21011
21106
  };
21012
- function extractTextContent3(content) {
21013
- if (typeof content === "string") {
21014
- return content;
21015
- }
21016
- if (!Array.isArray(content)) {
21017
- return void 0;
21018
- }
21019
- const textParts = [];
21020
- for (const part of content) {
21021
- if (!part || typeof part !== "object") {
21022
- continue;
21023
- }
21024
- const p = part;
21025
- if (p.type === "text" && typeof p.text === "string") {
21026
- textParts.push(p.text);
21027
- }
21028
- }
21029
- return textParts.length > 0 ? textParts.join("\n") : void 0;
21030
- }
21031
- function convertAgentMessage(message) {
21107
+ function convertAgentMessage(message, toolTrackers, completedToolResults) {
21032
21108
  if (!message || typeof message !== "object") {
21033
21109
  return { role: "unknown", content: String(message) };
21034
21110
  }
21035
21111
  const msg = message;
21036
21112
  const role = typeof msg.role === "string" ? msg.role : "unknown";
21037
- const content = extractTextContent3(msg.content);
21038
- const toolCalls = extractToolCalls3(msg.content);
21113
+ const content = extractPiTextContent(msg.content);
21114
+ const toolCalls = extractToolCalls3(msg.content, toolTrackers, completedToolResults);
21039
21115
  const startTime = typeof msg.timestamp === "number" ? new Date(msg.timestamp).toISOString() : typeof msg.timestamp === "string" ? msg.timestamp : void 0;
21116
+ let msgTokenUsage;
21117
+ if (msg.usage && typeof msg.usage === "object") {
21118
+ const u = msg.usage;
21119
+ const input = toFiniteNumber(u.input);
21120
+ const output = toFiniteNumber(u.output);
21121
+ if (input !== void 0 || output !== void 0) {
21122
+ msgTokenUsage = {
21123
+ input: input ?? 0,
21124
+ output: output ?? 0,
21125
+ ...toFiniteNumber(u.cacheRead) !== void 0 ? { cached: toFiniteNumber(u.cacheRead) } : {}
21126
+ };
21127
+ }
21128
+ }
21129
+ const metadata = {};
21130
+ if (msg.api) metadata.api = msg.api;
21131
+ if (msg.provider) metadata.provider = msg.provider;
21132
+ if (msg.model) metadata.model = msg.model;
21133
+ if (msg.stopReason) metadata.stopReason = msg.stopReason;
21040
21134
  return {
21041
21135
  role,
21042
21136
  content,
21043
21137
  toolCalls: toolCalls.length > 0 ? toolCalls : void 0,
21044
- startTime
21138
+ startTime,
21139
+ metadata: Object.keys(metadata).length > 0 ? metadata : void 0,
21140
+ tokenUsage: msgTokenUsage
21045
21141
  };
21046
21142
  }
21047
- function extractToolCalls3(content) {
21143
+ function extractToolCalls3(content, toolTrackers, completedToolResults) {
21048
21144
  if (!Array.isArray(content)) {
21049
21145
  return [];
21050
21146
  }
@@ -21055,10 +21151,17 @@ function extractToolCalls3(content) {
21055
21151
  }
21056
21152
  const p = part;
21057
21153
  if (p.type === "toolCall" && typeof p.name === "string") {
21154
+ const id = typeof p.id === "string" ? p.id : void 0;
21155
+ const tracker = id ? toolTrackers.get(id) : void 0;
21156
+ const completed = id ? completedToolResults.get(id) : void 0;
21058
21157
  toolCalls.push({
21059
21158
  tool: p.name,
21060
21159
  input: p.arguments,
21061
- id: typeof p.id === "string" ? p.id : void 0
21160
+ id,
21161
+ output: completed?.output,
21162
+ durationMs: completed?.durationMs,
21163
+ startTime: tracker?.startTime,
21164
+ endTime: tracker?.startTime && completed?.durationMs !== void 0 ? new Date(new Date(tracker.startTime).getTime() + completed.durationMs).toISOString() : void 0
21062
21165
  });
21063
21166
  }
21064
21167
  }
@@ -21590,14 +21693,14 @@ function extractTokenUsage(events) {
21590
21693
  const usage = record.usage;
21591
21694
  if (usage && typeof usage === "object") {
21592
21695
  const u = usage;
21593
- const input = toNumber(u.input_tokens ?? u.inputTokens ?? u.input);
21594
- const output = toNumber(u.output_tokens ?? u.outputTokens ?? u.output);
21696
+ const input = toFiniteNumber(u.input_tokens ?? u.inputTokens ?? u.input);
21697
+ const output = toFiniteNumber(u.output_tokens ?? u.outputTokens ?? u.output);
21595
21698
  if (input !== void 0 || output !== void 0) {
21596
21699
  const result = {
21597
21700
  input: input ?? 0,
21598
21701
  output: output ?? 0
21599
21702
  };
21600
- const cached = toNumber(u.cache_read_input_tokens ?? u.cached ?? u.cachedTokens);
21703
+ const cached = toFiniteNumber(u.cache_read_input_tokens ?? u.cached ?? u.cachedTokens);
21601
21704
  if (cached !== void 0) {
21602
21705
  return { ...result, cached };
21603
21706
  }
@@ -21622,13 +21725,13 @@ function aggregateUsageFromMessages(messages) {
21622
21725
  const usage = m.usage;
21623
21726
  if (!usage || typeof usage !== "object") continue;
21624
21727
  const u = usage;
21625
- const input = toNumber(u.input_tokens ?? u.inputTokens ?? u.input);
21626
- const output = toNumber(u.output_tokens ?? u.outputTokens ?? u.output);
21728
+ const input = toFiniteNumber(u.input_tokens ?? u.inputTokens ?? u.input);
21729
+ const output = toFiniteNumber(u.output_tokens ?? u.outputTokens ?? u.output);
21627
21730
  if (input !== void 0 || output !== void 0) {
21628
21731
  found = true;
21629
21732
  totalInput += input ?? 0;
21630
21733
  totalOutput += output ?? 0;
21631
- const cached = toNumber(u.cache_read_input_tokens ?? u.cached ?? u.cachedTokens);
21734
+ const cached = toFiniteNumber(u.cache_read_input_tokens ?? u.cached ?? u.cachedTokens);
21632
21735
  if (cached !== void 0) {
21633
21736
  totalCached = (totalCached ?? 0) + cached;
21634
21737
  }
@@ -21641,10 +21744,6 @@ function aggregateUsageFromMessages(messages) {
21641
21744
  }
21642
21745
  return result;
21643
21746
  }
21644
- function toNumber(value) {
21645
- if (typeof value === "number" && Number.isFinite(value)) return value;
21646
- return void 0;
21647
- }
21648
21747
  function convertPiMessage(message) {
21649
21748
  if (!message || typeof message !== "object") {
21650
21749
  return void 0;
@@ -21654,7 +21753,7 @@ function convertPiMessage(message) {
21654
21753
  if (typeof role !== "string") {
21655
21754
  return void 0;
21656
21755
  }
21657
- const content = extractTextContent4(msg.content);
21756
+ const content = extractPiTextContent(msg.content);
21658
21757
  const toolCalls = extractToolCalls4(msg.content);
21659
21758
  const startTime = typeof msg.timestamp === "number" ? new Date(msg.timestamp).toISOString() : typeof msg.timestamp === "string" ? msg.timestamp : void 0;
21660
21759
  const metadata = {};
@@ -21671,25 +21770,6 @@ function convertPiMessage(message) {
21671
21770
  metadata: Object.keys(metadata).length > 0 ? metadata : void 0
21672
21771
  };
21673
21772
  }
21674
- function extractTextContent4(content) {
21675
- if (typeof content === "string") {
21676
- return content;
21677
- }
21678
- if (!Array.isArray(content)) {
21679
- return void 0;
21680
- }
21681
- const textParts = [];
21682
- for (const part of content) {
21683
- if (!part || typeof part !== "object") {
21684
- continue;
21685
- }
21686
- const p = part;
21687
- if (p.type === "text" && typeof p.text === "string") {
21688
- textParts.push(p.text);
21689
- }
21690
- }
21691
- return textParts.length > 0 ? textParts.join("\n") : void 0;
21692
- }
21693
21773
  function extractToolCalls4(content) {
21694
21774
  if (!Array.isArray(content)) {
21695
21775
  return [];
@@ -23868,7 +23948,11 @@ var CodeEvaluator = class {
23868
23948
  endTime: context2.endTime ?? null,
23869
23949
  fileChanges: context2.fileChanges ?? null,
23870
23950
  workspacePath: context2.workspacePath ?? null,
23871
- config: this.config ?? null
23951
+ config: this.config ?? null,
23952
+ // Text convenience accessors (new names, always strings)
23953
+ inputText: context2.evalCase.question,
23954
+ outputText: context2.candidate,
23955
+ expectedOutputText: context2.evalCase.reference_answer ?? ""
23872
23956
  };
23873
23957
  const inputPayload = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
23874
23958
  let proxyEnv;
@@ -24110,7 +24194,11 @@ var LlmGraderEvaluator = class {
24110
24194
  [TEMPLATE_VARIABLES.REFERENCE_ANSWER]: (context2.evalCase.reference_answer ?? "").trim(),
24111
24195
  [TEMPLATE_VARIABLES.CRITERIA]: context2.evalCase.criteria.trim(),
24112
24196
  [TEMPLATE_VARIABLES.QUESTION]: formattedQuestion.trim(),
24113
- [TEMPLATE_VARIABLES.FILE_CHANGES]: context2.fileChanges ?? ""
24197
+ [TEMPLATE_VARIABLES.FILE_CHANGES]: context2.fileChanges ?? "",
24198
+ // Text convenience accessors (new names, always strings)
24199
+ [TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
24200
+ [TEMPLATE_VARIABLES.OUTPUT_TEXT]: context2.candidate.trim(),
24201
+ [TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (context2.evalCase.reference_answer ?? "").trim()
24114
24202
  };
24115
24203
  const systemPrompt = buildOutputSchema();
24116
24204
  const evaluatorTemplate = context2.evaluatorTemplateOverride ?? this.evaluatorTemplate ?? DEFAULT_EVALUATOR_TEMPLATE;
@@ -25694,8 +25782,8 @@ var FieldAccuracyEvaluator = class {
25694
25782
  */
25695
25783
  compareNumericTolerance(path46, candidateValue, expectedValue, fieldConfig, weight) {
25696
25784
  const { tolerance = 0, relative = false } = fieldConfig;
25697
- const candidateNum = toNumber2(candidateValue);
25698
- const expectedNum = toNumber2(expectedValue);
25785
+ const candidateNum = toNumber(candidateValue);
25786
+ const expectedNum = toNumber(expectedValue);
25699
25787
  if (candidateNum === null || expectedNum === null) {
25700
25788
  return {
25701
25789
  path: path46,
@@ -25841,7 +25929,7 @@ function resolvePath(obj, path46) {
25841
25929
  }
25842
25930
  return current;
25843
25931
  }
25844
- function toNumber2(value) {
25932
+ function toNumber(value) {
25845
25933
  if (typeof value === "number") {
25846
25934
  return value;
25847
25935
  }
@@ -25950,28 +26038,60 @@ var LatencyEvaluator = class {
25950
26038
  };
25951
26039
  }
25952
26040
  };
26041
+ var CLAUDE_MATCHER = {
26042
+ skillTools: ["Skill"],
26043
+ skillInputField: "skill",
26044
+ readTools: ["Read"],
26045
+ readInputField: "file_path"
26046
+ };
26047
+ var COPILOT_MATCHER = {
26048
+ skillTools: ["Skill", "skill"],
26049
+ skillInputField: "skill",
26050
+ readTools: ["Read File", "readFile", "Read", "readTextFile"],
26051
+ readInputField: "file_path"
26052
+ };
26053
+ var PROVIDER_TOOL_SEMANTICS = {
26054
+ claude: CLAUDE_MATCHER,
26055
+ "claude-cli": CLAUDE_MATCHER,
26056
+ "claude-sdk": CLAUDE_MATCHER,
26057
+ "pi-coding-agent": CLAUDE_MATCHER,
26058
+ "pi-agent-sdk": CLAUDE_MATCHER,
26059
+ "copilot-cli": COPILOT_MATCHER,
26060
+ "copilot-sdk": COPILOT_MATCHER,
26061
+ vscode: COPILOT_MATCHER,
26062
+ "vscode-insiders": COPILOT_MATCHER
26063
+ };
25953
26064
  var SkillTriggerEvaluator = class {
25954
26065
  kind = "skill-trigger";
25955
26066
  config;
25956
26067
  constructor(config) {
25957
26068
  this.config = config;
25958
26069
  }
26070
+ resolveMatcher(providerKind) {
26071
+ if (providerKind) {
26072
+ const match = PROVIDER_TOOL_SEMANTICS[providerKind];
26073
+ if (match) return match;
26074
+ }
26075
+ return CLAUDE_MATCHER;
26076
+ }
25959
26077
  evaluate(context2) {
25960
26078
  const skillName = this.config.skill;
25961
26079
  const shouldTrigger = this.config.should_trigger !== false;
26080
+ const providerKind = context2.provider?.kind;
26081
+ const matcher = this.resolveMatcher(providerKind);
25962
26082
  const firstTool = (context2.output ?? []).flatMap((msg) => msg.toolCalls ?? [])[0];
25963
26083
  let triggered = false;
25964
26084
  let evidence = "";
25965
26085
  if (firstTool) {
25966
26086
  const input = firstTool.input ?? {};
25967
- if (firstTool.tool === "Skill") {
25968
- const skillArg = String(input.skill ?? "");
26087
+ if (matcher.skillTools.includes(firstTool.tool)) {
26088
+ const skillArg = String(input[matcher.skillInputField] ?? "");
25969
26089
  if (skillArg.includes(skillName)) {
25970
26090
  triggered = true;
25971
- evidence = `Skill tool invoked with skill="${skillArg}"`;
26091
+ evidence = `Skill tool invoked with ${matcher.skillInputField}="${skillArg}"`;
25972
26092
  }
25973
- } else if (firstTool.tool === "Read") {
25974
- const filePath = String(input.file_path ?? "");
26093
+ } else if (matcher.readTools.includes(firstTool.tool)) {
26094
+ const filePath = String(input[matcher.readInputField] ?? "");
25975
26095
  if (filePath.includes(skillName)) {
25976
26096
  triggered = true;
25977
26097
  evidence = `Read tool loaded skill file: ${filePath}`;
@@ -25996,7 +26116,7 @@ var SkillTriggerEvaluator = class {
25996
26116
  verdict: "fail",
25997
26117
  hits: [],
25998
26118
  misses: [
25999
- shouldTrigger ? firstTool ? `First tool was "${firstTool.tool}" \u2014 not Skill/Read for "${skillName}"` : "No tool calls recorded" : evidence || `Skill "${skillName}" triggered unexpectedly`
26119
+ shouldTrigger ? firstTool ? `First tool was "${firstTool.tool}" \u2014 not a skill/read tool for "${skillName}"` : "No tool calls recorded" : evidence || `Skill "${skillName}" triggered unexpectedly`
26000
26120
  ],
26001
26121
  expectedAspectCount: 1,
26002
26122
  reasoning: shouldTrigger ? `Skill "${skillName}" was not triggered` : "False trigger: skill fired when it should not have"
@@ -26038,7 +26158,11 @@ function assembleFreeform(evalCase, candidate, promptInputs, fileChanges, evalua
26038
26158
  [TEMPLATE_VARIABLES.REFERENCE_ANSWER]: (evalCase.reference_answer ?? "").trim(),
26039
26159
  [TEMPLATE_VARIABLES.CRITERIA]: evalCase.criteria.trim(),
26040
26160
  [TEMPLATE_VARIABLES.QUESTION]: formattedQuestion.trim(),
26041
- [TEMPLATE_VARIABLES.FILE_CHANGES]: fileChanges ?? ""
26161
+ [TEMPLATE_VARIABLES.FILE_CHANGES]: fileChanges ?? "",
26162
+ // Text convenience accessors (new names, always strings)
26163
+ [TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
26164
+ [TEMPLATE_VARIABLES.OUTPUT_TEXT]: candidate.trim(),
26165
+ [TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (evalCase.reference_answer ?? "").trim()
26042
26166
  };
26043
26167
  const systemPrompt = buildOutputSchema();
26044
26168
  const template = evaluatorTemplateOverride ?? DEFAULT_EVALUATOR_TEMPLATE;
@@ -27029,7 +27153,11 @@ async function executePromptTemplate(script, context2, config, timeoutMs) {
27029
27153
  trace: context2.trace ?? null,
27030
27154
  fileChanges: context2.fileChanges ?? null,
27031
27155
  workspacePath: context2.workspacePath ?? null,
27032
- config: config ?? context2.config ?? null
27156
+ config: config ?? context2.config ?? null,
27157
+ // Text convenience accessors (new names, always strings)
27158
+ inputText: context2.evalCase.question,
27159
+ outputText: context2.candidate,
27160
+ expectedOutputText: context2.evalCase.reference_answer ?? ""
27033
27161
  };
27034
27162
  const inputJson = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
27035
27163
  const scriptPath = script[script.length - 1];
@@ -28689,7 +28817,9 @@ async function runEvaluation(options) {
28689
28817
  testId: evalCase.id,
28690
28818
  status: "failed",
28691
28819
  completedAt: Date.now(),
28692
- error: budgetResult.error
28820
+ error: budgetResult.error,
28821
+ score: budgetResult.score,
28822
+ executionStatus: budgetResult.executionStatus
28693
28823
  });
28694
28824
  }
28695
28825
  if (onResult) {
@@ -28720,7 +28850,9 @@ async function runEvaluation(options) {
28720
28850
  testId: evalCase.id,
28721
28851
  status: "failed",
28722
28852
  completedAt: Date.now(),
28723
- error: haltResult.error
28853
+ error: haltResult.error,
28854
+ score: haltResult.score,
28855
+ executionStatus: haltResult.executionStatus
28724
28856
  });
28725
28857
  }
28726
28858
  if (onResult) {
@@ -28800,7 +28932,9 @@ async function runEvaluation(options) {
28800
28932
  startedAt: 0,
28801
28933
  // Not used for completed status
28802
28934
  completedAt: Date.now(),
28803
- error: result.error
28935
+ error: result.error,
28936
+ score: result.score,
28937
+ executionStatus: result.executionStatus
28804
28938
  });
28805
28939
  }
28806
28940
  if (onResult) {
@@ -28971,7 +29105,9 @@ async function runBatchEvaluation(options) {
28971
29105
  const merged = computed ? mergeExecutionMetrics(computed, {
28972
29106
  tokenUsage: providerResponse.tokenUsage,
28973
29107
  costUsd: providerResponse.costUsd,
28974
- durationMs: providerResponse.durationMs
29108
+ durationMs: providerResponse.durationMs,
29109
+ startTime: providerResponse.startTime,
29110
+ endTime: providerResponse.endTime
28975
29111
  }) : void 0;
28976
29112
  const trace2 = merged?.trace;
28977
29113
  const costUsd = merged?.costUsd;
@@ -29036,7 +29172,9 @@ async function runBatchEvaluation(options) {
29036
29172
  testId: evalCase.id,
29037
29173
  status: "failed",
29038
29174
  completedAt: Date.now(),
29039
- error: error instanceof Error ? error.message : String(error)
29175
+ error: error instanceof Error ? error.message : String(error),
29176
+ score: errorResult.score,
29177
+ executionStatus: errorResult.executionStatus
29040
29178
  });
29041
29179
  }
29042
29180
  continue;
@@ -29052,7 +29190,9 @@ async function runBatchEvaluation(options) {
29052
29190
  status: result.error ? "failed" : "completed",
29053
29191
  startedAt: 0,
29054
29192
  completedAt: Date.now(),
29055
- error: result.error
29193
+ error: result.error,
29194
+ score: result.score,
29195
+ executionStatus: result.executionStatus
29056
29196
  });
29057
29197
  }
29058
29198
  }
@@ -29362,7 +29502,9 @@ async function runEvalCase(options) {
29362
29502
  const merged = computed ? mergeExecutionMetrics(computed, {
29363
29503
  tokenUsage: providerResponse.tokenUsage,
29364
29504
  costUsd: providerResponse.costUsd,
29365
- durationMs: providerResponse.durationMs
29505
+ durationMs: providerResponse.durationMs,
29506
+ startTime: providerResponse.startTime,
29507
+ endTime: providerResponse.endTime
29366
29508
  }) : void 0;
29367
29509
  const trace2 = merged?.trace;
29368
29510
  const costUsd = merged?.costUsd;
@@ -31148,4 +31290,4 @@ export {
31148
31290
  OtelStreamingObserver,
31149
31291
  createAgentKernel
31150
31292
  };
31151
- //# sourceMappingURL=chunk-6XTYVCMN.js.map
31293
+ //# sourceMappingURL=chunk-GOZV2HN2.js.map