agentv 3.2.5 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -301,7 +301,7 @@ var require_dist = __commonJS({
301
301
  }
302
302
  });
303
303
 
304
- // ../../packages/core/dist/chunk-DVFNM65P.js
304
+ // ../../packages/core/dist/chunk-C4MKEQR5.js
305
305
  import { constants } from "node:fs";
306
306
  import { access, readFile } from "node:fs/promises";
307
307
  import path from "node:path";
@@ -419,7 +419,7 @@ __export(external_exports2, {
419
419
  void: () => voidType
420
420
  });
421
421
 
422
- // ../../packages/core/dist/chunk-DVFNM65P.js
422
+ // ../../packages/core/dist/chunk-C4MKEQR5.js
423
423
  var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
424
424
  var TEST_MESSAGE_ROLES = TEST_MESSAGE_ROLE_VALUES;
425
425
  var TEST_MESSAGE_ROLE_SET = new Set(TEST_MESSAGE_ROLE_VALUES);
@@ -14657,19 +14657,26 @@ function logWarning(message) {
14657
14657
  console.warn(`${ANSI_YELLOW2}Warning: ${message}${ANSI_RESET3}`);
14658
14658
  }
14659
14659
  var TEMPLATE_VARIABLES = {
14660
+ /** @deprecated Use OUTPUT_TEXT instead */
14660
14661
  ANSWER: "answer",
14661
14662
  EXPECTED_OUTPUT: "expected_output",
14663
+ /** @deprecated Use INPUT_TEXT instead */
14662
14664
  QUESTION: "question",
14663
14665
  CRITERIA: "criteria",
14666
+ /** @deprecated Use EXPECTED_OUTPUT_TEXT instead */
14664
14667
  REFERENCE_ANSWER: "reference_answer",
14665
14668
  INPUT: "input",
14666
14669
  OUTPUT: "output",
14667
- FILE_CHANGES: "file_changes"
14670
+ FILE_CHANGES: "file_changes",
14671
+ INPUT_TEXT: "input_text",
14672
+ OUTPUT_TEXT: "output_text",
14673
+ EXPECTED_OUTPUT_TEXT: "expected_output_text"
14668
14674
  };
14669
14675
  var VALID_TEMPLATE_VARIABLES = new Set(Object.values(TEMPLATE_VARIABLES));
14670
14676
  var REQUIRED_TEMPLATE_VARIABLES = /* @__PURE__ */ new Set([
14671
14677
  TEMPLATE_VARIABLES.ANSWER,
14672
- TEMPLATE_VARIABLES.EXPECTED_OUTPUT
14678
+ TEMPLATE_VARIABLES.EXPECTED_OUTPUT,
14679
+ TEMPLATE_VARIABLES.OUTPUT_TEXT
14673
14680
  ]);
14674
14681
  var ANSI_YELLOW3 = "\x1B[33m";
14675
14682
  var ANSI_RESET4 = "\x1B[0m";
@@ -14690,13 +14697,13 @@ function validateTemplateVariables(content, source) {
14690
14697
  }
14691
14698
  match = variablePattern.exec(content);
14692
14699
  }
14693
- const hasCandidateAnswer = foundVariables.has(TEMPLATE_VARIABLES.ANSWER);
14700
+ const hasCandidateAnswer = foundVariables.has(TEMPLATE_VARIABLES.ANSWER) || foundVariables.has(TEMPLATE_VARIABLES.OUTPUT_TEXT);
14694
14701
  const hasExpectedOutput = foundVariables.has(TEMPLATE_VARIABLES.EXPECTED_OUTPUT);
14695
14702
  const hasRequiredFields = hasCandidateAnswer || hasExpectedOutput;
14696
14703
  if (!hasRequiredFields) {
14697
14704
  throw new Error(
14698
14705
  `Missing required fields. Must include at least one of:
14699
- - {{ ${TEMPLATE_VARIABLES.ANSWER} }}
14706
+ - {{ ${TEMPLATE_VARIABLES.ANSWER} }} or {{ ${TEMPLATE_VARIABLES.OUTPUT_TEXT} }}
14700
14707
  - {{ ${TEMPLATE_VARIABLES.EXPECTED_OUTPUT} }}`
14701
14708
  );
14702
14709
  }
@@ -23868,7 +23875,11 @@ var CodeEvaluator = class {
23868
23875
  endTime: context2.endTime ?? null,
23869
23876
  fileChanges: context2.fileChanges ?? null,
23870
23877
  workspacePath: context2.workspacePath ?? null,
23871
- config: this.config ?? null
23878
+ config: this.config ?? null,
23879
+ // Text convenience accessors (new names, always strings)
23880
+ inputText: context2.evalCase.question,
23881
+ outputText: context2.candidate,
23882
+ expectedOutputText: context2.evalCase.reference_answer ?? ""
23872
23883
  };
23873
23884
  const inputPayload = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
23874
23885
  let proxyEnv;
@@ -24110,7 +24121,11 @@ var LlmGraderEvaluator = class {
24110
24121
  [TEMPLATE_VARIABLES.REFERENCE_ANSWER]: (context2.evalCase.reference_answer ?? "").trim(),
24111
24122
  [TEMPLATE_VARIABLES.CRITERIA]: context2.evalCase.criteria.trim(),
24112
24123
  [TEMPLATE_VARIABLES.QUESTION]: formattedQuestion.trim(),
24113
- [TEMPLATE_VARIABLES.FILE_CHANGES]: context2.fileChanges ?? ""
24124
+ [TEMPLATE_VARIABLES.FILE_CHANGES]: context2.fileChanges ?? "",
24125
+ // Text convenience accessors (new names, always strings)
24126
+ [TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
24127
+ [TEMPLATE_VARIABLES.OUTPUT_TEXT]: context2.candidate.trim(),
24128
+ [TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (context2.evalCase.reference_answer ?? "").trim()
24114
24129
  };
24115
24130
  const systemPrompt = buildOutputSchema();
24116
24131
  const evaluatorTemplate = context2.evaluatorTemplateOverride ?? this.evaluatorTemplate ?? DEFAULT_EVALUATOR_TEMPLATE;
@@ -25950,28 +25965,60 @@ var LatencyEvaluator = class {
25950
25965
  };
25951
25966
  }
25952
25967
  };
25968
+ var CLAUDE_MATCHER = {
25969
+ skillTools: ["Skill"],
25970
+ skillInputField: "skill",
25971
+ readTools: ["Read"],
25972
+ readInputField: "file_path"
25973
+ };
25974
+ var COPILOT_MATCHER = {
25975
+ skillTools: ["Skill", "skill"],
25976
+ skillInputField: "skill",
25977
+ readTools: ["Read File", "readFile", "Read", "readTextFile"],
25978
+ readInputField: "file_path"
25979
+ };
25980
+ var PROVIDER_TOOL_SEMANTICS = {
25981
+ claude: CLAUDE_MATCHER,
25982
+ "claude-cli": CLAUDE_MATCHER,
25983
+ "claude-sdk": CLAUDE_MATCHER,
25984
+ "pi-coding-agent": CLAUDE_MATCHER,
25985
+ "pi-agent-sdk": CLAUDE_MATCHER,
25986
+ "copilot-cli": COPILOT_MATCHER,
25987
+ "copilot-sdk": COPILOT_MATCHER,
25988
+ vscode: COPILOT_MATCHER,
25989
+ "vscode-insiders": COPILOT_MATCHER
25990
+ };
25953
25991
  var SkillTriggerEvaluator = class {
25954
25992
  kind = "skill-trigger";
25955
25993
  config;
25956
25994
  constructor(config) {
25957
25995
  this.config = config;
25958
25996
  }
25997
+ resolveMatcher(providerKind) {
25998
+ if (providerKind) {
25999
+ const match = PROVIDER_TOOL_SEMANTICS[providerKind];
26000
+ if (match) return match;
26001
+ }
26002
+ return CLAUDE_MATCHER;
26003
+ }
25959
26004
  evaluate(context2) {
25960
26005
  const skillName = this.config.skill;
25961
26006
  const shouldTrigger = this.config.should_trigger !== false;
26007
+ const providerKind = context2.provider?.kind;
26008
+ const matcher = this.resolveMatcher(providerKind);
25962
26009
  const firstTool = (context2.output ?? []).flatMap((msg) => msg.toolCalls ?? [])[0];
25963
26010
  let triggered = false;
25964
26011
  let evidence = "";
25965
26012
  if (firstTool) {
25966
26013
  const input = firstTool.input ?? {};
25967
- if (firstTool.tool === "Skill") {
25968
- const skillArg = String(input.skill ?? "");
26014
+ if (matcher.skillTools.includes(firstTool.tool)) {
26015
+ const skillArg = String(input[matcher.skillInputField] ?? "");
25969
26016
  if (skillArg.includes(skillName)) {
25970
26017
  triggered = true;
25971
- evidence = `Skill tool invoked with skill="${skillArg}"`;
26018
+ evidence = `Skill tool invoked with ${matcher.skillInputField}="${skillArg}"`;
25972
26019
  }
25973
- } else if (firstTool.tool === "Read") {
25974
- const filePath = String(input.file_path ?? "");
26020
+ } else if (matcher.readTools.includes(firstTool.tool)) {
26021
+ const filePath = String(input[matcher.readInputField] ?? "");
25975
26022
  if (filePath.includes(skillName)) {
25976
26023
  triggered = true;
25977
26024
  evidence = `Read tool loaded skill file: ${filePath}`;
@@ -25996,7 +26043,7 @@ var SkillTriggerEvaluator = class {
25996
26043
  verdict: "fail",
25997
26044
  hits: [],
25998
26045
  misses: [
25999
- shouldTrigger ? firstTool ? `First tool was "${firstTool.tool}" \u2014 not Skill/Read for "${skillName}"` : "No tool calls recorded" : evidence || `Skill "${skillName}" triggered unexpectedly`
26046
+ shouldTrigger ? firstTool ? `First tool was "${firstTool.tool}" \u2014 not a skill/read tool for "${skillName}"` : "No tool calls recorded" : evidence || `Skill "${skillName}" triggered unexpectedly`
26000
26047
  ],
26001
26048
  expectedAspectCount: 1,
26002
26049
  reasoning: shouldTrigger ? `Skill "${skillName}" was not triggered` : "False trigger: skill fired when it should not have"
@@ -26038,7 +26085,11 @@ function assembleFreeform(evalCase, candidate, promptInputs, fileChanges, evalua
26038
26085
  [TEMPLATE_VARIABLES.REFERENCE_ANSWER]: (evalCase.reference_answer ?? "").trim(),
26039
26086
  [TEMPLATE_VARIABLES.CRITERIA]: evalCase.criteria.trim(),
26040
26087
  [TEMPLATE_VARIABLES.QUESTION]: formattedQuestion.trim(),
26041
- [TEMPLATE_VARIABLES.FILE_CHANGES]: fileChanges ?? ""
26088
+ [TEMPLATE_VARIABLES.FILE_CHANGES]: fileChanges ?? "",
26089
+ // Text convenience accessors (new names, always strings)
26090
+ [TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
26091
+ [TEMPLATE_VARIABLES.OUTPUT_TEXT]: candidate.trim(),
26092
+ [TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (evalCase.reference_answer ?? "").trim()
26042
26093
  };
26043
26094
  const systemPrompt = buildOutputSchema();
26044
26095
  const template = evaluatorTemplateOverride ?? DEFAULT_EVALUATOR_TEMPLATE;
@@ -27029,7 +27080,11 @@ async function executePromptTemplate(script, context2, config, timeoutMs) {
27029
27080
  trace: context2.trace ?? null,
27030
27081
  fileChanges: context2.fileChanges ?? null,
27031
27082
  workspacePath: context2.workspacePath ?? null,
27032
- config: config ?? context2.config ?? null
27083
+ config: config ?? context2.config ?? null,
27084
+ // Text convenience accessors (new names, always strings)
27085
+ inputText: context2.evalCase.question,
27086
+ outputText: context2.candidate,
27087
+ expectedOutputText: context2.evalCase.reference_answer ?? ""
27033
27088
  };
27034
27089
  const inputJson = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
27035
27090
  const scriptPath = script[script.length - 1];
@@ -31148,4 +31203,4 @@ export {
31148
31203
  OtelStreamingObserver,
31149
31204
  createAgentKernel
31150
31205
  };
31151
- //# sourceMappingURL=chunk-6XTYVCMN.js.map
31206
+ //# sourceMappingURL=chunk-5M3K2DMV.js.map