agentv 3.10.2 → 3.10.3

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/dist/{chunk-6UE665XI.js → chunk-7LC3VNOC.js} +4 -4
  2. package/dist/{chunk-KGK5NUFG.js → chunk-JUQCB3ZW.js} +56 -15
  3. package/dist/chunk-JUQCB3ZW.js.map +1 -0
  4. package/dist/{chunk-F7LAJMTO.js → chunk-U556GRI3.js} +4 -4
  5. package/dist/{chunk-F7LAJMTO.js.map → chunk-U556GRI3.js.map} +1 -1
  6. package/dist/cli.js +3 -3
  7. package/dist/{dist-3QUJEJUT.js → dist-2X7A3TTC.js} +2 -2
  8. package/dist/index.js +3 -3
  9. package/dist/{interactive-EO6AR2R3.js → interactive-CSA4KIND.js} +3 -3
  10. package/dist/templates/.agentv/.env.example +9 -11
  11. package/dist/templates/.agentv/config.yaml +13 -4
  12. package/dist/templates/.agentv/targets.yaml +16 -0
  13. package/package.json +1 -1
  14. package/dist/chunk-KGK5NUFG.js.map +0 -1
  15. package/dist/templates/.agents/skills/agentv-chat-to-eval/README.md +0 -84
  16. package/dist/templates/.agents/skills/agentv-chat-to-eval/SKILL.md +0 -144
  17. package/dist/templates/.agents/skills/agentv-chat-to-eval/examples/transcript-json.md +0 -67
  18. package/dist/templates/.agents/skills/agentv-chat-to-eval/examples/transcript-markdown.md +0 -101
  19. package/dist/templates/.agents/skills/agentv-eval-builder/SKILL.md +0 -458
  20. package/dist/templates/.agents/skills/agentv-eval-builder/references/config-schema.json +0 -36
  21. package/dist/templates/.agents/skills/agentv-eval-builder/references/custom-evaluators.md +0 -118
  22. package/dist/templates/.agents/skills/agentv-eval-builder/references/eval-schema.json +0 -12753
  23. package/dist/templates/.agents/skills/agentv-eval-builder/references/rubric-evaluator.md +0 -77
  24. package/dist/templates/.agents/skills/agentv-eval-orchestrator/SKILL.md +0 -50
  25. package/dist/templates/.agents/skills/agentv-prompt-optimizer/SKILL.md +0 -78
  26. package/dist/templates/.claude/skills/agentv-eval-builder/SKILL.md +0 -177
  27. package/dist/templates/.claude/skills/agentv-eval-builder/references/batch-cli-evaluator.md +0 -316
  28. package/dist/templates/.claude/skills/agentv-eval-builder/references/compare-command.md +0 -137
  29. package/dist/templates/.claude/skills/agentv-eval-builder/references/composite-evaluator.md +0 -215
  30. package/dist/templates/.claude/skills/agentv-eval-builder/references/config-schema.json +0 -27
  31. package/dist/templates/.claude/skills/agentv-eval-builder/references/custom-evaluators.md +0 -115
  32. package/dist/templates/.claude/skills/agentv-eval-builder/references/eval-schema.json +0 -278
  33. package/dist/templates/.claude/skills/agentv-eval-builder/references/example-evals.md +0 -333
  34. package/dist/templates/.claude/skills/agentv-eval-builder/references/rubric-evaluator.md +0 -79
  35. package/dist/templates/.claude/skills/agentv-eval-builder/references/structured-data-evaluators.md +0 -121
  36. package/dist/templates/.claude/skills/agentv-eval-builder/references/tool-trajectory-evaluator.md +0 -298
  37. package/dist/templates/.claude/skills/agentv-prompt-optimizer/SKILL.md +0 -78
  38. package/dist/templates/.github/prompts/agentv-eval-build.prompt.md +0 -5
  39. package/dist/templates/.github/prompts/agentv-optimize.prompt.md +0 -4
  40. /package/dist/{chunk-6UE665XI.js.map → chunk-7LC3VNOC.js.map} +0 -0
  41. /package/dist/{dist-3QUJEJUT.js.map → dist-2X7A3TTC.js.map} +0 -0
  42. /package/dist/{interactive-EO6AR2R3.js.map → interactive-CSA4KIND.js.map} +0 -0
@@ -17,7 +17,7 @@ import {
17
17
  validateEvalFile,
18
18
  validateFileReferences,
19
19
  validateTargetsFile
20
- } from "./chunk-F7LAJMTO.js";
20
+ } from "./chunk-U556GRI3.js";
21
21
  import {
22
22
  createBuiltinRegistry,
23
23
  createProvider,
@@ -35,7 +35,7 @@ import {
35
35
  toSnakeCaseDeep as toSnakeCaseDeep2,
36
36
  transpileEvalYamlFile,
37
37
  trimBaselineResult
38
- } from "./chunk-KGK5NUFG.js";
38
+ } from "./chunk-JUQCB3ZW.js";
39
39
  import {
40
40
  __commonJS,
41
41
  __esm,
@@ -4187,7 +4187,7 @@ var evalRunCommand = command({
4187
4187
  },
4188
4188
  handler: async (args) => {
4189
4189
  if (args.evalPaths.length === 0 && process.stdin.isTTY) {
4190
- const { launchInteractiveWizard } = await import("./interactive-EO6AR2R3.js");
4190
+ const { launchInteractiveWizard } = await import("./interactive-CSA4KIND.js");
4191
4191
  await launchInteractiveWizard();
4192
4192
  return;
4193
4193
  }
@@ -6287,4 +6287,4 @@ export {
6287
6287
  preprocessArgv,
6288
6288
  runCli
6289
6289
  };
6290
- //# sourceMappingURL=chunk-6UE665XI.js.map
6290
+ //# sourceMappingURL=chunk-7LC3VNOC.js.map
@@ -19552,7 +19552,7 @@ ${basePrompt}` : basePrompt;
19552
19552
  if (itemType === "command_execution") {
19553
19553
  completedToolCalls.push({
19554
19554
  tool: "command_execution",
19555
- input: item.command,
19555
+ input: { command: item.command },
19556
19556
  output: item.aggregated_output,
19557
19557
  id: item.id
19558
19558
  });
@@ -20383,11 +20383,22 @@ async function loadCopilotSdk() {
20383
20383
  try {
20384
20384
  copilotSdkModule = await import("@github/copilot-sdk");
20385
20385
  } catch (error) {
20386
+ const message = error instanceof Error ? error.message : String(error);
20387
+ if (message.includes("vscode-jsonrpc")) {
20388
+ throw new Error(
20389
+ `Failed to load @github/copilot-sdk due to a known ESM compatibility issue with vscode-jsonrpc (https://github.com/github/copilot-sdk/issues/710).
20390
+
20391
+ Workarounds:
20392
+ - Use the copilot-cli target instead (recommended): set target type to "copilot-cli" in your eval YAML
20393
+ - If running under Node.js 24+: set NODE_OPTIONS="--experimental-specifier-resolution=node"
20394
+ - Wait for vscode-jsonrpc@9.0.0 stable to be released upstream`
20395
+ );
20396
+ }
20386
20397
  throw new Error(
20387
20398
  `Failed to load @github/copilot-sdk. Please install it:
20388
20399
  npm install @github/copilot-sdk
20389
20400
 
20390
- Original error: ${error instanceof Error ? error.message : String(error)}`
20401
+ Original error: ${message}`
20391
20402
  );
20392
20403
  }
20393
20404
  }
@@ -21592,6 +21603,13 @@ function extractToolCalls4(content) {
21592
21603
  id: typeof p.id === "string" ? p.id : void 0
21593
21604
  });
21594
21605
  }
21606
+ if (p.type === "toolCall" && typeof p.name === "string") {
21607
+ toolCalls.push({
21608
+ tool: p.name,
21609
+ input: p.arguments,
21610
+ id: typeof p.id === "string" ? p.id : void 0
21611
+ });
21612
+ }
21595
21613
  if (p.type === "tool_result" && typeof p.tool_use_id === "string") {
21596
21614
  const existing = toolCalls.find((tc) => tc.id === p.tool_use_id);
21597
21615
  if (existing) {
@@ -25778,12 +25796,31 @@ var COPILOT_MATCHER = {
25778
25796
  readToolPrefixes: ["Viewing "],
25779
25797
  readInputFields: ["file_path", "path"]
25780
25798
  };
25799
+ var PI_CODING_AGENT_MATCHER = {
25800
+ skillTools: [],
25801
+ skillInputField: "skill",
25802
+ readTools: ["read"],
25803
+ readInputField: "path",
25804
+ readInputFields: ["path", "file_path", "filePath"]
25805
+ };
25806
+ var CODEX_MATCHER = {
25807
+ skillTools: [],
25808
+ skillInputField: "skill",
25809
+ readTools: ["command_execution"],
25810
+ readInputField: "command",
25811
+ skillToolPrefixes: ["mcp:"],
25812
+ readToolPrefixes: ["mcp:"],
25813
+ readInputFields: ["command", "path", "file_path", "filePath"]
25814
+ };
25781
25815
  var PROVIDER_TOOL_SEMANTICS = {
25782
25816
  claude: CLAUDE_MATCHER,
25783
25817
  "claude-cli": CLAUDE_MATCHER,
25784
25818
  "claude-sdk": CLAUDE_MATCHER,
25785
- "pi-coding-agent": CLAUDE_MATCHER,
25786
- "pi-agent-sdk": CLAUDE_MATCHER,
25819
+ codex: CODEX_MATCHER,
25820
+ "pi-coding-agent": PI_CODING_AGENT_MATCHER,
25821
+ // pi-agent-sdk has no tools, so skill detection is a no-op. Kept for completeness.
25822
+ // TODO: consider removing pi-agent-sdk provider entirely.
25823
+ "pi-agent-sdk": PI_CODING_AGENT_MATCHER,
25787
25824
  "copilot-cli": COPILOT_MATCHER,
25788
25825
  "copilot-sdk": COPILOT_MATCHER,
25789
25826
  vscode: COPILOT_MATCHER,
@@ -25807,33 +25844,37 @@ var SkillTriggerEvaluator = class {
25807
25844
  const shouldTrigger = this.config.should_trigger !== false;
25808
25845
  const providerKind = context2.provider?.kind;
25809
25846
  const matcher = this.resolveMatcher(providerKind);
25810
- const firstTool = (context2.output ?? []).flatMap((msg) => msg.toolCalls ?? [])[0];
25847
+ const allToolCalls = (context2.output ?? []).flatMap((msg) => msg.toolCalls ?? []);
25811
25848
  let triggered = false;
25812
25849
  let evidence = "";
25813
- if (firstTool) {
25814
- const input = firstTool.input ?? {};
25815
- if (matcher.skillTools.includes(firstTool.tool)) {
25850
+ for (const toolCall of allToolCalls) {
25851
+ const input = toolCall.input ?? {};
25852
+ if (matcher.skillTools.includes(toolCall.tool)) {
25816
25853
  const skillArg = String(input[matcher.skillInputField] ?? "");
25817
25854
  if (skillArg.includes(skillName)) {
25818
25855
  triggered = true;
25819
25856
  evidence = `Skill tool invoked with ${matcher.skillInputField}="${skillArg}"`;
25857
+ break;
25820
25858
  }
25821
25859
  } else if (matcher.skillToolPrefixes?.some(
25822
- (prefix) => firstTool.tool.startsWith(prefix) && firstTool.tool.includes(skillName)
25860
+ (prefix) => toolCall.tool.startsWith(prefix) && toolCall.tool.includes(skillName)
25823
25861
  )) {
25824
25862
  triggered = true;
25825
- evidence = `Skill tool invoked via tool name "${firstTool.tool}"`;
25826
- } else if (matcher.readTools.includes(firstTool.tool)) {
25863
+ evidence = `Skill tool invoked via tool name "${toolCall.tool}"`;
25864
+ break;
25865
+ } else if (matcher.readTools.includes(toolCall.tool)) {
25827
25866
  const filePath = this.readPathFromInput(input, matcher);
25828
25867
  if (filePath.includes(skillName)) {
25829
25868
  triggered = true;
25830
25869
  evidence = `Read tool loaded skill file: ${filePath}`;
25870
+ break;
25831
25871
  }
25832
25872
  } else if (matcher.readToolPrefixes?.some(
25833
- (prefix) => firstTool.tool.startsWith(prefix) && firstTool.tool.includes(skillName)
25873
+ (prefix) => toolCall.tool.startsWith(prefix) && toolCall.tool.includes(skillName)
25834
25874
  )) {
25835
25875
  triggered = true;
25836
- evidence = `Read tool loaded skill file via tool name "${firstTool.tool}"`;
25876
+ evidence = `Read tool loaded skill file via tool name "${toolCall.tool}"`;
25877
+ break;
25837
25878
  }
25838
25879
  }
25839
25880
  const pass = triggered === shouldTrigger;
@@ -25855,7 +25896,7 @@ var SkillTriggerEvaluator = class {
25855
25896
  verdict: "fail",
25856
25897
  assertions: [
25857
25898
  {
25858
- text: shouldTrigger ? firstTool ? `First tool was "${firstTool.tool}" \u2014 not a skill/read tool for "${skillName}"` : "No tool calls recorded" : evidence || `Skill "${skillName}" triggered unexpectedly`,
25899
+ text: shouldTrigger ? allToolCalls.length > 0 ? `Skill "${skillName}" not found in ${allToolCalls.length} tool call(s)` : "No tool calls recorded" : evidence || `Skill "${skillName}" triggered unexpectedly`,
25859
25900
  passed: false
25860
25901
  }
25861
25902
  ],
@@ -31062,4 +31103,4 @@ export {
31062
31103
  OtelStreamingObserver,
31063
31104
  createAgentKernel
31064
31105
  };
31065
- //# sourceMappingURL=chunk-KGK5NUFG.js.map
31106
+ //# sourceMappingURL=chunk-JUQCB3ZW.js.map