agentv 3.10.2 → 3.10.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-6UE665XI.js → chunk-7LC3VNOC.js} +4 -4
- package/dist/{chunk-KGK5NUFG.js → chunk-JUQCB3ZW.js} +56 -15
- package/dist/chunk-JUQCB3ZW.js.map +1 -0
- package/dist/{chunk-F7LAJMTO.js → chunk-U556GRI3.js} +4 -4
- package/dist/{chunk-F7LAJMTO.js.map → chunk-U556GRI3.js.map} +1 -1
- package/dist/cli.js +3 -3
- package/dist/{dist-3QUJEJUT.js → dist-2X7A3TTC.js} +2 -2
- package/dist/index.js +3 -3
- package/dist/{interactive-EO6AR2R3.js → interactive-CSA4KIND.js} +3 -3
- package/dist/templates/.agentv/.env.example +9 -11
- package/dist/templates/.agentv/config.yaml +13 -4
- package/dist/templates/.agentv/targets.yaml +16 -0
- package/package.json +1 -1
- package/dist/chunk-KGK5NUFG.js.map +0 -1
- package/dist/templates/.agents/skills/agentv-chat-to-eval/README.md +0 -84
- package/dist/templates/.agents/skills/agentv-chat-to-eval/SKILL.md +0 -144
- package/dist/templates/.agents/skills/agentv-chat-to-eval/examples/transcript-json.md +0 -67
- package/dist/templates/.agents/skills/agentv-chat-to-eval/examples/transcript-markdown.md +0 -101
- package/dist/templates/.agents/skills/agentv-eval-builder/SKILL.md +0 -458
- package/dist/templates/.agents/skills/agentv-eval-builder/references/config-schema.json +0 -36
- package/dist/templates/.agents/skills/agentv-eval-builder/references/custom-evaluators.md +0 -118
- package/dist/templates/.agents/skills/agentv-eval-builder/references/eval-schema.json +0 -12753
- package/dist/templates/.agents/skills/agentv-eval-builder/references/rubric-evaluator.md +0 -77
- package/dist/templates/.agents/skills/agentv-eval-orchestrator/SKILL.md +0 -50
- package/dist/templates/.agents/skills/agentv-prompt-optimizer/SKILL.md +0 -78
- package/dist/templates/.claude/skills/agentv-eval-builder/SKILL.md +0 -177
- package/dist/templates/.claude/skills/agentv-eval-builder/references/batch-cli-evaluator.md +0 -316
- package/dist/templates/.claude/skills/agentv-eval-builder/references/compare-command.md +0 -137
- package/dist/templates/.claude/skills/agentv-eval-builder/references/composite-evaluator.md +0 -215
- package/dist/templates/.claude/skills/agentv-eval-builder/references/config-schema.json +0 -27
- package/dist/templates/.claude/skills/agentv-eval-builder/references/custom-evaluators.md +0 -115
- package/dist/templates/.claude/skills/agentv-eval-builder/references/eval-schema.json +0 -278
- package/dist/templates/.claude/skills/agentv-eval-builder/references/example-evals.md +0 -333
- package/dist/templates/.claude/skills/agentv-eval-builder/references/rubric-evaluator.md +0 -79
- package/dist/templates/.claude/skills/agentv-eval-builder/references/structured-data-evaluators.md +0 -121
- package/dist/templates/.claude/skills/agentv-eval-builder/references/tool-trajectory-evaluator.md +0 -298
- package/dist/templates/.claude/skills/agentv-prompt-optimizer/SKILL.md +0 -78
- package/dist/templates/.github/prompts/agentv-eval-build.prompt.md +0 -5
- package/dist/templates/.github/prompts/agentv-optimize.prompt.md +0 -4
- /package/dist/{chunk-6UE665XI.js.map → chunk-7LC3VNOC.js.map} +0 -0
- /package/dist/{dist-3QUJEJUT.js.map → dist-2X7A3TTC.js.map} +0 -0
- /package/dist/{interactive-EO6AR2R3.js.map → interactive-CSA4KIND.js.map} +0 -0
|
@@ -17,7 +17,7 @@ import {
|
|
|
17
17
|
validateEvalFile,
|
|
18
18
|
validateFileReferences,
|
|
19
19
|
validateTargetsFile
|
|
20
|
-
} from "./chunk-F7LAJMTO.js";
|
|
20
|
+
} from "./chunk-U556GRI3.js";
|
|
21
21
|
import {
|
|
22
22
|
createBuiltinRegistry,
|
|
23
23
|
createProvider,
|
|
@@ -35,7 +35,7 @@ import {
|
|
|
35
35
|
toSnakeCaseDeep as toSnakeCaseDeep2,
|
|
36
36
|
transpileEvalYamlFile,
|
|
37
37
|
trimBaselineResult
|
|
38
|
-
} from "./chunk-KGK5NUFG.js";
|
|
38
|
+
} from "./chunk-JUQCB3ZW.js";
|
|
39
39
|
import {
|
|
40
40
|
__commonJS,
|
|
41
41
|
__esm,
|
|
@@ -4187,7 +4187,7 @@ var evalRunCommand = command({
|
|
|
4187
4187
|
},
|
|
4188
4188
|
handler: async (args) => {
|
|
4189
4189
|
if (args.evalPaths.length === 0 && process.stdin.isTTY) {
|
|
4190
|
-
const { launchInteractiveWizard } = await import("./interactive-EO6AR2R3.js");
|
|
4190
|
+
const { launchInteractiveWizard } = await import("./interactive-CSA4KIND.js");
|
|
4191
4191
|
await launchInteractiveWizard();
|
|
4192
4192
|
return;
|
|
4193
4193
|
}
|
|
@@ -6287,4 +6287,4 @@ export {
|
|
|
6287
6287
|
preprocessArgv,
|
|
6288
6288
|
runCli
|
|
6289
6289
|
};
|
|
6290
|
-
//# sourceMappingURL=chunk-6UE665XI.js.map
|
|
6290
|
+
//# sourceMappingURL=chunk-7LC3VNOC.js.map
|
|
@@ -19552,7 +19552,7 @@ ${basePrompt}` : basePrompt;
|
|
|
19552
19552
|
if (itemType === "command_execution") {
|
|
19553
19553
|
completedToolCalls.push({
|
|
19554
19554
|
tool: "command_execution",
|
|
19555
|
-
input: item.command,
|
|
19555
|
+
input: { command: item.command },
|
|
19556
19556
|
output: item.aggregated_output,
|
|
19557
19557
|
id: item.id
|
|
19558
19558
|
});
|
|
@@ -20383,11 +20383,22 @@ async function loadCopilotSdk() {
|
|
|
20383
20383
|
try {
|
|
20384
20384
|
copilotSdkModule = await import("@github/copilot-sdk");
|
|
20385
20385
|
} catch (error) {
|
|
20386
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
20387
|
+
if (message.includes("vscode-jsonrpc")) {
|
|
20388
|
+
throw new Error(
|
|
20389
|
+
`Failed to load @github/copilot-sdk due to a known ESM compatibility issue with vscode-jsonrpc (https://github.com/github/copilot-sdk/issues/710).
|
|
20390
|
+
|
|
20391
|
+
Workarounds:
|
|
20392
|
+
- Use the copilot-cli target instead (recommended): set target type to "copilot-cli" in your eval YAML
|
|
20393
|
+
- If running under Node.js 24+: set NODE_OPTIONS="--experimental-specifier-resolution=node"
|
|
20394
|
+
- Wait for vscode-jsonrpc@9.0.0 stable to be released upstream`
|
|
20395
|
+
);
|
|
20396
|
+
}
|
|
20386
20397
|
throw new Error(
|
|
20387
20398
|
`Failed to load @github/copilot-sdk. Please install it:
|
|
20388
20399
|
npm install @github/copilot-sdk
|
|
20389
20400
|
|
|
20390
|
-
Original error: ${
|
|
20401
|
+
Original error: ${message}`
|
|
20391
20402
|
);
|
|
20392
20403
|
}
|
|
20393
20404
|
}
|
|
@@ -21592,6 +21603,13 @@ function extractToolCalls4(content) {
|
|
|
21592
21603
|
id: typeof p.id === "string" ? p.id : void 0
|
|
21593
21604
|
});
|
|
21594
21605
|
}
|
|
21606
|
+
if (p.type === "toolCall" && typeof p.name === "string") {
|
|
21607
|
+
toolCalls.push({
|
|
21608
|
+
tool: p.name,
|
|
21609
|
+
input: p.arguments,
|
|
21610
|
+
id: typeof p.id === "string" ? p.id : void 0
|
|
21611
|
+
});
|
|
21612
|
+
}
|
|
21595
21613
|
if (p.type === "tool_result" && typeof p.tool_use_id === "string") {
|
|
21596
21614
|
const existing = toolCalls.find((tc) => tc.id === p.tool_use_id);
|
|
21597
21615
|
if (existing) {
|
|
@@ -25778,12 +25796,31 @@ var COPILOT_MATCHER = {
|
|
|
25778
25796
|
readToolPrefixes: ["Viewing "],
|
|
25779
25797
|
readInputFields: ["file_path", "path"]
|
|
25780
25798
|
};
|
|
25799
|
+
var PI_CODING_AGENT_MATCHER = {
|
|
25800
|
+
skillTools: [],
|
|
25801
|
+
skillInputField: "skill",
|
|
25802
|
+
readTools: ["read"],
|
|
25803
|
+
readInputField: "path",
|
|
25804
|
+
readInputFields: ["path", "file_path", "filePath"]
|
|
25805
|
+
};
|
|
25806
|
+
var CODEX_MATCHER = {
|
|
25807
|
+
skillTools: [],
|
|
25808
|
+
skillInputField: "skill",
|
|
25809
|
+
readTools: ["command_execution"],
|
|
25810
|
+
readInputField: "command",
|
|
25811
|
+
skillToolPrefixes: ["mcp:"],
|
|
25812
|
+
readToolPrefixes: ["mcp:"],
|
|
25813
|
+
readInputFields: ["command", "path", "file_path", "filePath"]
|
|
25814
|
+
};
|
|
25781
25815
|
var PROVIDER_TOOL_SEMANTICS = {
|
|
25782
25816
|
claude: CLAUDE_MATCHER,
|
|
25783
25817
|
"claude-cli": CLAUDE_MATCHER,
|
|
25784
25818
|
"claude-sdk": CLAUDE_MATCHER,
|
|
25785
|
-
|
|
25786
|
-
"pi-agent
|
|
25819
|
+
codex: CODEX_MATCHER,
|
|
25820
|
+
"pi-coding-agent": PI_CODING_AGENT_MATCHER,
|
|
25821
|
+
// pi-agent-sdk has no tools, so skill detection is a no-op. Kept for completeness.
|
|
25822
|
+
// TODO: consider removing pi-agent-sdk provider entirely.
|
|
25823
|
+
"pi-agent-sdk": PI_CODING_AGENT_MATCHER,
|
|
25787
25824
|
"copilot-cli": COPILOT_MATCHER,
|
|
25788
25825
|
"copilot-sdk": COPILOT_MATCHER,
|
|
25789
25826
|
vscode: COPILOT_MATCHER,
|
|
@@ -25807,33 +25844,37 @@ var SkillTriggerEvaluator = class {
|
|
|
25807
25844
|
const shouldTrigger = this.config.should_trigger !== false;
|
|
25808
25845
|
const providerKind = context2.provider?.kind;
|
|
25809
25846
|
const matcher = this.resolveMatcher(providerKind);
|
|
25810
|
-
const
|
|
25847
|
+
const allToolCalls = (context2.output ?? []).flatMap((msg) => msg.toolCalls ?? []);
|
|
25811
25848
|
let triggered = false;
|
|
25812
25849
|
let evidence = "";
|
|
25813
|
-
|
|
25814
|
-
const input =
|
|
25815
|
-
if (matcher.skillTools.includes(
|
|
25850
|
+
for (const toolCall of allToolCalls) {
|
|
25851
|
+
const input = toolCall.input ?? {};
|
|
25852
|
+
if (matcher.skillTools.includes(toolCall.tool)) {
|
|
25816
25853
|
const skillArg = String(input[matcher.skillInputField] ?? "");
|
|
25817
25854
|
if (skillArg.includes(skillName)) {
|
|
25818
25855
|
triggered = true;
|
|
25819
25856
|
evidence = `Skill tool invoked with ${matcher.skillInputField}="${skillArg}"`;
|
|
25857
|
+
break;
|
|
25820
25858
|
}
|
|
25821
25859
|
} else if (matcher.skillToolPrefixes?.some(
|
|
25822
|
-
(prefix) =>
|
|
25860
|
+
(prefix) => toolCall.tool.startsWith(prefix) && toolCall.tool.includes(skillName)
|
|
25823
25861
|
)) {
|
|
25824
25862
|
triggered = true;
|
|
25825
|
-
evidence = `Skill tool invoked via tool name "${
|
|
25826
|
-
|
|
25863
|
+
evidence = `Skill tool invoked via tool name "${toolCall.tool}"`;
|
|
25864
|
+
break;
|
|
25865
|
+
} else if (matcher.readTools.includes(toolCall.tool)) {
|
|
25827
25866
|
const filePath = this.readPathFromInput(input, matcher);
|
|
25828
25867
|
if (filePath.includes(skillName)) {
|
|
25829
25868
|
triggered = true;
|
|
25830
25869
|
evidence = `Read tool loaded skill file: ${filePath}`;
|
|
25870
|
+
break;
|
|
25831
25871
|
}
|
|
25832
25872
|
} else if (matcher.readToolPrefixes?.some(
|
|
25833
|
-
(prefix) =>
|
|
25873
|
+
(prefix) => toolCall.tool.startsWith(prefix) && toolCall.tool.includes(skillName)
|
|
25834
25874
|
)) {
|
|
25835
25875
|
triggered = true;
|
|
25836
|
-
evidence = `Read tool loaded skill file via tool name "${
|
|
25876
|
+
evidence = `Read tool loaded skill file via tool name "${toolCall.tool}"`;
|
|
25877
|
+
break;
|
|
25837
25878
|
}
|
|
25838
25879
|
}
|
|
25839
25880
|
const pass = triggered === shouldTrigger;
|
|
@@ -25855,7 +25896,7 @@ var SkillTriggerEvaluator = class {
|
|
|
25855
25896
|
verdict: "fail",
|
|
25856
25897
|
assertions: [
|
|
25857
25898
|
{
|
|
25858
|
-
text: shouldTrigger ?
|
|
25899
|
+
text: shouldTrigger ? allToolCalls.length > 0 ? `Skill "${skillName}" not found in ${allToolCalls.length} tool call(s)` : "No tool calls recorded" : evidence || `Skill "${skillName}" triggered unexpectedly`,
|
|
25859
25900
|
passed: false
|
|
25860
25901
|
}
|
|
25861
25902
|
],
|
|
@@ -31062,4 +31103,4 @@ export {
|
|
|
31062
31103
|
OtelStreamingObserver,
|
|
31063
31104
|
createAgentKernel
|
|
31064
31105
|
};
|
|
31065
|
-
//# sourceMappingURL=chunk-KGK5NUFG.js.map
|
|
31106
|
+
//# sourceMappingURL=chunk-JUQCB3ZW.js.map
|