agentv 3.2.5 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-WQGBWX5Y.js → chunk-4ZMSAQWS.js} +4 -4
- package/dist/{chunk-WQGBWX5Y.js.map → chunk-4ZMSAQWS.js.map} +1 -1
- package/dist/{chunk-6XTYVCMN.js → chunk-5M3K2DMV.js} +72 -17
- package/dist/chunk-5M3K2DMV.js.map +1 -0
- package/dist/{chunk-BPK64EWF.js → chunk-6LP5Z5Y4.js} +9 -5
- package/dist/chunk-6LP5Z5Y4.js.map +1 -0
- package/dist/cli.js +3 -3
- package/dist/{dist-JXD6WHHI.js → dist-OC53WD3P.js} +2 -2
- package/dist/index.js +3 -3
- package/dist/{interactive-B72SWNWB.js → interactive-NA6SAIAG.js} +3 -3
- package/package.json +1 -1
- package/dist/chunk-6XTYVCMN.js.map +0 -1
- package/dist/chunk-BPK64EWF.js.map +0 -1
- /package/dist/{dist-JXD6WHHI.js.map → dist-OC53WD3P.js.map} +0 -0
- /package/dist/{interactive-B72SWNWB.js.map → interactive-NA6SAIAG.js.map} +0 -0
|
@@ -301,7 +301,7 @@ var require_dist = __commonJS({
|
|
|
301
301
|
}
|
|
302
302
|
});
|
|
303
303
|
|
|
304
|
-
// ../../packages/core/dist/chunk-
|
|
304
|
+
// ../../packages/core/dist/chunk-C4MKEQR5.js
|
|
305
305
|
import { constants } from "node:fs";
|
|
306
306
|
import { access, readFile } from "node:fs/promises";
|
|
307
307
|
import path from "node:path";
|
|
@@ -419,7 +419,7 @@ __export(external_exports2, {
|
|
|
419
419
|
void: () => voidType
|
|
420
420
|
});
|
|
421
421
|
|
|
422
|
-
// ../../packages/core/dist/chunk-
|
|
422
|
+
// ../../packages/core/dist/chunk-C4MKEQR5.js
|
|
423
423
|
var TEST_MESSAGE_ROLE_VALUES = ["system", "user", "assistant", "tool"];
|
|
424
424
|
var TEST_MESSAGE_ROLES = TEST_MESSAGE_ROLE_VALUES;
|
|
425
425
|
var TEST_MESSAGE_ROLE_SET = new Set(TEST_MESSAGE_ROLE_VALUES);
|
|
@@ -14657,19 +14657,26 @@ function logWarning(message) {
|
|
|
14657
14657
|
console.warn(`${ANSI_YELLOW2}Warning: ${message}${ANSI_RESET3}`);
|
|
14658
14658
|
}
|
|
14659
14659
|
var TEMPLATE_VARIABLES = {
|
|
14660
|
+
/** @deprecated Use OUTPUT_TEXT instead */
|
|
14660
14661
|
ANSWER: "answer",
|
|
14661
14662
|
EXPECTED_OUTPUT: "expected_output",
|
|
14663
|
+
/** @deprecated Use INPUT_TEXT instead */
|
|
14662
14664
|
QUESTION: "question",
|
|
14663
14665
|
CRITERIA: "criteria",
|
|
14666
|
+
/** @deprecated Use EXPECTED_OUTPUT_TEXT instead */
|
|
14664
14667
|
REFERENCE_ANSWER: "reference_answer",
|
|
14665
14668
|
INPUT: "input",
|
|
14666
14669
|
OUTPUT: "output",
|
|
14667
|
-
FILE_CHANGES: "file_changes"
|
|
14670
|
+
FILE_CHANGES: "file_changes",
|
|
14671
|
+
INPUT_TEXT: "input_text",
|
|
14672
|
+
OUTPUT_TEXT: "output_text",
|
|
14673
|
+
EXPECTED_OUTPUT_TEXT: "expected_output_text"
|
|
14668
14674
|
};
|
|
14669
14675
|
var VALID_TEMPLATE_VARIABLES = new Set(Object.values(TEMPLATE_VARIABLES));
|
|
14670
14676
|
var REQUIRED_TEMPLATE_VARIABLES = /* @__PURE__ */ new Set([
|
|
14671
14677
|
TEMPLATE_VARIABLES.ANSWER,
|
|
14672
|
-
TEMPLATE_VARIABLES.EXPECTED_OUTPUT
|
|
14678
|
+
TEMPLATE_VARIABLES.EXPECTED_OUTPUT,
|
|
14679
|
+
TEMPLATE_VARIABLES.OUTPUT_TEXT
|
|
14673
14680
|
]);
|
|
14674
14681
|
var ANSI_YELLOW3 = "\x1B[33m";
|
|
14675
14682
|
var ANSI_RESET4 = "\x1B[0m";
|
|
@@ -14690,13 +14697,13 @@ function validateTemplateVariables(content, source) {
|
|
|
14690
14697
|
}
|
|
14691
14698
|
match = variablePattern.exec(content);
|
|
14692
14699
|
}
|
|
14693
|
-
const hasCandidateAnswer = foundVariables.has(TEMPLATE_VARIABLES.ANSWER);
|
|
14700
|
+
const hasCandidateAnswer = foundVariables.has(TEMPLATE_VARIABLES.ANSWER) || foundVariables.has(TEMPLATE_VARIABLES.OUTPUT_TEXT);
|
|
14694
14701
|
const hasExpectedOutput = foundVariables.has(TEMPLATE_VARIABLES.EXPECTED_OUTPUT);
|
|
14695
14702
|
const hasRequiredFields = hasCandidateAnswer || hasExpectedOutput;
|
|
14696
14703
|
if (!hasRequiredFields) {
|
|
14697
14704
|
throw new Error(
|
|
14698
14705
|
`Missing required fields. Must include at least one of:
|
|
14699
|
-
- {{ ${TEMPLATE_VARIABLES.ANSWER} }}
|
|
14706
|
+
- {{ ${TEMPLATE_VARIABLES.ANSWER} }} or {{ ${TEMPLATE_VARIABLES.OUTPUT_TEXT} }}
|
|
14700
14707
|
- {{ ${TEMPLATE_VARIABLES.EXPECTED_OUTPUT} }}`
|
|
14701
14708
|
);
|
|
14702
14709
|
}
|
|
@@ -23868,7 +23875,11 @@ var CodeEvaluator = class {
|
|
|
23868
23875
|
endTime: context2.endTime ?? null,
|
|
23869
23876
|
fileChanges: context2.fileChanges ?? null,
|
|
23870
23877
|
workspacePath: context2.workspacePath ?? null,
|
|
23871
|
-
config: this.config ?? null
|
|
23878
|
+
config: this.config ?? null,
|
|
23879
|
+
// Text convenience accessors (new names, always strings)
|
|
23880
|
+
inputText: context2.evalCase.question,
|
|
23881
|
+
outputText: context2.candidate,
|
|
23882
|
+
expectedOutputText: context2.evalCase.reference_answer ?? ""
|
|
23872
23883
|
};
|
|
23873
23884
|
const inputPayload = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
|
|
23874
23885
|
let proxyEnv;
|
|
@@ -24110,7 +24121,11 @@ var LlmGraderEvaluator = class {
|
|
|
24110
24121
|
[TEMPLATE_VARIABLES.REFERENCE_ANSWER]: (context2.evalCase.reference_answer ?? "").trim(),
|
|
24111
24122
|
[TEMPLATE_VARIABLES.CRITERIA]: context2.evalCase.criteria.trim(),
|
|
24112
24123
|
[TEMPLATE_VARIABLES.QUESTION]: formattedQuestion.trim(),
|
|
24113
|
-
[TEMPLATE_VARIABLES.FILE_CHANGES]: context2.fileChanges ?? ""
|
|
24124
|
+
[TEMPLATE_VARIABLES.FILE_CHANGES]: context2.fileChanges ?? "",
|
|
24125
|
+
// Text convenience accessors (new names, always strings)
|
|
24126
|
+
[TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
|
|
24127
|
+
[TEMPLATE_VARIABLES.OUTPUT_TEXT]: context2.candidate.trim(),
|
|
24128
|
+
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (context2.evalCase.reference_answer ?? "").trim()
|
|
24114
24129
|
};
|
|
24115
24130
|
const systemPrompt = buildOutputSchema();
|
|
24116
24131
|
const evaluatorTemplate = context2.evaluatorTemplateOverride ?? this.evaluatorTemplate ?? DEFAULT_EVALUATOR_TEMPLATE;
|
|
@@ -25950,28 +25965,60 @@ var LatencyEvaluator = class {
|
|
|
25950
25965
|
};
|
|
25951
25966
|
}
|
|
25952
25967
|
};
|
|
25968
|
+
var CLAUDE_MATCHER = {
|
|
25969
|
+
skillTools: ["Skill"],
|
|
25970
|
+
skillInputField: "skill",
|
|
25971
|
+
readTools: ["Read"],
|
|
25972
|
+
readInputField: "file_path"
|
|
25973
|
+
};
|
|
25974
|
+
var COPILOT_MATCHER = {
|
|
25975
|
+
skillTools: ["Skill", "skill"],
|
|
25976
|
+
skillInputField: "skill",
|
|
25977
|
+
readTools: ["Read File", "readFile", "Read", "readTextFile"],
|
|
25978
|
+
readInputField: "file_path"
|
|
25979
|
+
};
|
|
25980
|
+
var PROVIDER_TOOL_SEMANTICS = {
|
|
25981
|
+
claude: CLAUDE_MATCHER,
|
|
25982
|
+
"claude-cli": CLAUDE_MATCHER,
|
|
25983
|
+
"claude-sdk": CLAUDE_MATCHER,
|
|
25984
|
+
"pi-coding-agent": CLAUDE_MATCHER,
|
|
25985
|
+
"pi-agent-sdk": CLAUDE_MATCHER,
|
|
25986
|
+
"copilot-cli": COPILOT_MATCHER,
|
|
25987
|
+
"copilot-sdk": COPILOT_MATCHER,
|
|
25988
|
+
vscode: COPILOT_MATCHER,
|
|
25989
|
+
"vscode-insiders": COPILOT_MATCHER
|
|
25990
|
+
};
|
|
25953
25991
|
var SkillTriggerEvaluator = class {
|
|
25954
25992
|
kind = "skill-trigger";
|
|
25955
25993
|
config;
|
|
25956
25994
|
constructor(config) {
|
|
25957
25995
|
this.config = config;
|
|
25958
25996
|
}
|
|
25997
|
+
resolveMatcher(providerKind) {
|
|
25998
|
+
if (providerKind) {
|
|
25999
|
+
const match = PROVIDER_TOOL_SEMANTICS[providerKind];
|
|
26000
|
+
if (match) return match;
|
|
26001
|
+
}
|
|
26002
|
+
return CLAUDE_MATCHER;
|
|
26003
|
+
}
|
|
25959
26004
|
evaluate(context2) {
|
|
25960
26005
|
const skillName = this.config.skill;
|
|
25961
26006
|
const shouldTrigger = this.config.should_trigger !== false;
|
|
26007
|
+
const providerKind = context2.provider?.kind;
|
|
26008
|
+
const matcher = this.resolveMatcher(providerKind);
|
|
25962
26009
|
const firstTool = (context2.output ?? []).flatMap((msg) => msg.toolCalls ?? [])[0];
|
|
25963
26010
|
let triggered = false;
|
|
25964
26011
|
let evidence = "";
|
|
25965
26012
|
if (firstTool) {
|
|
25966
26013
|
const input = firstTool.input ?? {};
|
|
25967
|
-
if (firstTool.tool
|
|
25968
|
-
const skillArg = String(input.
|
|
26014
|
+
if (matcher.skillTools.includes(firstTool.tool)) {
|
|
26015
|
+
const skillArg = String(input[matcher.skillInputField] ?? "");
|
|
25969
26016
|
if (skillArg.includes(skillName)) {
|
|
25970
26017
|
triggered = true;
|
|
25971
|
-
evidence = `Skill tool invoked with
|
|
26018
|
+
evidence = `Skill tool invoked with ${matcher.skillInputField}="${skillArg}"`;
|
|
25972
26019
|
}
|
|
25973
|
-
} else if (firstTool.tool
|
|
25974
|
-
const filePath = String(input.
|
|
26020
|
+
} else if (matcher.readTools.includes(firstTool.tool)) {
|
|
26021
|
+
const filePath = String(input[matcher.readInputField] ?? "");
|
|
25975
26022
|
if (filePath.includes(skillName)) {
|
|
25976
26023
|
triggered = true;
|
|
25977
26024
|
evidence = `Read tool loaded skill file: ${filePath}`;
|
|
@@ -25996,7 +26043,7 @@ var SkillTriggerEvaluator = class {
|
|
|
25996
26043
|
verdict: "fail",
|
|
25997
26044
|
hits: [],
|
|
25998
26045
|
misses: [
|
|
25999
|
-
shouldTrigger ? firstTool ? `First tool was "${firstTool.tool}" \u2014 not
|
|
26046
|
+
shouldTrigger ? firstTool ? `First tool was "${firstTool.tool}" \u2014 not a skill/read tool for "${skillName}"` : "No tool calls recorded" : evidence || `Skill "${skillName}" triggered unexpectedly`
|
|
26000
26047
|
],
|
|
26001
26048
|
expectedAspectCount: 1,
|
|
26002
26049
|
reasoning: shouldTrigger ? `Skill "${skillName}" was not triggered` : "False trigger: skill fired when it should not have"
|
|
@@ -26038,7 +26085,11 @@ function assembleFreeform(evalCase, candidate, promptInputs, fileChanges, evalua
|
|
|
26038
26085
|
[TEMPLATE_VARIABLES.REFERENCE_ANSWER]: (evalCase.reference_answer ?? "").trim(),
|
|
26039
26086
|
[TEMPLATE_VARIABLES.CRITERIA]: evalCase.criteria.trim(),
|
|
26040
26087
|
[TEMPLATE_VARIABLES.QUESTION]: formattedQuestion.trim(),
|
|
26041
|
-
[TEMPLATE_VARIABLES.FILE_CHANGES]: fileChanges ?? ""
|
|
26088
|
+
[TEMPLATE_VARIABLES.FILE_CHANGES]: fileChanges ?? "",
|
|
26089
|
+
// Text convenience accessors (new names, always strings)
|
|
26090
|
+
[TEMPLATE_VARIABLES.INPUT_TEXT]: formattedQuestion.trim(),
|
|
26091
|
+
[TEMPLATE_VARIABLES.OUTPUT_TEXT]: candidate.trim(),
|
|
26092
|
+
[TEMPLATE_VARIABLES.EXPECTED_OUTPUT_TEXT]: (evalCase.reference_answer ?? "").trim()
|
|
26042
26093
|
};
|
|
26043
26094
|
const systemPrompt = buildOutputSchema();
|
|
26044
26095
|
const template = evaluatorTemplateOverride ?? DEFAULT_EVALUATOR_TEMPLATE;
|
|
@@ -27029,7 +27080,11 @@ async function executePromptTemplate(script, context2, config, timeoutMs) {
|
|
|
27029
27080
|
trace: context2.trace ?? null,
|
|
27030
27081
|
fileChanges: context2.fileChanges ?? null,
|
|
27031
27082
|
workspacePath: context2.workspacePath ?? null,
|
|
27032
|
-
config: config ?? context2.config ?? null
|
|
27083
|
+
config: config ?? context2.config ?? null,
|
|
27084
|
+
// Text convenience accessors (new names, always strings)
|
|
27085
|
+
inputText: context2.evalCase.question,
|
|
27086
|
+
outputText: context2.candidate,
|
|
27087
|
+
expectedOutputText: context2.evalCase.reference_answer ?? ""
|
|
27033
27088
|
};
|
|
27034
27089
|
const inputJson = JSON.stringify(toSnakeCaseDeep(payload), null, 2);
|
|
27035
27090
|
const scriptPath = script[script.length - 1];
|
|
@@ -31148,4 +31203,4 @@ export {
|
|
|
31148
31203
|
OtelStreamingObserver,
|
|
31149
31204
|
createAgentKernel
|
|
31150
31205
|
};
|
|
31151
|
-
//# sourceMappingURL=chunk-6XTYVCMN.js.map
|
|
31206
|
+
//# sourceMappingURL=chunk-5M3K2DMV.js.map
|