@mastra/evals 0.12.1 → 0.13.0-alpha.1
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
- package/dist/chunk-5CVZXIFW.js +36 -0
- package/dist/chunk-5CVZXIFW.js.map +1 -0
- package/dist/chunk-QVZBKGOE.cjs +41 -0
- package/dist/chunk-QVZBKGOE.cjs.map +1 -0
- package/dist/{dist-BODKWAXM.cjs → dist-JQCAD3AD.cjs} +9 -9
- package/dist/{dist-BODKWAXM.cjs.map → dist-JQCAD3AD.cjs.map} +1 -1
- package/dist/{dist-JRG62SVA.js → dist-JVIEAZJ6.js} +9 -9
- package/dist/{dist-JRG62SVA.js.map → dist-JVIEAZJ6.js.map} +1 -1
- package/dist/evaluation.d.ts +1 -1
- package/dist/evaluation.d.ts.map +1 -1
- package/dist/index.cjs +3 -3
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +2 -2
- package/dist/index.js.map +1 -1
- package/dist/scorers/code/index.cjs +85 -0
- package/dist/scorers/code/index.cjs.map +1 -1
- package/dist/scorers/code/index.d.ts +1 -0
- package/dist/scorers/code/index.d.ts.map +1 -1
- package/dist/scorers/code/index.js +85 -1
- package/dist/scorers/code/index.js.map +1 -1
- package/dist/scorers/code/tool-call-accuracy/index.d.ts +18 -0
- package/dist/scorers/code/tool-call-accuracy/index.d.ts.map +1 -0
- package/dist/scorers/llm/index.cjs +184 -28
- package/dist/scorers/llm/index.cjs.map +1 -1
- package/dist/scorers/llm/index.d.ts +1 -0
- package/dist/scorers/llm/index.d.ts.map +1 -1
- package/dist/scorers/llm/index.js +170 -15
- package/dist/scorers/llm/index.js.map +1 -1
- package/dist/scorers/llm/tool-call-accuracy/index.d.ts +22 -0
- package/dist/scorers/llm/tool-call-accuracy/index.d.ts.map +1 -0
- package/dist/scorers/llm/tool-call-accuracy/prompts.d.ts +19 -0
- package/dist/scorers/llm/tool-call-accuracy/prompts.d.ts.map +1 -0
- package/dist/scorers/utils.d.ts +12 -0
- package/dist/scorers/utils.d.ts.map +1 -1
- package/package.json +11 -10
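In short: this release moves helpers the LLM scorers previously defined locally (roundToTwoDecimals, getUserMessageFromRunInput, getAssistantMessageFromRunOutput, plus an extractToolCalls used by the new scorer) into the shared chunk-QVZBKGOE.cjs (chunk-5CVZXIFW.js is apparently its ESM counterpart), and adds a tool-call-accuracy scorer in both code and LLM variants. The hunks shown below are from package/dist/scorers/llm/index.cjs (+184 -28); a hedged usage sketch for the new LLM scorer follows the diff.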
package/dist/scorers/llm/index.cjs
@@ -1,19 +1,10 @@
 'use strict';
 
 var chunk7QAUEU4L_cjs = require('../../chunk-7QAUEU4L.cjs');
+var chunkQVZBKGOE_cjs = require('../../chunk-QVZBKGOE.cjs');
 var scores = require('@mastra/core/scores');
 var zod = require('zod');
 
-var roundToTwoDecimals2 = (num) => {
-  return Math.round((num + Number.EPSILON) * 100) / 100;
-};
-var getUserMessageFromRunInput = (input) => {
-  return input?.inputMessages.find(({ role }) => role === "user")?.content;
-};
-var getAssistantMessageFromRunOutput = (output) => {
-  return output?.find(({ role }) => role === "assistant")?.content;
-};
-
 // src/scorers/llm/answer-relevancy/prompts.ts
 var createExtractPrompt = (output) => `
 Given the text, break it down into meaningful statements while preserving context and relationships.
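The helpers removed above now come from the shared chunk (note the rename: the local roundToTwoDecimals2 becomes chunkQVZBKGOE_cjs.roundToTwoDecimals). For readers unfamiliar with the Number.EPSILON idiom in that helper, a standalone TypeScript sketch of its behavior; illustrative only, not the package's export:

// Two-decimal rounding with an epsilon nudge, as in the extracted helper.
const roundToTwoDecimals = (num: number): number =>
  Math.round((num + Number.EPSILON) * 100) / 100;

console.log(Math.round(1.005 * 100) / 100); // 1    (1.005 is stored as 1.00499..., so naive rounding loses the cent)
console.log(roundToTwoDecimals(1.005));     // 1.01 (EPSILON nudges the value past the rounding boundary)
console.log(roundToTwoDecimals(2 / 3));     // 0.67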
@@ -236,14 +227,14 @@ function createAnswerRelevancyScorer({
     description: "Extract relevant statements from the LLM output",
     outputSchema: extractOutputSchema,
     createPrompt: ({ run }) => {
-      const assistantMessage = getAssistantMessageFromRunOutput(run.output) ?? "";
+      const assistantMessage = chunkQVZBKGOE_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
       return createExtractPrompt(assistantMessage);
     }
   }).analyze({
     description: "Score the relevance of the statements to the input",
     outputSchema: zod.z.object({ results: zod.z.array(zod.z.object({ result: zod.z.string(), reason: zod.z.string() })) }),
     createPrompt: ({ run, results }) => {
-      const input = getUserMessageFromRunInput(run.input) ?? "";
+      const input = chunkQVZBKGOE_cjs.getUserMessageFromRunInput(run.input) ?? "";
       return createScorePrompt(JSON.stringify(input), results.preprocessStepResult?.statements || []);
     }
   }).generateScore(({ results }) => {
@@ -265,8 +256,8 @@ function createAnswerRelevancyScorer({
     description: "Reason about the results",
     createPrompt: ({ run, results, score }) => {
       return createReasonPrompt({
-        input: getUserMessageFromRunInput(run.input) ?? "",
-        output: getAssistantMessageFromRunOutput(run.output) ?? "",
+        input: chunkQVZBKGOE_cjs.getUserMessageFromRunInput(run.input) ?? "",
+        output: chunkQVZBKGOE_cjs.getAssistantMessageFromRunOutput(run.output) ?? "",
         score,
         results: results.analyzeStepResult.results,
         scale: options.scale
@@ -444,7 +435,7 @@ function createFaithfulnessScorer({
     description: "Extract relevant statements from the LLM output",
     outputSchema: zod.z.array(zod.z.string()),
     createPrompt: ({ run }) => {
-      const prompt = createFaithfulnessExtractPrompt({ output: getAssistantMessageFromRunOutput(run.output) ?? "" });
+      const prompt = createFaithfulnessExtractPrompt({ output: chunkQVZBKGOE_cjs.getAssistantMessageFromRunOutput(run.output) ?? "" });
       return prompt;
     }
   }).analyze({
@@ -465,13 +456,13 @@ function createFaithfulnessScorer({
       return 0;
     }
     const score = supportedClaims / totalClaims * (options?.scale || 1);
-    return roundToTwoDecimals2(score);
+    return chunkQVZBKGOE_cjs.roundToTwoDecimals(score);
   }).generateReason({
     description: "Reason about the results",
     createPrompt: ({ run, results, score }) => {
       const prompt = createFaithfulnessReasonPrompt({
-        input: getUserMessageFromRunInput(run.input) ?? "",
-        output: getAssistantMessageFromRunOutput(run.output) ?? "",
+        input: chunkQVZBKGOE_cjs.getUserMessageFromRunInput(run.input) ?? "",
+        output: chunkQVZBKGOE_cjs.getAssistantMessageFromRunOutput(run.output) ?? "",
         context: run.output.find(({ role }) => role === "assistant")?.toolInvocations?.map((toolCall) => JSON.stringify(toolCall)) || [],
         score,
         scale: options?.scale || 1,
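Each scorer's generateScore step reduces verdicts to a ratio, scales it by options.scale (default 1), and rounds through the shared helper; the bias, hallucination, and toxicity hunks below repeat the same pattern. A worked example with assumed counts:

// Illustrative faithfulness arithmetic (assumed counts; default scale = 1).
const supportedClaims = 3;
const totalClaims = 4;
const raw = (supportedClaims / totalClaims) * 1;              // 0.75
const score = Math.round((raw + Number.EPSILON) * 100) / 100; // 0.75, same rounding as the shared helper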
@@ -602,13 +593,13 @@ function createBiasScorer({ model, options }) {
     outputSchema: zod.z.object({
       opinions: zod.z.array(zod.z.string())
     }),
-    createPrompt: ({ run }) => createBiasExtractPrompt({ output: getAssistantMessageFromRunOutput(run.output) ?? "" })
+    createPrompt: ({ run }) => createBiasExtractPrompt({ output: chunkQVZBKGOE_cjs.getAssistantMessageFromRunOutput(run.output) ?? "" })
   }).analyze({
     description: "Score the relevance of the statements to the input",
     outputSchema: zod.z.object({ results: zod.z.array(zod.z.object({ result: zod.z.string(), reason: zod.z.string() })) }),
     createPrompt: ({ run, results }) => {
       const prompt = createBiasAnalyzePrompt({
-        output: getAssistantMessageFromRunOutput(run.output) ?? "",
+        output: chunkQVZBKGOE_cjs.getAssistantMessageFromRunOutput(run.output) ?? "",
         opinions: results.preprocessStepResult?.opinions || []
       });
       return prompt;
@@ -619,7 +610,7 @@ function createBiasScorer({ model, options }) {
     }
     const biasedVerdicts = results.analyzeStepResult.results.filter((v) => v.result.toLowerCase() === "yes");
     const score = biasedVerdicts.length / results.analyzeStepResult.results.length;
-    return roundToTwoDecimals2(score * (options?.scale || 1));
+    return chunkQVZBKGOE_cjs.roundToTwoDecimals(score * (options?.scale || 1));
   }).generateReason({
     description: "Reason about the results",
     createPrompt: ({ score, results }) => {
@@ -836,7 +827,7 @@ function createHallucinationScorer({
       claims: zod.z.array(zod.z.string())
     }),
     createPrompt: ({ run }) => {
-      const prompt = createHallucinationExtractPrompt({ output: getAssistantMessageFromRunOutput(run.output) ?? "" });
+      const prompt = createHallucinationExtractPrompt({ output: chunkQVZBKGOE_cjs.getAssistantMessageFromRunOutput(run.output) ?? "" });
       return prompt;
     }
   }).analyze({
@@ -858,13 +849,13 @@ function createHallucinationScorer({
      return 0;
    }
    const score = contradictedStatements / totalStatements * (options?.scale || 1);
-    return roundToTwoDecimals2(score);
+    return chunkQVZBKGOE_cjs.roundToTwoDecimals(score);
  }).generateReason({
    description: "Reason about the results",
    createPrompt: ({ run, results, score }) => {
      const prompt = createHallucinationReasonPrompt({
-        input: getUserMessageFromRunInput(run.input) ?? "",
-        output: getAssistantMessageFromRunOutput(run.output) ?? "",
+        input: chunkQVZBKGOE_cjs.getUserMessageFromRunInput(run.input) ?? "",
+        output: chunkQVZBKGOE_cjs.getAssistantMessageFromRunOutput(run.output) ?? "",
        context: options?.context || [],
        score,
        scale: options?.scale || 1,
@@ -973,8 +964,8 @@ function createToxicityScorer({ model, options }) {
     outputSchema: zod.z.object({ verdicts: zod.z.array(zod.z.object({ verdict: zod.z.string(), reason: zod.z.string() })) }),
     createPrompt: ({ run }) => {
       const prompt = createToxicityAnalyzePrompt({
-        input: getUserMessageFromRunInput(run.input) ?? "",
-        output: getAssistantMessageFromRunOutput(run.output) ?? ""
+        input: chunkQVZBKGOE_cjs.getUserMessageFromRunInput(run.input) ?? "",
+        output: chunkQVZBKGOE_cjs.getAssistantMessageFromRunOutput(run.output) ?? ""
       });
       return prompt;
     }
@@ -990,7 +981,7 @@ function createToxicityScorer({ model, options }) {
       }
     }
     const score = toxicityCount / numberOfVerdicts;
-    return roundToTwoDecimals2(score * (options?.scale || 1));
+    return chunkQVZBKGOE_cjs.roundToTwoDecimals(score * (options?.scale || 1));
   }).generateReason({
     description: "Reason about the results",
     createPrompt: ({ results, score }) => {
@@ -1003,12 +994,177 @@ function createToxicityScorer({ model, options }) {
   });
 }
 
+// src/scorers/llm/tool-call-accuracy/prompts.ts
+var TOOL_SELECTION_ACCURACY_INSTRUCTIONS = `
+You are an expert evaluator specializing in AI agent tool selection analysis. Your role is to assess whether an agent chose appropriate tools based on explicit user requests.
+
+CORE RESPONSIBILITIES:
+- Analyze user requests to understand what was explicitly asked for
+- Evaluate each tool call against the specific user need
+- Identify missing tools that should have been used
+- Apply strict evaluation criteria focused on direct relevance
+
+EVALUATION PHILOSOPHY:
+- Be precise and literal in your assessments
+- Only approve tools that directly address the user's explicit request
+- Distinguish between "helpful" and "appropriate" - reject tools that are merely helpful but not requested
+- Consider context but prioritize what was actually asked for
+
+OUTPUT REQUIREMENTS:
+- Provide clear, specific reasoning for each evaluation
+- Use provided JSON schema exactly as specified
+- Be consistent in your evaluation standards
+- Focus on actionable insights
+
+You excel at identifying the difference between tools that directly serve the user's stated need versus tools that might be generally useful but weren't requested.
+`;
+var createAnalyzePrompt = ({
+  userInput,
+  agentResponse,
+  toolsCalled,
+  availableTools
+}) => {
+  return `
+You are evaluating whether an AI agent made appropriate tool choices for a user request.
+
+USER REQUEST: "${userInput}"
+AGENT RESPONSE: "${agentResponse}"
+TOOLS THE AGENT ACTUALLY CALLED: ${toolsCalled.length > 0 ? toolsCalled.join(", ") : "None"}
+
+TOOL REFERENCE:
+${availableTools}
+
+EVALUATION RULES:
+1. If NO tools were called: evaluate BOTH the user request AND agent response:
+- Did the user make a specific, actionable request?
+- Did the agent appropriately ask for clarification when details were insufficient?
+- Would calling a tool without the requested clarification provide poor results?
+2. If tools WERE called: evaluate if each tool was appropriate for the EXPLICIT user request
+
+AGENT RESPONSE EVALUATION:
+When no tools are called, consider if the agent's response demonstrates good judgment:
+- Asking follow-up questions for vague requests = APPROPRIATE (missingTools should be empty)
+- Providing generic answers without using available tools = INAPPROPRIATE
+- Ignoring clear, specific requests = INAPPROPRIATE
+
+CLARIFICATION EXAMPLES:
+User: "I'm looking for a firm" + Agent asks about practice area/location = APPROPRIATE clarification
+User: "help with legal stuff" + Agent asks for specifics = APPROPRIATE clarification
+User: "Create RFP for corporate litigation in NY" + Agent asks for more details = INAPPROPRIATE delay
+User: "I need pricing for litigation" + Agent gives generic answer = MISSED tool opportunity
+
+EVALUATION QUESTION:
+Did the agent make the right choice between:
+1. Acting immediately with available tools, OR
+2. Gathering more information for better results?
+
+Consider: Would you rather get generic firm recommendations or have the agent ask clarifying questions first?
+
+STRICT EVALUATION CRITERIA:
+- Only mark tools as appropriate if they DIRECTLY address what the user explicitly asked for
+- Do NOT mark tools as appropriate just because they might be "helpful" or "related" to the domain
+- If the user asked for "A", only tools that provide "A" should be marked appropriate
+- Additional tools the agent decided to call without being asked should be marked inappropriate
+
+Evaluate each tool that was called, or if no tools were called, evaluate whether that was the right decision.
+`;
+};
+var createReasonPrompt2 = ({
+  userInput,
+  score,
+  evaluations,
+  missingTools
+}) => {
+  return `
+Explain this tool selection evaluation in ONE SENTENCE.
+
+User Request: "${userInput}"
+Score: ${score}/1
+Tools Evaluated: ${JSON.stringify(evaluations)}
+Missing Tools: ${JSON.stringify(missingTools)}
+
+Provide a single, concise sentence explaining why this score was given.
+`;
+};
+
+// src/scorers/llm/tool-call-accuracy/index.ts
+var analyzeOutputSchema = zod.z.object({
+  evaluations: zod.z.array(
+    zod.z.object({
+      toolCalled: zod.z.string(),
+      wasAppropriate: zod.z.boolean(),
+      reasoning: zod.z.string()
+    })
+  ),
+  missingTools: zod.z.array(zod.z.string()).optional()
+});
+function createToolCallAccuracyScorerLLM({ model, availableTools }) {
+  const toolDefinitions = availableTools.map((tool) => `${tool.name}: ${tool.description}`).join("\n");
+  return scores.createScorer({
+    name: "Tool Call Accuracy (LLM)",
+    description: "Evaluates whether an agent selected appropriate tools for the given task using LLM analysis",
+    judge: {
+      model,
+      instructions: TOOL_SELECTION_ACCURACY_INSTRUCTIONS
+    }
+  }).preprocess(async ({ run }) => {
+    const isInputInvalid = !run.input || !run.input.inputMessages || run.input.inputMessages.length === 0;
+    const isOutputInvalid = !run.output || run.output.length === 0;
+    if (isInputInvalid || isOutputInvalid) {
+      throw new Error("Input and output messages cannot be null or empty");
+    }
+    const { tools: actualTools, toolCallInfos } = chunkQVZBKGOE_cjs.extractToolCalls(run.output);
+    return {
+      actualTools,
+      hasToolCalls: actualTools.length > 0,
+      toolCallInfos
+    };
+  }).analyze({
+    description: "Analyze the appropriateness of tool selections",
+    outputSchema: analyzeOutputSchema,
+    createPrompt: ({ run, results }) => {
+      const userInput = chunkQVZBKGOE_cjs.getUserMessageFromRunInput(run.input) ?? "";
+      const agentResponse = chunkQVZBKGOE_cjs.getAssistantMessageFromRunOutput(run.output) ?? "";
+      const toolsCalled = results.preprocessStepResult?.actualTools || [];
+      return createAnalyzePrompt({
+        userInput,
+        agentResponse,
+        toolsCalled,
+        availableTools: toolDefinitions
+      });
+    }
+  }).generateScore(({ results }) => {
+    const evaluations = results.analyzeStepResult?.evaluations || [];
+    if (evaluations.length === 0) {
+      const missingTools = results.analyzeStepResult?.missingTools || [];
+      return missingTools.length > 0 ? 0 : 1;
+    }
+    const appropriateToolCalls = evaluations.filter((e) => e.wasAppropriate).length;
+    const totalToolCalls = evaluations.length;
+    return chunkQVZBKGOE_cjs.roundToTwoDecimals(appropriateToolCalls / totalToolCalls);
+  }).generateReason({
+    description: "Generate human-readable explanation of tool selection evaluation",
+    createPrompt: ({ run, results, score }) => {
+      const userInput = chunkQVZBKGOE_cjs.getUserMessageFromRunInput(run.input) ?? "";
+      const evaluations = results.analyzeStepResult?.evaluations || [];
+      const missingTools = results.analyzeStepResult?.missingTools || [];
+      return createReasonPrompt2({
+        userInput,
+        score,
+        evaluations,
+        missingTools
+      });
+    }
+  });
+}
+
 exports.ANSWER_RELEVANCY_AGENT_INSTRUCTIONS = ANSWER_RELEVANCY_AGENT_INSTRUCTIONS;
 exports.DEFAULT_OPTIONS = DEFAULT_OPTIONS;
 exports.createAnswerRelevancyScorer = createAnswerRelevancyScorer;
 exports.createBiasScorer = createBiasScorer;
 exports.createFaithfulnessScorer = createFaithfulnessScorer;
 exports.createHallucinationScorer = createHallucinationScorer;
+exports.createToolCallAccuracyScorerLLM = createToolCallAccuracyScorerLLM;
 exports.createToxicityScorer = createToxicityScorer;
 //# sourceMappingURL=index.cjs.map
 //# sourceMappingURL=index.cjs.map
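For orientation, a hedged sketch of how the new LLM tool-call-accuracy scorer might be wired up. The option shapes (a judge model plus availableTools entries with name and description) are inferred from the compiled output above; the subpath import mirrors the dist layout, and the @ai-sdk/openai model setup is an assumption rather than documented usage:

import { openai } from '@ai-sdk/openai'; // assumed provider; any AI SDK model object should fit
import { createToolCallAccuracyScorerLLM } from '@mastra/evals/scorers/llm'; // subpath inferred from dist/scorers/llm

const scorer = createToolCallAccuracyScorerLLM({
  model: openai('gpt-4o-mini'), // hypothetical judge model
  availableTools: [
    { name: 'searchFirms', description: 'Search law firms by practice area and location' },
    { name: 'createRfp', description: 'Draft a request for proposal from structured requirements' },
  ],
});

// Per generateScore above: score = appropriate tool calls / total tool calls,
// rounded to two decimals. With no tool calls at all, the score is 1 when the
// judge reports no missing tools (e.g. a reasonable clarifying question) and 0 otherwise.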