@juspay/neurolink 9.36.1 → 9.38.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/auth/errors.d.ts +1 -1
- package/dist/auth/middleware/AuthMiddleware.d.ts +1 -1
- package/dist/auth/providers/BaseAuthProvider.d.ts +1 -1
- package/dist/browser/neurolink.min.js +1105 -556
- package/dist/cli/commands/evaluate.d.ts +48 -0
- package/dist/cli/commands/evaluate.js +955 -0
- package/dist/cli/parser.js +4 -1
- package/dist/evaluation/BatchEvaluator.d.ts +163 -0
- package/dist/evaluation/BatchEvaluator.js +267 -0
- package/dist/evaluation/EvaluationAggregator.d.ts +272 -0
- package/dist/evaluation/EvaluationAggregator.js +377 -0
- package/dist/evaluation/EvaluatorFactory.d.ts +113 -0
- package/dist/evaluation/EvaluatorFactory.js +280 -0
- package/dist/evaluation/EvaluatorRegistry.d.ts +160 -0
- package/dist/evaluation/EvaluatorRegistry.js +184 -0
- package/dist/evaluation/errors/EvaluationError.d.ts +189 -0
- package/dist/evaluation/errors/EvaluationError.js +206 -0
- package/dist/evaluation/errors/index.d.ts +4 -0
- package/dist/evaluation/errors/index.js +4 -0
- package/dist/evaluation/hooks/index.d.ts +6 -0
- package/dist/evaluation/hooks/index.js +6 -0
- package/dist/evaluation/hooks/langfuseAdapter.d.ts +99 -0
- package/dist/evaluation/hooks/langfuseAdapter.js +172 -0
- package/dist/evaluation/hooks/observabilityHooks.d.ts +129 -0
- package/dist/evaluation/hooks/observabilityHooks.js +181 -0
- package/dist/evaluation/index.d.ts +11 -2
- package/dist/evaluation/index.js +15 -0
- package/dist/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
- package/dist/evaluation/pipeline/evaluationPipeline.js +381 -0
- package/dist/evaluation/pipeline/index.d.ts +8 -0
- package/dist/evaluation/pipeline/index.js +8 -0
- package/dist/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
- package/dist/evaluation/pipeline/pipelineBuilder.js +260 -0
- package/dist/evaluation/pipeline/presets.d.ts +66 -0
- package/dist/evaluation/pipeline/presets.js +224 -0
- package/dist/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
- package/dist/evaluation/pipeline/strategies/batchStrategy.js +238 -0
- package/dist/evaluation/pipeline/strategies/index.d.ts +6 -0
- package/dist/evaluation/pipeline/strategies/index.js +6 -0
- package/dist/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
- package/dist/evaluation/pipeline/strategies/samplingStrategy.js +238 -0
- package/dist/evaluation/reporting/index.d.ts +6 -0
- package/dist/evaluation/reporting/index.js +6 -0
- package/dist/evaluation/reporting/metricsCollector.d.ts +147 -0
- package/dist/evaluation/reporting/metricsCollector.js +285 -0
- package/dist/evaluation/reporting/reportGenerator.d.ts +90 -0
- package/dist/evaluation/reporting/reportGenerator.js +374 -0
- package/dist/evaluation/scorers/baseScorer.d.ts +83 -0
- package/dist/evaluation/scorers/baseScorer.js +232 -0
- package/dist/evaluation/scorers/customScorerUtils.d.ts +95 -0
- package/dist/evaluation/scorers/customScorerUtils.js +381 -0
- package/dist/evaluation/scorers/index.d.ts +10 -0
- package/dist/evaluation/scorers/index.js +16 -0
- package/dist/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/answerRelevancyScorer.js +99 -0
- package/dist/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
- package/dist/evaluation/scorers/llm/baseLLMScorer.js +281 -0
- package/dist/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/biasDetectionScorer.js +127 -0
- package/dist/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/contextPrecisionScorer.js +92 -0
- package/dist/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/contextRelevancyScorer.js +107 -0
- package/dist/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/faithfulnessScorer.js +121 -0
- package/dist/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/hallucinationScorer.js +140 -0
- package/dist/evaluation/scorers/llm/index.d.ts +15 -0
- package/dist/evaluation/scorers/llm/index.js +16 -0
- package/dist/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/promptAlignmentScorer.js +106 -0
- package/dist/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/summarizationScorer.js +114 -0
- package/dist/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/toneConsistencyScorer.js +106 -0
- package/dist/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/toxicityScorer.js +121 -0
- package/dist/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
- package/dist/evaluation/scorers/rule/baseRuleScorer.js +233 -0
- package/dist/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
- package/dist/evaluation/scorers/rule/contentSimilarityScorer.js +350 -0
- package/dist/evaluation/scorers/rule/formatScorer.d.ts +147 -0
- package/dist/evaluation/scorers/rule/formatScorer.js +470 -0
- package/dist/evaluation/scorers/rule/index.d.ts +9 -0
- package/dist/evaluation/scorers/rule/index.js +10 -0
- package/dist/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
- package/dist/evaluation/scorers/rule/keywordCoverageScorer.js +347 -0
- package/dist/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
- package/dist/evaluation/scorers/rule/lengthScorer.js +351 -0
- package/dist/evaluation/scorers/scorerBuilder.d.ts +161 -0
- package/dist/evaluation/scorers/scorerBuilder.js +420 -0
- package/dist/evaluation/scorers/scorerRegistry.d.ts +62 -0
- package/dist/evaluation/scorers/scorerRegistry.js +467 -0
- package/dist/index.d.ts +37 -25
- package/dist/index.js +65 -26
- package/dist/lib/auth/providers/BaseAuthProvider.d.ts +1 -1
- package/dist/lib/evaluation/BatchEvaluator.d.ts +163 -0
- package/dist/lib/evaluation/BatchEvaluator.js +268 -0
- package/dist/lib/evaluation/EvaluationAggregator.d.ts +272 -0
- package/dist/lib/evaluation/EvaluationAggregator.js +378 -0
- package/dist/lib/evaluation/EvaluatorFactory.d.ts +113 -0
- package/dist/lib/evaluation/EvaluatorFactory.js +281 -0
- package/dist/lib/evaluation/EvaluatorRegistry.d.ts +160 -0
- package/dist/lib/evaluation/EvaluatorRegistry.js +185 -0
- package/dist/lib/evaluation/errors/EvaluationError.d.ts +189 -0
- package/dist/lib/evaluation/errors/EvaluationError.js +207 -0
- package/dist/lib/evaluation/errors/index.d.ts +4 -0
- package/dist/lib/evaluation/errors/index.js +5 -0
- package/dist/lib/evaluation/hooks/index.d.ts +6 -0
- package/dist/lib/evaluation/hooks/index.js +7 -0
- package/dist/lib/evaluation/hooks/langfuseAdapter.d.ts +99 -0
- package/dist/lib/evaluation/hooks/langfuseAdapter.js +173 -0
- package/dist/lib/evaluation/hooks/observabilityHooks.d.ts +129 -0
- package/dist/lib/evaluation/hooks/observabilityHooks.js +182 -0
- package/dist/lib/evaluation/index.d.ts +11 -2
- package/dist/lib/evaluation/index.js +15 -0
- package/dist/lib/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
- package/dist/lib/evaluation/pipeline/evaluationPipeline.js +382 -0
- package/dist/lib/evaluation/pipeline/index.d.ts +8 -0
- package/dist/lib/evaluation/pipeline/index.js +9 -0
- package/dist/lib/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
- package/dist/lib/evaluation/pipeline/pipelineBuilder.js +261 -0
- package/dist/lib/evaluation/pipeline/presets.d.ts +66 -0
- package/dist/lib/evaluation/pipeline/presets.js +225 -0
- package/dist/lib/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
- package/dist/lib/evaluation/pipeline/strategies/batchStrategy.js +239 -0
- package/dist/lib/evaluation/pipeline/strategies/index.d.ts +6 -0
- package/dist/lib/evaluation/pipeline/strategies/index.js +7 -0
- package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
- package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.js +239 -0
- package/dist/lib/evaluation/reporting/index.d.ts +6 -0
- package/dist/lib/evaluation/reporting/index.js +7 -0
- package/dist/lib/evaluation/reporting/metricsCollector.d.ts +147 -0
- package/dist/lib/evaluation/reporting/metricsCollector.js +286 -0
- package/dist/lib/evaluation/reporting/reportGenerator.d.ts +90 -0
- package/dist/lib/evaluation/reporting/reportGenerator.js +375 -0
- package/dist/lib/evaluation/scorers/baseScorer.d.ts +83 -0
- package/dist/lib/evaluation/scorers/baseScorer.js +233 -0
- package/dist/lib/evaluation/scorers/customScorerUtils.d.ts +95 -0
- package/dist/lib/evaluation/scorers/customScorerUtils.js +382 -0
- package/dist/lib/evaluation/scorers/index.d.ts +10 -0
- package/dist/lib/evaluation/scorers/index.js +17 -0
- package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.js +100 -0
- package/dist/lib/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
- package/dist/lib/evaluation/scorers/llm/baseLLMScorer.js +282 -0
- package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.js +128 -0
- package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.js +93 -0
- package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.js +108 -0
- package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.js +122 -0
- package/dist/lib/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/hallucinationScorer.js +141 -0
- package/dist/lib/evaluation/scorers/llm/index.d.ts +15 -0
- package/dist/lib/evaluation/scorers/llm/index.js +17 -0
- package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.js +107 -0
- package/dist/lib/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/summarizationScorer.js +115 -0
- package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.js +107 -0
- package/dist/lib/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/toxicityScorer.js +122 -0
- package/dist/lib/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
- package/dist/lib/evaluation/scorers/rule/baseRuleScorer.js +234 -0
- package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
- package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.js +351 -0
- package/dist/lib/evaluation/scorers/rule/formatScorer.d.ts +147 -0
- package/dist/lib/evaluation/scorers/rule/formatScorer.js +471 -0
- package/dist/lib/evaluation/scorers/rule/index.d.ts +9 -0
- package/dist/lib/evaluation/scorers/rule/index.js +11 -0
- package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
- package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.js +348 -0
- package/dist/lib/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
- package/dist/lib/evaluation/scorers/rule/lengthScorer.js +352 -0
- package/dist/lib/evaluation/scorers/scorerBuilder.d.ts +161 -0
- package/dist/lib/evaluation/scorers/scorerBuilder.js +421 -0
- package/dist/lib/evaluation/scorers/scorerRegistry.d.ts +62 -0
- package/dist/lib/evaluation/scorers/scorerRegistry.js +468 -0
- package/dist/lib/index.d.ts +37 -25
- package/dist/lib/index.js +65 -26
- package/dist/lib/neurolink.d.ts +204 -0
- package/dist/lib/neurolink.js +296 -0
- package/dist/lib/processors/media/VideoProcessor.d.ts +8 -2
- package/dist/lib/processors/media/VideoProcessor.js +90 -41
- package/dist/lib/telemetry/telemetryService.d.ts +1 -1
- package/dist/lib/telemetry/telemetryService.js +27 -13
- package/dist/lib/types/index.d.ts +3 -1
- package/dist/lib/types/index.js +3 -2
- package/dist/lib/types/scorerTypes.d.ts +423 -0
- package/dist/lib/types/scorerTypes.js +6 -0
- package/dist/lib/utils/errorHandling.d.ts +20 -0
- package/dist/lib/utils/errorHandling.js +60 -0
- package/dist/neurolink.d.ts +204 -0
- package/dist/neurolink.js +296 -0
- package/dist/processors/media/VideoProcessor.d.ts +8 -2
- package/dist/processors/media/VideoProcessor.js +90 -41
- package/dist/telemetry/telemetryService.d.ts +1 -1
- package/dist/telemetry/telemetryService.js +27 -13
- package/dist/types/index.d.ts +3 -1
- package/dist/types/index.js +3 -2
- package/dist/types/scorerTypes.d.ts +423 -0
- package/dist/types/scorerTypes.js +5 -0
- package/dist/utils/errorHandling.d.ts +20 -0
- package/dist/utils/errorHandling.js +60 -0
- package/package.json +7 -7
- package/dist/processors/media/ffprobe-static.d.ts +0 -4
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
/**
 * @file Faithfulness scorer
 * Evaluates if the response is grounded in the provided context
 */
import { BaseLLMScorer } from "./baseLLMScorer.js";
const FAITHFULNESS_PROMPT = `You are an expert at evaluating faithfulness in AI responses.

Faithfulness measures whether the response is grounded in and supported by the provided context.
A faithful response:
- Only makes claims that are supported by the context
- Does not add information not present in the context
- Accurately represents the information from the context

## Response to Evaluate
{{response}}

## Source Context
{{context}}

{{#if hasQuery}}
## Original Query
{{query}}
{{/if}}

## Instructions

1. Extract all claims/statements from the response
2. For each claim, determine if it's supported by the context
3. Calculate the faithfulness score based on the proportion of supported claims

## Output Format (JSON)

{
"score": <0-10>,
"claims": [
{
"claim": "<extracted claim>",
"supported": <true|false>,
"evidence": "<supporting context or 'Not found in context'>"
}
],
"supportedCount": <number>,
"totalClaims": <number>,
"reasoning": "<overall assessment>",
"confidence": <0.0-1.0>
}`;
/**
 * LLM-as-judge scorer that checks whether every claim in a response is
 * supported by the supplied context documents.
 */
export class FaithfulnessScorer extends BaseLLMScorer {
    /**
     * @param {object} [config] - Optional partial LLM scorer configuration,
     *   merged over the defaults declared below by BaseLLMScorer.
     */
    constructor(config) {
        super({
            id: "faithfulness",
            name: "Faithfulness",
            description: "Evaluates if the response is faithfully grounded in provided context",
            type: "llm",
            category: "faithfulness",
            version: "1.0.0",
            defaultConfig: {
                enabled: true,
                threshold: 0.7,
                weight: 1.2,
                timeout: 30000,
                retries: 2,
            },
            requiredInputs: ["response", "context"],
            optionalInputs: ["query"],
        }, config);
    }
    /**
     * Builds the judge prompt: substitutes the response, injects the numbered
     * context sources, and expands the optional query section.
     * @param {object} input - Scorer input with `response`, `context`, and optional `query`.
     * @returns {string} Fully substituted prompt text.
     */
    generatePrompt(input) {
        let prompt = this.substituteTemplate(FAITHFULNESS_PROMPT, { response: input.response });
        const contextSection = input.context && input.context.length > 0
            ? input.context.map((c, i) => `[Source ${i + 1}]: ${c}`).join("\n")
            : "No context provided";
        // Bug fix: use a replacer function so "$"-sequences inside user-supplied
        // context (e.g. "$&", "$'") are not expanded as replacement patterns.
        prompt = prompt.replace("{{context}}", () => contextSection);
        const hasQuery = !!input.query;
        prompt = this.processConditionals(prompt, { hasQuery });
        if (hasQuery) {
            prompt = this.substituteTemplate(prompt, { query: input.query });
        }
        return prompt;
    }
    /**
     * Parses the judge's JSON reply into a partial score result. Falls back to
     * a low-confidence text scan when no JSON can be extracted.
     * @param {string} response - Raw judge output.
     * @param {object} _input - Unused; kept for the BaseLLMScorer contract.
     * @returns {object} Partial score result with claim-level metadata.
     */
    parseResponse(response, _input) {
        const json = this.extractJSON(response);
        if (!json) {
            const score = this.extractScoreFromText(response);
            return {
                score,
                reasoning: "Could not parse structured response",
                confidence: 0.3,
            };
        }
        const claims = Array.isArray(json.claims)
            ? json.claims
            : [];
        const totalClaims = typeof json.totalClaims === "number" ? json.totalClaims : claims.length;
        const supportedCount = typeof json.supportedCount === "number"
            ? json.supportedCount
            : claims.filter((c) => c.supported === true).length;
        // No extracted claims means nothing could be unfaithful; treat as fully supported.
        const faithfulnessRatio = totalClaims > 0 ? supportedCount / totalClaims : 1;
        // Clamp to the 0-10 scale for consistency with the sibling LLM scorers
        // (hallucination and prompt-alignment already clamp their scores).
        const rawScore = typeof json.score === "number" ? json.score : faithfulnessRatio * 10;
        return {
            score: Math.max(0, Math.min(10, rawScore)),
            reasoning: typeof json.reasoning === "string"
                ? json.reasoning
                : "No reasoning provided",
            confidence: typeof json.confidence === "number" ? json.confidence : 0.8,
            metadata: {
                claims,
                supportedCount,
                totalClaims,
                faithfulnessRatio,
            },
        };
    }
}
/**
 * Async factory mirroring the other scorer factories.
 * @param {object} [config] - Optional partial scorer configuration.
 * @returns {Promise<FaithfulnessScorer>} Resolves to a ready scorer instance.
 */
export async function createFaithfulnessScorer(config) {
    return new FaithfulnessScorer(config);
}
//# sourceMappingURL=faithfulnessScorer.js.map
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
 * @file Hallucination detection scorer using LLM-as-judge
 * Detects factual errors and unsupported claims in AI responses
 */
import type { LLMScorerConfig, ScoreResult, ScorerInput } from "../../../types/scorerTypes.js";
import { BaseLLMScorer } from "./baseLLMScorer.js";
/**
 * LLM-as-judge scorer that flags fabricated facts, unverifiable claims, and
 * speculation presented as fact in an AI response.
 */
export declare class HallucinationScorer extends BaseLLMScorer {
    /** @param config Optional partial configuration merged over the scorer defaults. */
    constructor(config?: Partial<LLMScorerConfig>);
    /** Builds the judge prompt from query/response plus optional context and ground truth. */
    generatePrompt(input: ScorerInput): string;
    /** Parses the judge's JSON reply into a partial score result; `_input` is unused. */
    parseResponse(response: string, _input: ScorerInput): Partial<ScoreResult>;
}
/** Async factory that resolves to a ready-to-use HallucinationScorer. */
export declare function createHallucinationScorer(config?: Partial<LLMScorerConfig>): Promise<HallucinationScorer>;
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
/**
 * @file Hallucination detection scorer using LLM-as-judge
 * Detects factual errors and unsupported claims in AI responses
 */
import { BaseLLMScorer } from "./baseLLMScorer.js";
const HALLUCINATION_PROMPT = `You are an expert fact-checker evaluating an AI response for hallucinations.

A hallucination is when the AI:
1. States false facts that contradict known information
2. Fabricates specific details (names, dates, statistics) without basis
3. Makes claims that cannot be verified or are contradicted by provided context
4. Presents opinions or speculation as established facts

## Evaluation Context

**User Query:**
{{query}}

**AI Response:**
{{response}}

{{#if hasContext}}
**Provided Context:**
{{context}}
{{/if}}

{{#if hasGroundTruth}}
**Ground Truth:**
{{groundTruth}}
{{/if}}

## Instructions

Analyze the response for hallucinations. For each potential hallucination found:
1. Quote the problematic text
2. Explain why it's a hallucination
3. Rate severity (minor, moderate, severe)

Then provide an overall score from 0-10:
- 10: No hallucinations detected
- 7-9: Minor issues (imprecise but not false)
- 4-6: Moderate hallucinations present
- 1-3: Severe hallucinations
- 0: Response is mostly fabricated

## Output Format (JSON)

{
"score": <0-10>,
"hallucinations": [
{
"text": "<quoted problematic text>",
"reason": "<explanation>",
"severity": "<minor|moderate|severe>"
}
],
"reasoning": "<overall assessment>",
"confidence": <0.0-1.0>
}`;
/**
 * LLM-as-judge scorer that detects fabricated or unsupported claims in an AI
 * response, optionally grounding the check in context and ground truth.
 */
export class HallucinationScorer extends BaseLLMScorer {
    /**
     * @param {object} [config] - Optional partial LLM scorer configuration,
     *   merged over the defaults declared below by BaseLLMScorer.
     */
    constructor(config) {
        super({
            id: "hallucination",
            name: "Hallucination Detection",
            description: "Detects factual errors, fabrications, and unsupported claims in responses",
            type: "llm",
            category: "accuracy",
            version: "1.0.0",
            defaultConfig: {
                enabled: true,
                threshold: 0.8,
                weight: 1.5,
                timeout: 30000,
                retries: 2,
            },
            requiredInputs: ["query", "response"],
            optionalInputs: ["context", "groundTruth"],
        }, config);
    }
    /**
     * Builds the judge prompt, expanding the optional context and ground-truth
     * sections only when those inputs are present.
     * @param {object} input - Scorer input with `query`, `response`, and optional `context`/`groundTruth`.
     * @returns {string} Fully substituted prompt text.
     */
    generatePrompt(input) {
        // Substitute variables
        let prompt = this.substituteTemplate(HALLUCINATION_PROMPT, {
            query: input.query,
            response: input.response,
        });
        // Handle context
        const contextExists = !!(input.context && input.context.length > 0);
        prompt = this.processConditionals(prompt, { hasContext: contextExists });
        if (contextExists && input.context) {
            const contextSection = input.context
                .map((c, i) => `[${i + 1}] ${c}`)
                .join("\n");
            // Bug fix: replacer functions prevent "$"-sequences in untrusted
            // context/ground truth (e.g. "$&") from being expanded as
            // String.replace replacement patterns.
            prompt = prompt.replace("{{context}}", () => contextSection);
        }
        // Handle ground truth
        const groundTruthExists = !!input.groundTruth;
        prompt = this.processConditionals(prompt, {
            hasGroundTruth: groundTruthExists,
        });
        if (groundTruthExists && input.groundTruth) {
            prompt = prompt.replace("{{groundTruth}}", () => input.groundTruth);
        }
        return prompt;
    }
    /**
     * Parses the judge's JSON reply into a partial score result with a
     * per-severity breakdown. Falls back to a low-confidence text scan when
     * no JSON can be extracted.
     * @param {string} response - Raw judge output.
     * @param {object} _input - Unused; kept for the BaseLLMScorer contract.
     * @returns {object} Partial score result with hallucination metadata.
     */
    parseResponse(response, _input) {
        const json = this.extractJSON(response);
        if (!json) {
            // Try to extract score from text
            const score = this.extractScoreFromText(response);
            return {
                score,
                reasoning: "Could not parse structured response",
                confidence: 0.3,
            };
        }
        const hallucinations = Array.isArray(json.hallucinations)
            ? json.hallucinations
            : [];
        const severities = hallucinations.map((h) => h.severity ?? "unknown");
        const rawScore = typeof json.score === "number" ? json.score : 5;
        const score = Math.max(0, Math.min(10, rawScore)); // Clamp to 0-10
        return {
            score,
            reasoning: typeof json.reasoning === "string"
                ? json.reasoning
                : "No reasoning provided",
            confidence: typeof json.confidence === "number" ? json.confidence : 0.8,
            metadata: {
                hallucinationCount: hallucinations.length,
                hallucinations,
                severityBreakdown: {
                    minor: severities.filter((s) => s === "minor").length,
                    moderate: severities.filter((s) => s === "moderate").length,
                    severe: severities.filter((s) => s === "severe").length,
                },
            },
        };
    }
}
/**
 * Async factory mirroring the other scorer factories.
 * @param {object} [config] - Optional partial scorer configuration.
 * @returns {Promise<HallucinationScorer>} Resolves to a ready scorer instance.
 */
export async function createHallucinationScorer(config) {
    return new HallucinationScorer(config);
}
//# sourceMappingURL=hallucinationScorer.js.map
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
 * @file LLM Scorers Index
 * Export all LLM-based scorers
 *
 * Each scorer module contributes its scorer class plus an async `create*`
 * factory; the shared base class and its default configuration are
 * re-exported from baseLLMScorer as well.
 */
export { AnswerRelevancyScorer, createAnswerRelevancyScorer, } from "./answerRelevancyScorer.js";
export { BaseLLMScorer, DEFAULT_LLM_SCORER_CONFIG } from "./baseLLMScorer.js";
export { BiasDetectionScorer, createBiasDetectionScorer, } from "./biasDetectionScorer.js";
export { ContextPrecisionScorer, createContextPrecisionScorer, } from "./contextPrecisionScorer.js";
export { ContextRelevancyScorer, createContextRelevancyScorer, } from "./contextRelevancyScorer.js";
export { createFaithfulnessScorer, FaithfulnessScorer, } from "./faithfulnessScorer.js";
export { createHallucinationScorer, HallucinationScorer, } from "./hallucinationScorer.js";
export { createPromptAlignmentScorer, PromptAlignmentScorer, } from "./promptAlignmentScorer.js";
export { createSummarizationScorer, SummarizationScorer, } from "./summarizationScorer.js";
export { createToneConsistencyScorer, ToneConsistencyScorer, } from "./toneConsistencyScorer.js";
export { createToxicityScorer, ToxicityScorer } from "./toxicityScorer.js";
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
 * @file LLM Scorers Index
 * Export all LLM-based scorers
 *
 * Each scorer module contributes its scorer class plus an async `create*`
 * factory; the shared base class and its default configuration are
 * re-exported from baseLLMScorer as well.
 */
export { AnswerRelevancyScorer, createAnswerRelevancyScorer, } from "./answerRelevancyScorer.js";
export { BaseLLMScorer, DEFAULT_LLM_SCORER_CONFIG } from "./baseLLMScorer.js";
export { BiasDetectionScorer, createBiasDetectionScorer, } from "./biasDetectionScorer.js";
export { ContextPrecisionScorer, createContextPrecisionScorer, } from "./contextPrecisionScorer.js";
export { ContextRelevancyScorer, createContextRelevancyScorer, } from "./contextRelevancyScorer.js";
export { createFaithfulnessScorer, FaithfulnessScorer, } from "./faithfulnessScorer.js";
export { createHallucinationScorer, HallucinationScorer, } from "./hallucinationScorer.js";
export { createPromptAlignmentScorer, PromptAlignmentScorer, } from "./promptAlignmentScorer.js";
export { createSummarizationScorer, SummarizationScorer, } from "./summarizationScorer.js";
export { createToneConsistencyScorer, ToneConsistencyScorer, } from "./toneConsistencyScorer.js";
export { createToxicityScorer, ToxicityScorer } from "./toxicityScorer.js";
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
 * @file Prompt alignment scorer
 * Measures how well the response aligns with prompt instructions
 */
import type { LLMScorerConfig, ScoreResult, ScorerInput } from "../../../types/scorerTypes.js";
import { BaseLLMScorer } from "./baseLLMScorer.js";
/**
 * LLM-as-judge scorer that rates instruction following, format compliance,
 * constraint adherence, and completeness of a response against its query.
 */
export declare class PromptAlignmentScorer extends BaseLLMScorer {
    /** @param config Optional partial configuration merged over the scorer defaults. */
    constructor(config?: Partial<LLMScorerConfig>);
    /** Builds the judge prompt by substituting the query and response. */
    generatePrompt(input: ScorerInput): string;
    /** Parses the judge's JSON reply into a partial score result; `_input` is unused. */
    parseResponse(response: string, _input: ScorerInput): Partial<ScoreResult>;
}
/** Async factory that resolves to a ready-to-use PromptAlignmentScorer. */
export declare function createPromptAlignmentScorer(config?: Partial<LLMScorerConfig>): Promise<PromptAlignmentScorer>;
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
/**
 * @file Prompt alignment scorer
 * Measures how well the response aligns with prompt instructions
 */
import { BaseLLMScorer } from "./baseLLMScorer.js";
const PROMPT_ALIGNMENT_PROMPT = `You are an expert at evaluating how well AI responses follow instructions.

**User Instructions/Query:**
{{query}}

**AI Response:**
{{response}}

## Instructions

Analyze how well the response follows the instructions in the query. Check for:

1. **Instruction Following**: Does it do what was asked?
2. **Format Compliance**: Does it follow requested format?
3. **Constraint Adherence**: Does it respect any constraints given?
4. **Completeness**: Does it address all parts of the request?
5. **No Hallucinated Instructions**: Does it avoid adding unrequested content?

## Output Format (JSON)

{
"score": <0-10>,
"instructionFollowing": {
"score": <0-10>,
"details": "<explanation>"
},
"formatCompliance": {
"score": <0-10>,
"details": "<explanation>"
},
"constraintAdherence": {
"score": <0-10>,
"details": "<explanation>"
},
"completeness": {
"score": <0-10>,
"details": "<explanation>"
},
"missedInstructions": ["<list of missed requirements>"],
"extraContent": ["<list of unrequested content>"],
"reasoning": "<overall assessment>",
"confidence": <0.0-1.0>
}`;
/**
 * LLM-as-judge scorer that rates how faithfully a response follows the
 * instructions, constraints, and format requests contained in the query.
 */
export class PromptAlignmentScorer extends BaseLLMScorer {
    /**
     * @param {object} [config] - Optional partial LLM scorer configuration,
     *   merged over the defaults declared below by BaseLLMScorer.
     */
    constructor(config) {
        super({
            id: "prompt-alignment",
            name: "Prompt Alignment",
            description: "Measures how well the response aligns with prompt instructions",
            type: "llm",
            category: "quality",
            version: "1.0.0",
            defaultConfig: {
                enabled: true,
                threshold: 0.7,
                weight: 1.0,
                timeout: 25000,
                retries: 2,
            },
            requiredInputs: ["query", "response"],
            optionalInputs: [],
        }, config);
    }
    /**
     * Builds the judge prompt by filling the query and response placeholders.
     * @param {object} input - Scorer input with `query` and `response`.
     * @returns {string} Fully substituted prompt text.
     */
    generatePrompt(input) {
        const substitutions = {
            query: input.query,
            response: input.response,
        };
        return this.substituteTemplate(PROMPT_ALIGNMENT_PROMPT, substitutions);
    }
    /**
     * Parses the judge's JSON reply into a partial score result, clamping
     * score/confidence into range. Falls back to a low-confidence text scan
     * when no JSON can be extracted.
     * @param {string} response - Raw judge output.
     * @param {object} _input - Unused; kept for the BaseLLMScorer contract.
     * @returns {object} Partial score result with per-dimension metadata.
     */
    parseResponse(response, _input) {
        const parsed = this.extractJSON(response);
        if (!parsed) {
            // No structured payload; scan the raw text for a numeric score.
            return {
                score: this.extractScoreFromText(response),
                reasoning: "Could not parse structured response",
                confidence: 0.3,
            };
        }
        const rawScore = typeof parsed.score === "number" ? parsed.score : 5;
        const rawConfidence = typeof parsed.confidence === "number" ? parsed.confidence : 0.8;
        const reasoningText = typeof parsed.reasoning === "string"
            ? parsed.reasoning
            : "No reasoning provided";
        return {
            score: Math.max(0, Math.min(10, rawScore)),
            reasoning: reasoningText,
            confidence: Math.max(0, Math.min(1, rawConfidence)),
            metadata: {
                instructionFollowing: parsed.instructionFollowing ?? null,
                formatCompliance: parsed.formatCompliance ?? null,
                constraintAdherence: parsed.constraintAdherence ?? null,
                completeness: parsed.completeness ?? null,
                missedInstructions: parsed.missedInstructions ?? [],
                extraContent: parsed.extraContent ?? [],
            },
        };
    }
}
/**
 * Async factory mirroring the other scorer factories.
 * @param {object} [config] - Optional partial scorer configuration.
 * @returns {Promise<PromptAlignmentScorer>} Resolves to a ready scorer instance.
 */
export async function createPromptAlignmentScorer(config) {
    return new PromptAlignmentScorer(config);
}
//# sourceMappingURL=promptAlignmentScorer.js.map
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
 * @file Summarization quality scorer
 * Evaluates the quality of AI-generated summaries
 */
import type { LLMScorerConfig, ScoreResult, ScorerInput } from "../../../types/scorerTypes.js";
import { BaseLLMScorer } from "./baseLLMScorer.js";
/**
 * LLM-as-judge scorer that rates a summary's accuracy, coverage, conciseness,
 * and coherence against the original content it summarizes.
 */
export declare class SummarizationScorer extends BaseLLMScorer {
    /** @param config Optional partial configuration merged over the scorer defaults. */
    constructor(config?: Partial<LLMScorerConfig>);
    /** Builds the judge prompt from the summary, original content, and optional request. */
    generatePrompt(input: ScorerInput): string;
    /** Parses the judge's JSON reply into a partial score result; `_input` is unused. */
    parseResponse(response: string, _input: ScorerInput): Partial<ScoreResult>;
}
/** Async factory that resolves to a ready-to-use SummarizationScorer. */
export declare function createSummarizationScorer(config?: Partial<LLMScorerConfig>): Promise<SummarizationScorer>;
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Summarization quality scorer
|
|
3
|
+
* Evaluates the quality of AI-generated summaries
|
|
4
|
+
*/
|
|
5
|
+
import { BaseLLMScorer } from "./baseLLMScorer.js";
|
|
6
|
+
const SUMMARIZATION_PROMPT = `You are an expert at evaluating summary quality.
|
|
7
|
+
|
|
8
|
+
**Summary to Evaluate:**
|
|
9
|
+
{{response}}
|
|
10
|
+
|
|
11
|
+
**Original Content:**
|
|
12
|
+
{{context}}
|
|
13
|
+
|
|
14
|
+
{{#if hasQuery}}
|
|
15
|
+
**Summarization Request:**
|
|
16
|
+
{{query}}
|
|
17
|
+
{{/if}}
|
|
18
|
+
|
|
19
|
+
## Evaluation Criteria
|
|
20
|
+
|
|
21
|
+
1. **Accuracy**: Is the summary factually correct?
|
|
22
|
+
2. **Coverage**: Does it capture the key points?
|
|
23
|
+
3. **Conciseness**: Is it appropriately brief?
|
|
24
|
+
4. **Coherence**: Is it well-organized and readable?
|
|
25
|
+
5. **No Hallucinations**: Does it avoid adding new information?
|
|
26
|
+
|
|
27
|
+
## Output Format (JSON)
|
|
28
|
+
|
|
29
|
+
{
|
|
30
|
+
"score": <0-10>,
|
|
31
|
+
"accuracy": {
|
|
32
|
+
"score": <0-10>,
|
|
33
|
+
"errors": ["<list of factual errors>"]
|
|
34
|
+
},
|
|
35
|
+
"coverage": {
|
|
36
|
+
"score": <0-10>,
|
|
37
|
+
"keyPointsCovered": ["<covered points>"],
|
|
38
|
+
"keyPointsMissed": ["<missed points>"]
|
|
39
|
+
},
|
|
40
|
+
"conciseness": {
|
|
41
|
+
"score": <0-10>,
|
|
42
|
+
"assessment": "<too long|appropriate|too short>"
|
|
43
|
+
},
|
|
44
|
+
"coherence": {
|
|
45
|
+
"score": <0-10>,
|
|
46
|
+
"issues": ["<any coherence issues>"]
|
|
47
|
+
},
|
|
48
|
+
"hallucinations": ["<any fabricated information>"],
|
|
49
|
+
"reasoning": "<overall assessment>",
|
|
50
|
+
"confidence": <0.0-1.0>
|
|
51
|
+
}`;
|
|
52
|
+
/**
 * LLM-as-judge scorer for summary quality. Sends the summary plus the
 * original content to a judge model (via BaseLLMScorer) and converts the
 * judge's JSON verdict into a score, reasoning, confidence, and
 * per-criterion metadata.
 */
export class SummarizationScorer extends BaseLLMScorer {
    /**
     * @param {object} [config] - Caller overrides merged over the scorer's
     *   built-in defaults by the base class.
     */
    constructor(config) {
        const definition = {
            id: "summarization",
            name: "Summarization Quality",
            description: "Evaluates the quality of AI-generated summaries",
            type: "llm",
            category: "quality",
            version: "1.0.0",
            defaultConfig: {
                enabled: true,
                threshold: 0.7,
                weight: 1.0,
                timeout: 25000,
                retries: 2,
            },
            requiredInputs: ["response", "context"],
            optionalInputs: ["query"],
        };
        super(definition, config);
    }
    /**
     * Builds the judge prompt: keeps or strips the {{#if hasQuery}} section,
     * then fills in the response, the double-newline-joined context, and the
     * optional query.
     */
    generatePrompt(input) {
        const hasQuery = Boolean(input.query);
        const template = this.processConditionals(SUMMARIZATION_PROMPT, { hasQuery });
        return this.substituteTemplate(template, {
            response: input.response,
            context: input.context?.join("\n\n") ?? "",
            query: hasQuery ? input.query : "",
        });
    }
    /**
     * Parses the judge model's reply. Falls back to a regex-extracted score
     * (low confidence) when no JSON is found; otherwise clamps score to
     * [0, 10] and confidence to [0, 1] and forwards per-criterion details.
     */
    parseResponse(response, _input) {
        const verdict = this.extractJSON(response);
        if (!verdict) {
            return {
                score: this.extractScoreFromText(response),
                reasoning: "Could not parse structured response",
                confidence: 0.3,
            };
        }
        const clamp = (value, low, high) => Math.max(low, Math.min(high, value));
        const rawScore = typeof verdict.score === "number"
            ? verdict.score
            : this.extractScoreFromText(response);
        const rawConfidence = typeof verdict.confidence === "number" ? verdict.confidence : 0.3;
        const reasoning = typeof verdict.reasoning === "string"
            ? verdict.reasoning
            : "No reasoning provided";
        return {
            score: clamp(rawScore, 0, 10),
            reasoning,
            confidence: clamp(rawConfidence, 0, 1),
            metadata: {
                accuracy: verdict.accuracy ?? null,
                coverage: verdict.coverage ?? null,
                conciseness: verdict.conciseness ?? null,
                coherence: verdict.coherence ?? null,
                hallucinations: verdict.hallucinations ?? [],
            },
        };
    }
}
|
|
112
|
+
/**
 * Async factory for the summarization scorer.
 * @param {object} [config] - Optional overrides merged over scorer defaults.
 * @returns {Promise<SummarizationScorer>} Resolves with a new instance.
 */
export async function createSummarizationScorer(config) {
    const scorer = new SummarizationScorer(config);
    return scorer;
}
|
|
115
|
+
//# sourceMappingURL=summarizationScorer.js.map
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
 * @file Tone consistency scorer
 * Checks for consistent tone throughout the response
 */
import type { LLMScorerConfig, ScoreResult, ScorerInput } from "../../../types/scorerTypes.js";
import { BaseLLMScorer } from "./baseLLMScorer.js";
/**
 * LLM-as-judge scorer that flags shifts in tone, voice, and formality
 * within a single response (via BaseLLMScorer).
 */
export declare class ToneConsistencyScorer extends BaseLLMScorer {
    /** Creates the scorer; `config` overrides the built-in defaults. */
    constructor(config?: Partial<LLMScorerConfig>);
    /** Builds the judge prompt from the response and optional query. */
    generatePrompt(input: ScorerInput): string;
    /** Parses the judge model's reply into a partial score result. */
    parseResponse(response: string, _input: ScorerInput): Partial<ScoreResult>;
}
/** Async factory returning a new ToneConsistencyScorer. */
export declare function createToneConsistencyScorer(config?: Partial<LLMScorerConfig>): Promise<ToneConsistencyScorer>;
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Tone consistency scorer
|
|
3
|
+
* Checks for consistent tone throughout the response
|
|
4
|
+
*/
|
|
5
|
+
import { BaseLLMScorer } from "./baseLLMScorer.js";
|
|
6
|
+
const TONE_CONSISTENCY_PROMPT = `You are an expert at analyzing writing tone and style consistency.
|
|
7
|
+
|
|
8
|
+
{{#if hasQuery}}
|
|
9
|
+
**User Query:**
|
|
10
|
+
{{query}}
|
|
11
|
+
{{/if}}
|
|
12
|
+
|
|
13
|
+
**AI Response:**
|
|
14
|
+
{{response}}
|
|
15
|
+
|
|
16
|
+
## Instructions
|
|
17
|
+
|
|
18
|
+
Analyze the response for tone consistency. Check for:
|
|
19
|
+
1. Consistent formality level throughout
|
|
20
|
+
2. Consistent emotional tone
|
|
21
|
+
3. Consistent voice (e.g., professional, casual, friendly)
|
|
22
|
+
4. No jarring shifts in style
|
|
23
|
+
5. Appropriate tone for the context
|
|
24
|
+
|
|
25
|
+
## Output Format (JSON)
|
|
26
|
+
|
|
27
|
+
{
|
|
28
|
+
"score": <0-10>,
|
|
29
|
+
"dominantTone": "<identified main tone>",
|
|
30
|
+
"formalityLevel": "<formal|semi-formal|casual|mixed>",
|
|
31
|
+
"toneShifts": [
|
|
32
|
+
{
|
|
33
|
+
"location": "<beginning|middle|end>",
|
|
34
|
+
"from": "<original tone>",
|
|
35
|
+
"to": "<shifted tone>",
|
|
36
|
+
"severity": "<minor|moderate|major>"
|
|
37
|
+
}
|
|
38
|
+
],
|
|
39
|
+
"reasoning": "<detailed assessment>",
|
|
40
|
+
"confidence": <0.0-1.0>
|
|
41
|
+
}`;
|
|
42
|
+
/**
 * LLM-as-judge scorer that checks a response for consistent tone, voice,
 * and formality (via BaseLLMScorer) and reports any tone shifts found by
 * the judge model.
 */
export class ToneConsistencyScorer extends BaseLLMScorer {
    /**
     * @param {object} [config] - Caller overrides merged over the scorer's
     *   built-in defaults by the base class.
     */
    constructor(config) {
        super({
            id: "tone-consistency",
            name: "Tone Consistency",
            description: "Checks for consistent tone throughout the response",
            type: "llm",
            category: "quality",
            version: "1.0.0",
            defaultConfig: {
                enabled: true,
                threshold: 0.7,
                weight: 0.8,
                timeout: 20000,
                retries: 1,
            },
            requiredInputs: ["response"],
            optionalInputs: ["query"],
        }, config);
    }
    /**
     * Builds the judge prompt: keeps or strips the {{#if hasQuery}} section,
     * substitutes the query when present, then substitutes the response.
     */
    generatePrompt(input) {
        let prompt = TONE_CONSISTENCY_PROMPT;
        const hasQuery = !!input.query;
        prompt = this.processConditionals(prompt, { hasQuery });
        if (hasQuery) {
            prompt = this.substituteTemplate(prompt, { query: input.query });
        }
        prompt = this.substituteTemplate(prompt, { response: input.response });
        return prompt;
    }
    /**
     * Parses the judge model's reply. Falls back to a regex-extracted score
     * (low confidence) when no JSON is found.
     */
    parseResponse(response, _input) {
        const json = this.extractJSON(response);
        if (!json) {
            const score = this.extractScoreFromText(response);
            return {
                score,
                reasoning: "Could not parse structured response",
                confidence: 0.3,
            };
        }
        const toneShifts = Array.isArray(json.toneShifts)
            ? json.toneShifts
            : [];
        // Clamp score to [0, 10] and confidence to [0, 1], matching the
        // sibling scorers (summarization, prompt-alignment); previously an
        // out-of-range judge reply (e.g. "score": 15) propagated unchecked.
        const score = Math.min(10, Math.max(0, typeof json.score === "number"
            ? json.score
            : this.extractScoreFromText(response)));
        const confidence = Math.min(1, Math.max(0, typeof json.confidence === "number" ? json.confidence : 0.3));
        return {
            score,
            reasoning: typeof json.reasoning === "string"
                ? json.reasoning
                : "Could not parse structured response",
            confidence,
            metadata: {
                dominantTone: json.dominantTone ?? "unknown",
                formalityLevel: json.formalityLevel ?? "unknown",
                toneShifts,
                shiftCount: toneShifts.length,
            },
        };
    }
}
|
|
104
|
+
/**
 * Async factory for the tone-consistency scorer.
 * @param {object} [config] - Optional overrides merged over scorer defaults.
 * @returns {Promise<ToneConsistencyScorer>} Resolves with a new instance.
 */
export async function createToneConsistencyScorer(config) {
    const scorer = new ToneConsistencyScorer(config);
    return scorer;
}
|
|
107
|
+
//# sourceMappingURL=toneConsistencyScorer.js.map
|