@juspay/neurolink 9.36.1 → 9.37.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/dist/auth/errors.d.ts +1 -1
- package/dist/auth/middleware/AuthMiddleware.d.ts +1 -1
- package/dist/auth/providers/BaseAuthProvider.d.ts +1 -1
- package/dist/browser/neurolink.min.js +921 -423
- package/dist/cli/commands/evaluate.d.ts +48 -0
- package/dist/cli/commands/evaluate.js +955 -0
- package/dist/cli/parser.js +4 -1
- package/dist/evaluation/BatchEvaluator.d.ts +163 -0
- package/dist/evaluation/BatchEvaluator.js +267 -0
- package/dist/evaluation/EvaluationAggregator.d.ts +272 -0
- package/dist/evaluation/EvaluationAggregator.js +377 -0
- package/dist/evaluation/EvaluatorFactory.d.ts +113 -0
- package/dist/evaluation/EvaluatorFactory.js +280 -0
- package/dist/evaluation/EvaluatorRegistry.d.ts +160 -0
- package/dist/evaluation/EvaluatorRegistry.js +184 -0
- package/dist/evaluation/errors/EvaluationError.d.ts +189 -0
- package/dist/evaluation/errors/EvaluationError.js +206 -0
- package/dist/evaluation/errors/index.d.ts +4 -0
- package/dist/evaluation/errors/index.js +4 -0
- package/dist/evaluation/hooks/index.d.ts +6 -0
- package/dist/evaluation/hooks/index.js +6 -0
- package/dist/evaluation/hooks/langfuseAdapter.d.ts +99 -0
- package/dist/evaluation/hooks/langfuseAdapter.js +172 -0
- package/dist/evaluation/hooks/observabilityHooks.d.ts +129 -0
- package/dist/evaluation/hooks/observabilityHooks.js +181 -0
- package/dist/evaluation/index.d.ts +11 -2
- package/dist/evaluation/index.js +15 -0
- package/dist/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
- package/dist/evaluation/pipeline/evaluationPipeline.js +381 -0
- package/dist/evaluation/pipeline/index.d.ts +8 -0
- package/dist/evaluation/pipeline/index.js +8 -0
- package/dist/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
- package/dist/evaluation/pipeline/pipelineBuilder.js +260 -0
- package/dist/evaluation/pipeline/presets.d.ts +66 -0
- package/dist/evaluation/pipeline/presets.js +224 -0
- package/dist/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
- package/dist/evaluation/pipeline/strategies/batchStrategy.js +238 -0
- package/dist/evaluation/pipeline/strategies/index.d.ts +6 -0
- package/dist/evaluation/pipeline/strategies/index.js +6 -0
- package/dist/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
- package/dist/evaluation/pipeline/strategies/samplingStrategy.js +238 -0
- package/dist/evaluation/reporting/index.d.ts +6 -0
- package/dist/evaluation/reporting/index.js +6 -0
- package/dist/evaluation/reporting/metricsCollector.d.ts +147 -0
- package/dist/evaluation/reporting/metricsCollector.js +285 -0
- package/dist/evaluation/reporting/reportGenerator.d.ts +90 -0
- package/dist/evaluation/reporting/reportGenerator.js +374 -0
- package/dist/evaluation/scorers/baseScorer.d.ts +83 -0
- package/dist/evaluation/scorers/baseScorer.js +232 -0
- package/dist/evaluation/scorers/customScorerUtils.d.ts +95 -0
- package/dist/evaluation/scorers/customScorerUtils.js +381 -0
- package/dist/evaluation/scorers/index.d.ts +10 -0
- package/dist/evaluation/scorers/index.js +16 -0
- package/dist/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/answerRelevancyScorer.js +99 -0
- package/dist/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
- package/dist/evaluation/scorers/llm/baseLLMScorer.js +281 -0
- package/dist/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/biasDetectionScorer.js +127 -0
- package/dist/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/contextPrecisionScorer.js +92 -0
- package/dist/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/contextRelevancyScorer.js +107 -0
- package/dist/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/faithfulnessScorer.js +121 -0
- package/dist/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/hallucinationScorer.js +140 -0
- package/dist/evaluation/scorers/llm/index.d.ts +15 -0
- package/dist/evaluation/scorers/llm/index.js +16 -0
- package/dist/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/promptAlignmentScorer.js +106 -0
- package/dist/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/summarizationScorer.js +114 -0
- package/dist/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/toneConsistencyScorer.js +106 -0
- package/dist/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/toxicityScorer.js +121 -0
- package/dist/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
- package/dist/evaluation/scorers/rule/baseRuleScorer.js +233 -0
- package/dist/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
- package/dist/evaluation/scorers/rule/contentSimilarityScorer.js +350 -0
- package/dist/evaluation/scorers/rule/formatScorer.d.ts +147 -0
- package/dist/evaluation/scorers/rule/formatScorer.js +470 -0
- package/dist/evaluation/scorers/rule/index.d.ts +9 -0
- package/dist/evaluation/scorers/rule/index.js +10 -0
- package/dist/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
- package/dist/evaluation/scorers/rule/keywordCoverageScorer.js +347 -0
- package/dist/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
- package/dist/evaluation/scorers/rule/lengthScorer.js +351 -0
- package/dist/evaluation/scorers/scorerBuilder.d.ts +161 -0
- package/dist/evaluation/scorers/scorerBuilder.js +420 -0
- package/dist/evaluation/scorers/scorerRegistry.d.ts +62 -0
- package/dist/evaluation/scorers/scorerRegistry.js +467 -0
- package/dist/index.d.ts +37 -25
- package/dist/index.js +65 -26
- package/dist/lib/auth/providers/BaseAuthProvider.d.ts +1 -1
- package/dist/lib/evaluation/BatchEvaluator.d.ts +163 -0
- package/dist/lib/evaluation/BatchEvaluator.js +268 -0
- package/dist/lib/evaluation/EvaluationAggregator.d.ts +272 -0
- package/dist/lib/evaluation/EvaluationAggregator.js +378 -0
- package/dist/lib/evaluation/EvaluatorFactory.d.ts +113 -0
- package/dist/lib/evaluation/EvaluatorFactory.js +281 -0
- package/dist/lib/evaluation/EvaluatorRegistry.d.ts +160 -0
- package/dist/lib/evaluation/EvaluatorRegistry.js +185 -0
- package/dist/lib/evaluation/errors/EvaluationError.d.ts +189 -0
- package/dist/lib/evaluation/errors/EvaluationError.js +207 -0
- package/dist/lib/evaluation/errors/index.d.ts +4 -0
- package/dist/lib/evaluation/errors/index.js +5 -0
- package/dist/lib/evaluation/hooks/index.d.ts +6 -0
- package/dist/lib/evaluation/hooks/index.js +7 -0
- package/dist/lib/evaluation/hooks/langfuseAdapter.d.ts +99 -0
- package/dist/lib/evaluation/hooks/langfuseAdapter.js +173 -0
- package/dist/lib/evaluation/hooks/observabilityHooks.d.ts +129 -0
- package/dist/lib/evaluation/hooks/observabilityHooks.js +182 -0
- package/dist/lib/evaluation/index.d.ts +11 -2
- package/dist/lib/evaluation/index.js +15 -0
- package/dist/lib/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
- package/dist/lib/evaluation/pipeline/evaluationPipeline.js +382 -0
- package/dist/lib/evaluation/pipeline/index.d.ts +8 -0
- package/dist/lib/evaluation/pipeline/index.js +9 -0
- package/dist/lib/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
- package/dist/lib/evaluation/pipeline/pipelineBuilder.js +261 -0
- package/dist/lib/evaluation/pipeline/presets.d.ts +66 -0
- package/dist/lib/evaluation/pipeline/presets.js +225 -0
- package/dist/lib/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
- package/dist/lib/evaluation/pipeline/strategies/batchStrategy.js +239 -0
- package/dist/lib/evaluation/pipeline/strategies/index.d.ts +6 -0
- package/dist/lib/evaluation/pipeline/strategies/index.js +7 -0
- package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
- package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.js +239 -0
- package/dist/lib/evaluation/reporting/index.d.ts +6 -0
- package/dist/lib/evaluation/reporting/index.js +7 -0
- package/dist/lib/evaluation/reporting/metricsCollector.d.ts +147 -0
- package/dist/lib/evaluation/reporting/metricsCollector.js +286 -0
- package/dist/lib/evaluation/reporting/reportGenerator.d.ts +90 -0
- package/dist/lib/evaluation/reporting/reportGenerator.js +375 -0
- package/dist/lib/evaluation/scorers/baseScorer.d.ts +83 -0
- package/dist/lib/evaluation/scorers/baseScorer.js +233 -0
- package/dist/lib/evaluation/scorers/customScorerUtils.d.ts +95 -0
- package/dist/lib/evaluation/scorers/customScorerUtils.js +382 -0
- package/dist/lib/evaluation/scorers/index.d.ts +10 -0
- package/dist/lib/evaluation/scorers/index.js +17 -0
- package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.js +100 -0
- package/dist/lib/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
- package/dist/lib/evaluation/scorers/llm/baseLLMScorer.js +282 -0
- package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.js +128 -0
- package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.js +93 -0
- package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.js +108 -0
- package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.js +122 -0
- package/dist/lib/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/hallucinationScorer.js +141 -0
- package/dist/lib/evaluation/scorers/llm/index.d.ts +15 -0
- package/dist/lib/evaluation/scorers/llm/index.js +17 -0
- package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.js +107 -0
- package/dist/lib/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/summarizationScorer.js +115 -0
- package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.js +107 -0
- package/dist/lib/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/toxicityScorer.js +122 -0
- package/dist/lib/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
- package/dist/lib/evaluation/scorers/rule/baseRuleScorer.js +234 -0
- package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
- package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.js +351 -0
- package/dist/lib/evaluation/scorers/rule/formatScorer.d.ts +147 -0
- package/dist/lib/evaluation/scorers/rule/formatScorer.js +471 -0
- package/dist/lib/evaluation/scorers/rule/index.d.ts +9 -0
- package/dist/lib/evaluation/scorers/rule/index.js +11 -0
- package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
- package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.js +348 -0
- package/dist/lib/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
- package/dist/lib/evaluation/scorers/rule/lengthScorer.js +352 -0
- package/dist/lib/evaluation/scorers/scorerBuilder.d.ts +161 -0
- package/dist/lib/evaluation/scorers/scorerBuilder.js +421 -0
- package/dist/lib/evaluation/scorers/scorerRegistry.d.ts +62 -0
- package/dist/lib/evaluation/scorers/scorerRegistry.js +468 -0
- package/dist/lib/index.d.ts +37 -25
- package/dist/lib/index.js +65 -26
- package/dist/lib/neurolink.d.ts +204 -0
- package/dist/lib/neurolink.js +296 -0
- package/dist/lib/types/index.d.ts +3 -1
- package/dist/lib/types/index.js +3 -2
- package/dist/lib/types/scorerTypes.d.ts +423 -0
- package/dist/lib/types/scorerTypes.js +6 -0
- package/dist/lib/utils/errorHandling.d.ts +20 -0
- package/dist/lib/utils/errorHandling.js +60 -0
- package/dist/neurolink.d.ts +204 -0
- package/dist/neurolink.js +296 -0
- package/dist/types/index.d.ts +3 -1
- package/dist/types/index.js +3 -2
- package/dist/types/scorerTypes.d.ts +423 -0
- package/dist/types/scorerTypes.js +5 -0
- package/dist/utils/errorHandling.d.ts +20 -0
- package/dist/utils/errorHandling.js +60 -0
- package/package.json +1 -1
|
@@ -0,0 +1,347 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Keyword Coverage Scorer
|
|
3
|
+
* Evaluates how well the response covers expected keywords or topics
|
|
4
|
+
*/
|
|
5
|
+
import { BaseRuleScorer } from "./baseRuleScorer.js";
|
|
6
|
+
/**
 * Static descriptor for the keyword-coverage scorer: its identity,
 * classification, default configuration values, and accepted input fields.
 */
const KEYWORD_COVERAGE_METADATA = {
    id: "keyword-coverage",
    name: "Keyword Coverage",
    description: "Evaluates how well the response covers expected keywords or topics",
    type: "rule",
    category: "quality",
    version: "1.0.0",
    // Default scorer configuration values.
    defaultConfig: { enabled: true, threshold: 0.7, weight: 1.0, timeout: 1000, retries: 0 },
    // Input fields the scorer declares as required vs. optional.
    requiredInputs: ["response"],
    optionalInputs: ["query", "context", "groundTruth", "custom"],
};
|
|
26
|
+
/**
 * KeywordCoverageScorer evaluates how well a response covers expected keywords.
 *
 * Keywords come from configuration (`keywords`, or `setKeywords`). When none
 * are configured they are derived per call, in this order: `input.custom.keywords`,
 * the most frequent non-stop-words of `input.groundTruth`, then of `input.context`.
 * A direct keyword hit scores 1.0 for its rule; a synonym hit scores 0.9.
 */
export class KeywordCoverageScorer extends BaseRuleScorer {
    /**
     * Words ignored when deriving keywords from free text.
     * Built once for the class instead of on every extraction call.
     */
    static _STOP_WORDS = new Set([
        "the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
        "have", "has", "had", "do", "does", "did", "will", "would", "could", "should",
        "may", "might", "must", "shall", "can", "need", "dare", "ought", "used", "to",
        "of", "in", "for", "on", "with", "at", "by", "from", "as", "into",
        "through", "during", "before", "after", "above", "below", "between", "under", "again", "further",
        "then", "once", "and", "but", "or", "nor", "so", "yet", "both", "either",
        "neither", "not", "only", "own", "same", "than", "too", "very", "just", "also",
        "now", "here", "there", "when", "where", "why", "how", "all", "each", "every",
        "any", "some", "no", "other", "such", "this", "that", "these", "those", "i",
        "you", "he", "she", "it", "we", "they", "what", "which", "who", "whom",
    ]);
    _keywordConfig;
    _dynamicRules = [];
    /**
     * @param config - Optional scorer configuration; its properties override
     *   the keyword defaults set below (minCoverage 0.7, case-insensitive,
     *   word-boundary matching, no keywords).
     */
    constructor(config) {
        super(KEYWORD_COVERAGE_METADATA, config);
        this._keywordConfig = {
            minCoverage: 0.7,
            caseInsensitive: true,
            wordBoundary: true,
            keywords: [],
            ...config,
        };
        // Build rules from keywords
        this._buildRulesFromKeywords();
    }
    /**
     * Rebuild the instance-level rules from the configured keyword list.
     */
    _buildRulesFromKeywords() {
        this._dynamicRules = this._buildRulesFromKeywordsList(this._keywordConfig.keywords ?? []);
    }
    /**
     * Build scorer rules from a keyword list without mutating instance state.
     *
     * @param keywords - Keywords to turn into rules (one rule per keyword).
     * @returns One rule object per keyword, carrying the matching options,
     *   configured synonyms, and the configured weight (1.0 when unset).
     */
    _buildRulesFromKeywordsList(keywords) {
        const weights = this._keywordConfig.keywordWeights ?? {};
        return keywords.map((keyword, index) => ({
            // Index keeps ids unique even if the same keyword appears twice.
            id: `keyword-${index}-${keyword.toLowerCase().replace(/\s+/g, "-")}`,
            description: `Check for keyword: ${keyword}`,
            type: "keyword",
            params: {
                keyword,
                caseInsensitive: this._keywordConfig.caseInsensitive ?? true,
                wordBoundary: this._keywordConfig.wordBoundary ?? true,
                synonyms: this._keywordConfig.synonyms?.[keyword] ?? [],
            },
            weight: weights[keyword] ?? 1.0,
        }));
    }
    /**
     * Get keyword-specific configuration (the live object, not a copy).
     */
    get keywordConfig() {
        return this._keywordConfig;
    }
    /**
     * Get the rules built from the configured keywords.
     */
    getRules() {
        return this._dynamicRules;
    }
    /**
     * Replace the configured keywords and rebuild the rules.
     *
     * @param keywords - New keyword list.
     * @param weights - Optional per-keyword weights; existing weights are kept
     *   when this is omitted.
     */
    setKeywords(keywords, weights) {
        this._keywordConfig.keywords = keywords;
        if (weights) {
            this._keywordConfig.keywordWeights = weights;
        }
        this._buildRulesFromKeywords();
    }
    /**
     * Resolve the keyword list to use for one scoring call.
     *
     * @param input - Scorer input; `custom.keywords`, `groundTruth` and
     *   `context` are consulted only when no keywords are configured.
     * @returns Configured keywords if any, otherwise keywords derived from the
     *   input, otherwise an empty array.
     */
    _extractKeywordsFromInput(input) {
        const configured = this._keywordConfig.keywords;
        if (configured && configured.length > 0) {
            return configured;
        }
        const customKeywords = input.custom?.keywords;
        if (customKeywords && Array.isArray(customKeywords)) {
            // Rule building calls string methods on every keyword, so drop
            // non-string entries instead of throwing later.
            return customKeywords.filter((keyword) => typeof keyword === "string");
        }
        if (input.groundTruth) {
            return this._extractImportantWords(input.groundTruth);
        }
        if (input.context && input.context.length > 0) {
            return this._extractImportantWords(input.context.join(" "));
        }
        return [];
    }
    /**
     * Extract up to ten candidate keywords from free text.
     *
     * The text is lower-cased, non-word characters become spaces, and words of
     * three characters or fewer and stop words are discarded. The remaining
     * distinct words are returned most-frequent first.
     *
     * @param text - Source text.
     * @returns At most 10 distinct lower-case words.
     */
    _extractImportantWords(text) {
        const words = text
            .toLowerCase()
            .replace(/[^\w\s]/g, " ")
            .split(/\s+/)
            .filter((word) => word.length > 3 && !KeywordCoverageScorer._STOP_WORDS.has(word));
        const wordCounts = new Map();
        for (const word of words) {
            wordCounts.set(word, (wordCounts.get(word) ?? 0) + 1);
        }
        return Array.from(wordCounts.entries())
            .sort((a, b) => b[1] - a[1])
            .slice(0, 10) // Top 10 keywords
            .map(([word]) => word);
    }
    /**
     * Evaluate a single keyword rule against `input.response`.
     *
     * @param rule - Rule produced by `_buildRulesFromKeywordsList`.
     * @param input - Scorer input; only `response` is read.
     * @returns `{ passed: true, score: 1.0 }` on a keyword hit,
     *   `{ passed: true, score: 0.9 }` on a synonym hit,
     *   `{ passed: false, score: 0.0 }` otherwise.
     */
    evaluateRule(rule, input) {
        const keyword = rule.params.keyword;
        const synonyms = rule.params.synonyms ?? [];
        const caseInsensitive = rule.params.caseInsensitive;
        const wordBoundary = rule.params.wordBoundary;
        const response = input.response;
        // Check main keyword
        if (this._checkKeywordPresence(response, keyword, caseInsensitive, wordBoundary)) {
            return { passed: true, score: 1.0 };
        }
        // Check synonyms
        for (const synonym of synonyms) {
            if (this._checkKeywordPresence(response, synonym, caseInsensitive, wordBoundary)) {
                return { passed: true, score: 0.9 }; // Slightly lower for synonym match
            }
        }
        return { passed: false, score: 0.0 };
    }
    /**
     * Check if a keyword is present in text.
     *
     * With `wordBoundary` the check is delegated to the inherited
     * `containsKeyword`; otherwise a plain substring test is used.
     */
    _checkKeywordPresence(text, keyword, caseInsensitive, wordBoundary) {
        if (wordBoundary) {
            return this.containsKeyword(text, keyword, caseInsensitive);
        }
        if (caseInsensitive) {
            return text.toLowerCase().includes(keyword.toLowerCase());
        }
        return text.includes(keyword);
    }
    /**
     * Score a response, deriving keywords from the input when none are configured.
     *
     * @param input - Scorer input.
     * @returns The base scorer's result with `metadata.coverageDetails` added,
     *   or a passing result flagged `noKeywordsConfigured` when there is
     *   nothing to check.
     */
    async score(input) {
        const keywords = this._extractKeywordsFromInput(input);
        const hasConfiguredRules = this._dynamicRules.length > 0;
        // Build rules locally without mutating instance state
        const effectiveRules = hasConfiguredRules
            ? this._dynamicRules
            : this._buildRulesFromKeywordsList(keywords);
        // If still no keywords, return a passing score with note
        if (effectiveRules.length === 0) {
            return this.createScoreResult(10, "No keywords configured or extractable - passing by default", {
                metadata: {
                    noKeywordsConfigured: true,
                },
            });
        }
        let result;
        if (hasConfiguredRules) {
            result = await super.score(input);
        }
        else {
            // Per-call rules must be visible to the base scorer, which can only
            // reach them through getRules(). Run it against a derived view of
            // this instance whose `_dynamicRules` shadows the (empty) instance
            // field, so concurrent score() calls never see each other's rules.
            // NOTE(review): assumes BaseRuleScorer.score reads rules via
            // getRules() and uses no `#private` fields - confirm.
            const scoped = Object.create(this);
            scoped._dynamicRules = effectiveRules;
            result = await super.score.call(scoped, input);
        }
        // Report coverage for the same keywords the rules were built from.
        const coverageKeywords = hasConfiguredRules
            ? (this._keywordConfig.keywords ?? [])
            : keywords;
        const details = this._calculateCoverageDetails(input, coverageKeywords);
        return {
            ...result,
            metadata: {
                ...result.metadata,
                coverageDetails: details,
            },
        };
    }
    /**
     * Calculate detailed coverage information for `input.response`.
     *
     * @param input - Scorer input; only `response` is read.
     * @param keywords - Keywords to report on; defaults to the configured
     *   keywords.
     * @returns Totals, found/missing keyword lists, the plain coverage ratio,
     *   and the weighted coverage (a synonym hit counts 0.9 of the weight).
     *   Both ratios are 1 when there is nothing to check.
     */
    _calculateCoverageDetails(input, keywords = this._keywordConfig.keywords ?? []) {
        const weights = this._keywordConfig.keywordWeights ?? {};
        const caseInsensitive = this._keywordConfig.caseInsensitive ?? true;
        const wordBoundary = this._keywordConfig.wordBoundary ?? true;
        const response = input.response;
        const foundKeywords = [];
        const missingKeywords = [];
        let totalWeight = 0;
        let foundWeight = 0;
        for (const keyword of keywords) {
            const weight = weights[keyword] ?? 1.0;
            totalWeight += weight;
            if (this._checkKeywordPresence(response, keyword, caseInsensitive, wordBoundary)) {
                foundKeywords.push(keyword);
                foundWeight += weight;
                continue;
            }
            // Check synonyms
            const synonyms = this._keywordConfig.synonyms?.[keyword] ?? [];
            const synonymFound = synonyms.some((syn) => this._checkKeywordPresence(response, syn, caseInsensitive, wordBoundary));
            if (synonymFound) {
                foundKeywords.push(keyword);
                foundWeight += weight * 0.9; // Slightly less for synonym
            }
            else {
                missingKeywords.push(keyword);
            }
        }
        return {
            totalKeywords: keywords.length,
            foundKeywords,
            missingKeywords,
            coverageRatio: keywords.length > 0 ? foundKeywords.length / keywords.length : 1,
            weightedCoverage: totalWeight > 0 ? foundWeight / totalWeight : 1,
        };
    }
}
|
|
342
|
+
/**
 * Factory for KeywordCoverageScorer.
 * Declared async, so the newly constructed scorer is delivered via a Promise.
 */
export async function createKeywordCoverageScorer(config) {
    const scorer = new KeywordCoverageScorer(config);
    return scorer;
}
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Length Scorer
|
|
3
|
+
* Evaluates response length against configured constraints
|
|
4
|
+
*/
|
|
5
|
+
import type { RuleScorerConfig, ScoreResult, ScorerInput, ScorerRule } from "../../../types/scorerTypes.js";
|
|
6
|
+
import { BaseRuleScorer } from "./baseRuleScorer.js";
|
|
7
|
+
/**
 * Units in which a response's length can be measured.
 */
export type LengthUnit =
    | "words"
    | "characters"
    | "sentences"
    | "paragraphs"
    | "tokens";
|
|
11
|
+
/**
 * Kinds of length constraint a scorer can enforce.
 */
export type LengthConstraintType =
    | "exact"
    | "range"
    | "minimum"
    | "maximum"
    | "ratio";
|
|
15
|
+
/**
 * Length-scoring options, combined with the base RuleScorerConfig.
 * Every length-specific field is optional.
 */
export type LengthScorerConfig = RuleScorerConfig & {
    /** Unit in which length is measured */
    unit?: LengthUnit;
    /** Kind of constraint to enforce */
    constraintType?: LengthConstraintType;
    /** Lower bound (used by range/minimum constraints) */
    minLength?: number;
    /** Upper bound (used by range/maximum constraints) */
    maxLength?: number;
    /** Target length (used by the exact constraint) */
    exactLength?: number;
    /** Tolerance around the exact length, as a percentage */
    tolerance?: number;
    /** Target ratio relative to the query/context length */
    ratioTarget?: number;
    /** Which input the ratio is compared against: query or context */
    ratioReference?: "query" | "context";
    /** Scoring mode: binary (pass/fail) or proportional */
    scoringMode?: "binary" | "proportional";
};
|
|
38
|
+
/**
 * Rule-based scorer that evaluates response length against configurable
 * constraints.
 */
export declare class LengthScorer extends BaseRuleScorer {
    private _lengthConfig;
    constructor(config?: LengthScorerConfig);
    /** Length-specific configuration of this scorer. */
    get lengthConfig(): LengthScorerConfig;
    /** Rules used by this scorer. */
    getRules(): ScorerRule[];
    /** Measures text length in various units. */
    private _measureLength;
    /** Returns the length expressed in the configured unit. */
    private _getLengthInUnit;
    /** Counts the sentences in a text. */
    private _countSentences;
    /** Counts the paragraphs in a text. */
    private _countParagraphs;
    /**
     * Estimates the token count (rough approximation).
     * GPT-style: ~4 characters per token on average.
     */
    private _estimateTokens;
    /** Evaluates a single length rule. */
    evaluateRule(rule: ScorerRule, input: ScorerInput): { passed: boolean; score: number };
    /** Overrides score to add detailed length metrics. */
    score(input: ScorerInput): Promise<ScoreResult>;
}
|
|
85
|
+
/**
 * Factory for LengthScorer; resolves to a scorer built from the optional config.
 */
export declare function createLengthScorer(
    config?: LengthScorerConfig,
): Promise<LengthScorer>;
|
|
89
|
+
/**
 * Ready-made LengthScorer factories for common length targets.
 * Each entry is a zero-argument function returning a LengthScorer.
 */
export declare const LengthScorerPresets: {
    /** Short response (50-150 words) */
    readonly short: () => LengthScorer;
    /** Medium response (100-300 words) */
    readonly medium: () => LengthScorer;
    /** Long response (200-500 words) */
    readonly long: () => LengthScorer;
    /** Concise response (max 100 words) */
    readonly concise: () => LengthScorer;
    /** Detailed response (min 300 words) */
    readonly detailed: () => LengthScorer;
    /** Tweet-length (max 280 characters) */
    readonly tweet: () => LengthScorer;
};
|