@juspay/neurolink 9.36.1 → 9.37.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/dist/auth/errors.d.ts +1 -1
- package/dist/auth/middleware/AuthMiddleware.d.ts +1 -1
- package/dist/auth/providers/BaseAuthProvider.d.ts +1 -1
- package/dist/browser/neurolink.min.js +921 -423
- package/dist/cli/commands/evaluate.d.ts +48 -0
- package/dist/cli/commands/evaluate.js +955 -0
- package/dist/cli/parser.js +4 -1
- package/dist/evaluation/BatchEvaluator.d.ts +163 -0
- package/dist/evaluation/BatchEvaluator.js +267 -0
- package/dist/evaluation/EvaluationAggregator.d.ts +272 -0
- package/dist/evaluation/EvaluationAggregator.js +377 -0
- package/dist/evaluation/EvaluatorFactory.d.ts +113 -0
- package/dist/evaluation/EvaluatorFactory.js +280 -0
- package/dist/evaluation/EvaluatorRegistry.d.ts +160 -0
- package/dist/evaluation/EvaluatorRegistry.js +184 -0
- package/dist/evaluation/errors/EvaluationError.d.ts +189 -0
- package/dist/evaluation/errors/EvaluationError.js +206 -0
- package/dist/evaluation/errors/index.d.ts +4 -0
- package/dist/evaluation/errors/index.js +4 -0
- package/dist/evaluation/hooks/index.d.ts +6 -0
- package/dist/evaluation/hooks/index.js +6 -0
- package/dist/evaluation/hooks/langfuseAdapter.d.ts +99 -0
- package/dist/evaluation/hooks/langfuseAdapter.js +172 -0
- package/dist/evaluation/hooks/observabilityHooks.d.ts +129 -0
- package/dist/evaluation/hooks/observabilityHooks.js +181 -0
- package/dist/evaluation/index.d.ts +11 -2
- package/dist/evaluation/index.js +15 -0
- package/dist/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
- package/dist/evaluation/pipeline/evaluationPipeline.js +381 -0
- package/dist/evaluation/pipeline/index.d.ts +8 -0
- package/dist/evaluation/pipeline/index.js +8 -0
- package/dist/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
- package/dist/evaluation/pipeline/pipelineBuilder.js +260 -0
- package/dist/evaluation/pipeline/presets.d.ts +66 -0
- package/dist/evaluation/pipeline/presets.js +224 -0
- package/dist/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
- package/dist/evaluation/pipeline/strategies/batchStrategy.js +238 -0
- package/dist/evaluation/pipeline/strategies/index.d.ts +6 -0
- package/dist/evaluation/pipeline/strategies/index.js +6 -0
- package/dist/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
- package/dist/evaluation/pipeline/strategies/samplingStrategy.js +238 -0
- package/dist/evaluation/reporting/index.d.ts +6 -0
- package/dist/evaluation/reporting/index.js +6 -0
- package/dist/evaluation/reporting/metricsCollector.d.ts +147 -0
- package/dist/evaluation/reporting/metricsCollector.js +285 -0
- package/dist/evaluation/reporting/reportGenerator.d.ts +90 -0
- package/dist/evaluation/reporting/reportGenerator.js +374 -0
- package/dist/evaluation/scorers/baseScorer.d.ts +83 -0
- package/dist/evaluation/scorers/baseScorer.js +232 -0
- package/dist/evaluation/scorers/customScorerUtils.d.ts +95 -0
- package/dist/evaluation/scorers/customScorerUtils.js +381 -0
- package/dist/evaluation/scorers/index.d.ts +10 -0
- package/dist/evaluation/scorers/index.js +16 -0
- package/dist/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/answerRelevancyScorer.js +99 -0
- package/dist/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
- package/dist/evaluation/scorers/llm/baseLLMScorer.js +281 -0
- package/dist/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/biasDetectionScorer.js +127 -0
- package/dist/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/contextPrecisionScorer.js +92 -0
- package/dist/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/contextRelevancyScorer.js +107 -0
- package/dist/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/faithfulnessScorer.js +121 -0
- package/dist/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/hallucinationScorer.js +140 -0
- package/dist/evaluation/scorers/llm/index.d.ts +15 -0
- package/dist/evaluation/scorers/llm/index.js +16 -0
- package/dist/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/promptAlignmentScorer.js +106 -0
- package/dist/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/summarizationScorer.js +114 -0
- package/dist/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/toneConsistencyScorer.js +106 -0
- package/dist/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/toxicityScorer.js +121 -0
- package/dist/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
- package/dist/evaluation/scorers/rule/baseRuleScorer.js +233 -0
- package/dist/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
- package/dist/evaluation/scorers/rule/contentSimilarityScorer.js +350 -0
- package/dist/evaluation/scorers/rule/formatScorer.d.ts +147 -0
- package/dist/evaluation/scorers/rule/formatScorer.js +470 -0
- package/dist/evaluation/scorers/rule/index.d.ts +9 -0
- package/dist/evaluation/scorers/rule/index.js +10 -0
- package/dist/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
- package/dist/evaluation/scorers/rule/keywordCoverageScorer.js +347 -0
- package/dist/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
- package/dist/evaluation/scorers/rule/lengthScorer.js +351 -0
- package/dist/evaluation/scorers/scorerBuilder.d.ts +161 -0
- package/dist/evaluation/scorers/scorerBuilder.js +420 -0
- package/dist/evaluation/scorers/scorerRegistry.d.ts +62 -0
- package/dist/evaluation/scorers/scorerRegistry.js +467 -0
- package/dist/index.d.ts +37 -25
- package/dist/index.js +65 -26
- package/dist/lib/auth/providers/BaseAuthProvider.d.ts +1 -1
- package/dist/lib/evaluation/BatchEvaluator.d.ts +163 -0
- package/dist/lib/evaluation/BatchEvaluator.js +268 -0
- package/dist/lib/evaluation/EvaluationAggregator.d.ts +272 -0
- package/dist/lib/evaluation/EvaluationAggregator.js +378 -0
- package/dist/lib/evaluation/EvaluatorFactory.d.ts +113 -0
- package/dist/lib/evaluation/EvaluatorFactory.js +281 -0
- package/dist/lib/evaluation/EvaluatorRegistry.d.ts +160 -0
- package/dist/lib/evaluation/EvaluatorRegistry.js +185 -0
- package/dist/lib/evaluation/errors/EvaluationError.d.ts +189 -0
- package/dist/lib/evaluation/errors/EvaluationError.js +207 -0
- package/dist/lib/evaluation/errors/index.d.ts +4 -0
- package/dist/lib/evaluation/errors/index.js +5 -0
- package/dist/lib/evaluation/hooks/index.d.ts +6 -0
- package/dist/lib/evaluation/hooks/index.js +7 -0
- package/dist/lib/evaluation/hooks/langfuseAdapter.d.ts +99 -0
- package/dist/lib/evaluation/hooks/langfuseAdapter.js +173 -0
- package/dist/lib/evaluation/hooks/observabilityHooks.d.ts +129 -0
- package/dist/lib/evaluation/hooks/observabilityHooks.js +182 -0
- package/dist/lib/evaluation/index.d.ts +11 -2
- package/dist/lib/evaluation/index.js +15 -0
- package/dist/lib/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
- package/dist/lib/evaluation/pipeline/evaluationPipeline.js +382 -0
- package/dist/lib/evaluation/pipeline/index.d.ts +8 -0
- package/dist/lib/evaluation/pipeline/index.js +9 -0
- package/dist/lib/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
- package/dist/lib/evaluation/pipeline/pipelineBuilder.js +261 -0
- package/dist/lib/evaluation/pipeline/presets.d.ts +66 -0
- package/dist/lib/evaluation/pipeline/presets.js +225 -0
- package/dist/lib/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
- package/dist/lib/evaluation/pipeline/strategies/batchStrategy.js +239 -0
- package/dist/lib/evaluation/pipeline/strategies/index.d.ts +6 -0
- package/dist/lib/evaluation/pipeline/strategies/index.js +7 -0
- package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
- package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.js +239 -0
- package/dist/lib/evaluation/reporting/index.d.ts +6 -0
- package/dist/lib/evaluation/reporting/index.js +7 -0
- package/dist/lib/evaluation/reporting/metricsCollector.d.ts +147 -0
- package/dist/lib/evaluation/reporting/metricsCollector.js +286 -0
- package/dist/lib/evaluation/reporting/reportGenerator.d.ts +90 -0
- package/dist/lib/evaluation/reporting/reportGenerator.js +375 -0
- package/dist/lib/evaluation/scorers/baseScorer.d.ts +83 -0
- package/dist/lib/evaluation/scorers/baseScorer.js +233 -0
- package/dist/lib/evaluation/scorers/customScorerUtils.d.ts +95 -0
- package/dist/lib/evaluation/scorers/customScorerUtils.js +382 -0
- package/dist/lib/evaluation/scorers/index.d.ts +10 -0
- package/dist/lib/evaluation/scorers/index.js +17 -0
- package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.js +100 -0
- package/dist/lib/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
- package/dist/lib/evaluation/scorers/llm/baseLLMScorer.js +282 -0
- package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.js +128 -0
- package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.js +93 -0
- package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.js +108 -0
- package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.js +122 -0
- package/dist/lib/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/hallucinationScorer.js +141 -0
- package/dist/lib/evaluation/scorers/llm/index.d.ts +15 -0
- package/dist/lib/evaluation/scorers/llm/index.js +17 -0
- package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.js +107 -0
- package/dist/lib/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/summarizationScorer.js +115 -0
- package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.js +107 -0
- package/dist/lib/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/toxicityScorer.js +122 -0
- package/dist/lib/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
- package/dist/lib/evaluation/scorers/rule/baseRuleScorer.js +234 -0
- package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
- package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.js +351 -0
- package/dist/lib/evaluation/scorers/rule/formatScorer.d.ts +147 -0
- package/dist/lib/evaluation/scorers/rule/formatScorer.js +471 -0
- package/dist/lib/evaluation/scorers/rule/index.d.ts +9 -0
- package/dist/lib/evaluation/scorers/rule/index.js +11 -0
- package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
- package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.js +348 -0
- package/dist/lib/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
- package/dist/lib/evaluation/scorers/rule/lengthScorer.js +352 -0
- package/dist/lib/evaluation/scorers/scorerBuilder.d.ts +161 -0
- package/dist/lib/evaluation/scorers/scorerBuilder.js +421 -0
- package/dist/lib/evaluation/scorers/scorerRegistry.d.ts +62 -0
- package/dist/lib/evaluation/scorers/scorerRegistry.js +468 -0
- package/dist/lib/index.d.ts +37 -25
- package/dist/lib/index.js +65 -26
- package/dist/lib/neurolink.d.ts +204 -0
- package/dist/lib/neurolink.js +296 -0
- package/dist/lib/types/index.d.ts +3 -1
- package/dist/lib/types/index.js +3 -2
- package/dist/lib/types/scorerTypes.d.ts +423 -0
- package/dist/lib/types/scorerTypes.js +6 -0
- package/dist/lib/utils/errorHandling.d.ts +20 -0
- package/dist/lib/utils/errorHandling.js +60 -0
- package/dist/neurolink.d.ts +204 -0
- package/dist/neurolink.js +296 -0
- package/dist/types/index.d.ts +3 -1
- package/dist/types/index.js +3 -2
- package/dist/types/scorerTypes.d.ts +423 -0
- package/dist/types/scorerTypes.js +5 -0
- package/dist/utils/errorHandling.d.ts +20 -0
- package/dist/utils/errorHandling.js +60 -0
- package/package.json +1 -1
|
@@ -0,0 +1,350 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Content Similarity Scorer
|
|
3
|
+
* Evaluates text similarity using various metrics (Jaccard, cosine, Levenshtein)
|
|
4
|
+
*/
|
|
5
|
+
import { BaseScorer } from "../baseScorer.js";
|
|
6
|
+
import { DEFAULT_RULE_SCORER_CONFIG } from "./baseRuleScorer.js";
|
|
7
|
+
/**
 * Static descriptor of the content-similarity scorer.
 * It is handed to the BaseScorer constructor by ContentSimilarityScorer.
 */
const CONTENT_SIMILARITY_METADATA = {
    // Identification
    id: "content-similarity",
    name: "Content Similarity",
    description: "Evaluates text similarity using various metrics like Jaccard, cosine, Levenshtein",
    // Classification
    type: "rule",
    category: "accuracy",
    version: "1.0.0",
    // Shared rule-scorer defaults; `threshold` is listed after the spread so
    // it replaces any threshold the shared defaults may carry.
    defaultConfig: {
        ...DEFAULT_RULE_SCORER_CONFIG,
        threshold: 0.5,
    },
    // Input fields the scorer declares as mandatory / optional
    requiredInputs: ["response"],
    optionalInputs: ["groundTruth", "context", "custom"],
};
|
|
24
|
+
/**
 * ContentSimilarityScorer evaluates how similar the response is to a reference text.
 *
 * The reference is taken from the ground truth, the joined context entries or
 * a custom reference text, depending on `compareWith`. One or more similarity
 * metrics (jaccard, cosine, levenshtein, dice, overlap) are computed on a 0-1
 * scale, combined into a single value and reported on a 0-10 scale.
 */
export class ContentSimilarityScorer extends BaseScorer {
    _similarityConfig;
    /**
     * @param config Optional settings. They are forwarded to the base scorer
     * and also merged over the similarity defaults (jaccard metric, text
     * normalization on, word tokens, n-gram size 2, ground-truth comparison,
     * averaged metric combination).
     */
    constructor(config) {
        super(CONTENT_SIMILARITY_METADATA, config);
        this._similarityConfig = {
            metric: "jaccard",
            normalizeText: true,
            tokenLevel: "word",
            ngramSize: 2,
            compareWith: "groundTruth",
            metricCombination: "average",
            ...config,
        };
    }
    /**
     * Get similarity-specific configuration
     * @returns The merged configuration object built in the constructor.
     */
    get similarityConfig() {
        return this._similarityConfig;
    }
    /**
     * Get reference text based on configuration
     * @param input Scorer input; `groundTruth`, `context` and `custom` are read.
     * @returns The reference text, or null when the selected source has none.
     */
    _getReferenceText(input) {
        const { compareWith, referenceText } = this._similarityConfig;
        if (compareWith === "context") {
            // Context entries are joined with single spaces into one text.
            const hasContext = input.context && input.context.length > 0;
            return hasContext ? input.context.join(" ") : null;
        }
        if (compareWith === "custom") {
            // The configured text wins over one supplied with the input.
            return referenceText ?? input.custom?.referenceText ?? null;
        }
        // "groundTruth" and any unrecognized value compare with the ground truth.
        return input.groundTruth ?? null;
    }
    /**
     * Calculate similarity between two texts
     * @param text1 First text.
     * @param text2 Second text.
     * @param metric Metric name; unknown names fall back to jaccard.
     * @returns Similarity produced by the selected metric.
     */
    _calculateSimilarity(text1, text2, metric) {
        const shouldNormalize = this._similarityConfig.normalizeText;
        const normalizedText1 = shouldNormalize ? this._normalizeText(text1) : text1;
        const normalizedText2 = shouldNormalize ? this._normalizeText(text2) : text2;
        const tokens1 = this._tokenize(normalizedText1);
        const tokens2 = this._tokenize(normalizedText2);
        switch (metric) {
            case "cosine":
                return this._cosineSimilarity(tokens1, tokens2);
            case "levenshtein":
                // Edit distance works on the raw character sequence, not on tokens.
                return this._levenshteinSimilarity(normalizedText1, normalizedText2);
            case "dice":
                return this._diceSimilarity(tokens1, tokens2);
            case "overlap":
                return this._overlapCoefficient(tokens1, tokens2);
            case "jaccard":
            default:
                return this._jaccardSimilarity(tokens1, tokens2);
        }
    }
    /**
     * Normalize text for comparison: lower-case it, replace punctuation and
     * symbols with spaces, collapse whitespace runs and trim.
     *
     * Letters, combining marks and digits of every script are kept. The
     * ASCII-only `\w` class would erase all non-Latin text and make unrelated
     * texts compare as two identical empty strings.
     * @param text Text to normalize.
     * @returns The normalized text.
     */
    _normalizeText(text) {
        return text
            .toLowerCase()
            .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, " ")
            .replace(/\s+/g, " ")
            .trim();
    }
    /**
     * Tokenize text based on configuration
     * @param text Text to split.
     * @returns Characters, character n-grams or whitespace-separated words,
     * depending on `tokenLevel` (words for any unrecognized value).
     */
    _tokenize(text) {
        switch (this._similarityConfig.tokenLevel) {
            case "character":
                return text.split("");
            case "ngram": {
                const configured = this._similarityConfig.ngramSize ?? 2;
                // A size below 1 (or a non-number) would yield empty-string
                // tokens, so fall back to the default size of 2.
                const n = Number.isFinite(configured) && configured >= 1
                    ? Math.floor(configured)
                    : 2;
                if (text.length > 0 && text.length < n) {
                    // Too short for a full n-gram: keep the text as one token
                    // so that different short texts do not both tokenize to
                    // nothing and then compare as identical.
                    return [text];
                }
                const ngrams = [];
                for (let i = 0; i + n <= text.length; i++) {
                    ngrams.push(text.slice(i, i + n));
                }
                return ngrams;
            }
            case "word":
            default:
                return text.split(/\s+/).filter((word) => word.length > 0);
        }
    }
    /**
     * Count the elements two sets have in common.
     * @param set1 First set.
     * @param set2 Second set.
     * @returns Size of the intersection.
     */
    _intersectionSize(set1, set2) {
        let shared = 0;
        for (const item of set1) {
            if (set2.has(item)) {
                shared++;
            }
        }
        return shared;
    }
    /**
     * Calculate Jaccard similarity coefficient
     * J(A,B) = |A ∩ B| / |A ∪ B|
     * @returns Value in [0, 1]; 1.0 when both token lists are empty.
     */
    _jaccardSimilarity(tokens1, tokens2) {
        const set1 = new Set(tokens1);
        const set2 = new Set(tokens2);
        const shared = this._intersectionSize(set1, set2);
        // |A ∪ B| = |A| + |B| - |A ∩ B|
        const unionSize = set1.size + set2.size - shared;
        if (unionSize === 0) {
            return 1.0;
        }
        return shared / unionSize;
    }
    /**
     * Calculate cosine similarity using term frequency vectors
     * @returns Value in [0, 1]; 1.0 when both token lists are empty and 0 when
     * exactly one of them is empty.
     */
    _cosineSimilarity(tokens1, tokens2) {
        const freq1 = this._getTermFrequency(tokens1);
        const freq2 = this._getTermFrequency(tokens2);
        const allTerms = new Set([...freq1.keys(), ...freq2.keys()]);
        let dotProduct = 0;
        let magnitude1 = 0;
        let magnitude2 = 0;
        for (const term of allTerms) {
            const f1 = freq1.get(term) ?? 0;
            const f2 = freq2.get(term) ?? 0;
            dotProduct += f1 * f2;
            magnitude1 += f1 * f1;
            magnitude2 += f2 * f2;
        }
        const magnitude = Math.sqrt(magnitude1) * Math.sqrt(magnitude2);
        if (magnitude === 0) {
            // Two empty texts are identical; an empty text compared with a
            // non-empty one shares nothing and must not score as a match.
            return tokens1.length === 0 && tokens2.length === 0 ? 1.0 : 0;
        }
        return dotProduct / magnitude;
    }
    /**
     * Get term frequency map
     * @param tokens Token list.
     * @returns Map from token to its number of occurrences.
     */
    _getTermFrequency(tokens) {
        const freq = new Map();
        for (const token of tokens) {
            freq.set(token, (freq.get(token) ?? 0) + 1);
        }
        return freq;
    }
    /**
     * Calculate normalized Levenshtein similarity
     * Returns 1 - (edit_distance / max_length)
     *
     * When either text is longer than 5000 characters the edit distance is not
     * computed; character-level Jaccard similarity is returned instead.
     */
    _levenshteinSimilarity(text1, text2) {
        // Guard against excessive memory usage for large texts
        const MAX_LEVENSHTEIN_LENGTH = 5000;
        const longest = Math.max(text1.length, text2.length);
        if (longest > MAX_LEVENSHTEIN_LENGTH) {
            // Fall back to a faster approximation for large texts
            return this._jaccardSimilarity(text1.split(""), text2.split(""));
        }
        if (longest === 0) {
            // Both texts are empty, hence identical.
            return 1.0;
        }
        return 1 - this._levenshteinDistance(text1, text2) / longest;
    }
    /**
     * Calculate Levenshtein edit distance using space-optimized two-row DP
     * @returns Number of single-character insertions, deletions and
     * substitutions separating the two strings.
     */
    _levenshteinDistance(str1, str2) {
        // Keep the shorter string on the column axis so the rows stay small.
        if (str1.length < str2.length) {
            return this._levenshteinDistance(str2, str1);
        }
        const m = str1.length;
        const n = str2.length;
        // Base case: turning the empty prefix into str2[0..j) costs j insertions.
        let prevRow = Array.from({ length: n + 1 }, (_, j) => j);
        let currRow = new Array(n + 1);
        for (let i = 1; i <= m; i++) {
            currRow[0] = i;
            for (let j = 1; j <= n; j++) {
                currRow[j] = str1[i - 1] === str2[j - 1]
                    ? prevRow[j - 1]
                    : 1 + Math.min(prevRow[j], currRow[j - 1], prevRow[j - 1]);
            }
            // The finished row becomes the "previous" row of the next pass.
            [prevRow, currRow] = [currRow, prevRow];
        }
        return prevRow[n];
    }
    /**
     * Calculate Dice coefficient (Sorensen-Dice)
     * DSC(A,B) = 2|A ∩ B| / (|A| + |B|)
     * @returns Value in [0, 1]; 1.0 when both token lists are empty.
     */
    _diceSimilarity(tokens1, tokens2) {
        const set1 = new Set(tokens1);
        const set2 = new Set(tokens2);
        const totalSize = set1.size + set2.size;
        if (totalSize === 0) {
            return 1.0;
        }
        return (2 * this._intersectionSize(set1, set2)) / totalSize;
    }
    /**
     * Calculate overlap coefficient
     * O(A,B) = |A ∩ B| / min(|A|, |B|)
     * @returns Value in [0, 1]; 1.0 when both token lists are empty and 0 when
     * exactly one of them is empty.
     */
    _overlapCoefficient(tokens1, tokens2) {
        const set1 = new Set(tokens1);
        const set2 = new Set(tokens2);
        const minSize = Math.min(set1.size, set2.size);
        if (minSize === 0) {
            // With a minimum of 0, equal sizes means both sets are empty.
            // An empty text must not match a non-empty one.
            return set1.size === set2.size ? 1.0 : 0;
        }
        return this._intersectionSize(set1, set2) / minSize;
    }
    /**
     * Override score to add detailed similarity metrics
     * @param input Scorer input; `response` is compared with the reference
     * chosen by `_getReferenceText`.
     * @returns Promise of the score result built by `createScoreResult`: 10
     * with a `noReferenceText` marker when no reference exists, otherwise the
     * combined similarity scaled to 0-10 with per-metric details in metadata.
     */
    async score(input) {
        const compareWith = this._similarityConfig.compareWith ?? "groundTruth";
        const reference = this._getReferenceText(input);
        if (!reference) {
            return this.createScoreResult(10, "No reference text available for comparison - passing by default", {
                metadata: {
                    noReferenceText: true,
                    compareWith,
                },
            });
        }
        // An absent or empty metric list falls back to the single configured
        // metric, so a score is never derived from zero measurements.
        const configuredMetrics = this._similarityConfig.metrics;
        const metrics = configuredMetrics == null || configuredMetrics.length === 0
            ? [this._similarityConfig.metric ?? "jaccard"]
            : configuredMetrics;
        // Token counts do not depend on the metric, so compute them once.
        const prepare = (text) => this._similarityConfig.normalizeText ? this._normalizeText(text) : text;
        const responseTokens = this._tokenize(prepare(input.response)).length;
        const referenceTokens = this._tokenize(prepare(reference)).length;
        // Calculate all metrics for detailed reporting
        const details = [];
        for (const metric of metrics) {
            details.push({
                metric,
                score: this._calculateSimilarity(input.response, reference, metric),
                responseTokens,
                referenceTokens,
            });
        }
        // Calculate combined score
        const combinedScore = this._combineMetricScores(details);
        const normalizedScore = combinedScore * 10; // Scale to 0-10
        return this.createScoreResult(normalizedScore, this._generateSimilarityReasoning(details, combinedScore), {
            metadata: {
                similarityDetails: details,
                combinedScore,
                compareWith,
                tokenLevel: this._similarityConfig.tokenLevel ?? "word",
            },
        });
    }
    /**
     * Combine multiple metric scores
     * @param details Per-metric entries carrying `metric` and `score`.
     * @returns 1.0 for an empty list, the lone score for a single entry,
     * otherwise the min / max / weighted mean / plain mean selected by
     * `metricCombination` (plain mean for unrecognized values).
     */
    _combineMetricScores(details) {
        if (details.length === 0) {
            return 1.0;
        }
        if (details.length === 1) {
            return details[0].score;
        }
        const scores = details.map((d) => d.score);
        const combination = this._similarityConfig.metricCombination ?? "average";
        switch (combination) {
            case "min":
                return Math.min(...scores);
            case "max":
                return Math.max(...scores);
            case "weighted": {
                // Metrics without a configured weight count with weight 1.
                const weights = this._similarityConfig.metricWeights ?? {};
                let totalWeight = 0;
                let weightedSum = 0;
                for (const detail of details) {
                    const weight = weights[detail.metric] ?? 1.0;
                    totalWeight += weight;
                    weightedSum += detail.score * weight;
                }
                return totalWeight > 0 ? weightedSum / totalWeight : 0;
            }
            case "average":
            default:
                return scores.reduce((sum, value) => sum + value, 0) / scores.length;
        }
    }
    /**
     * Generate reasoning from similarity details
     * @param details Per-metric entries carrying `metric` and `score`.
     * @param combinedScore Combined similarity on the 0-1 scale.
     * @returns One sentence listing each metric and the overall value as
     * percentages with one decimal.
     */
    _generateSimilarityReasoning(details, combinedScore) {
        const parts = details.map((detail) => `${detail.metric}: ${(detail.score * 100).toFixed(1)}%`);
        const overallPct = (combinedScore * 100).toFixed(1);
        return `Similarity scores - ${parts.join(", ")}. Overall: ${overallPct}%`;
    }
}
|
|
345
|
+
/**
 * Build a ContentSimilarityScorer from the given configuration.
 * The function is declared async, so callers receive a Promise that resolves
 * to the new scorer instance.
 */
export async function createContentSimilarityScorer(config) {
    const scorer = new ContentSimilarityScorer(config);
    return scorer;
}
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Format Scorer
|
|
3
|
+
* Evaluates response format compliance (JSON, markdown, code, etc.)
|
|
4
|
+
*/
|
|
5
|
+
import type { RuleScorerConfig, ScoreResult, ScorerInput, ScorerRule } from "../../../types/scorerTypes.js";
|
|
6
|
+
import { BaseRuleScorer } from "./baseRuleScorer.js";
|
|
7
|
+
/**
 * Names of the response formats a FormatScorer can be configured to expect.
 */
export type FormatType =
    | "json"
    | "markdown"
    | "code"
    | "list"
    | "numbered-list"
    | "bullet-list"
    | "table"
    | "yaml"
    | "xml"
    | "plain"
    | "html"
    | "custom";
|
|
11
|
+
/**
 * Programming languages that can be named when validating the code format;
 * the list also contains the value "any".
 */
export type CodeLanguage =
    | "javascript"
    | "typescript"
    | "python"
    | "java"
    | "c"
    | "cpp"
    | "csharp"
    | "go"
    | "rust"
    | "sql"
    | "bash"
    | "any";
|
|
15
|
+
/**
 * Options accepted by FormatScorer: every RuleScorerConfig option plus the
 * format-specific settings below. All format-specific settings are optional.
 */
export type FormatScorerConfig = RuleScorerConfig & {
    /** The single format the response is expected to have. */
    expectedFormat?: FormatType;
    /** A set of formats, for use when more than one is allowed. */
    allowedFormats?: FormatType[];
    /** Code format only: the language the code is expected to be in. */
    codeLanguage?: CodeLanguage;
    /** JSON format only: a schema to validate the response against. */
    jsonSchema?: object;
    /** Markdown format only: elements that are required. */
    markdownRequirements?: {
        hasHeadings?: boolean;
        hasCodeBlocks?: boolean;
        hasLinks?: boolean;
        hasLists?: boolean;
        minHeadingLevel?: number;
        maxHeadingLevel?: number;
    };
    /** List format only: requirements on the list. */
    listRequirements?: {
        minItems?: number;
        maxItems?: number;
        itemPattern?: string;
    };
    /** Regex pattern describing a custom format. */
    customPattern?: string;
    /** When set, the format must be exclusive (no other content). */
    strictFormat?: boolean;
};
|
|
47
|
+
/**
 * Rule-based scorer that evaluates a response's format against the
 * expected formats.
 */
export declare class FormatScorer extends BaseRuleScorer {
    private _formatConfig;
    constructor(config?: FormatScorerConfig);
    /** Format-specific configuration of this scorer. */
    get formatConfig(): FormatScorerConfig;
    /** Rules used by this scorer. */
    getRules(): ScorerRule[];
    /** Evaluates one format rule against the input. */
    evaluateRule(rule: ScorerRule, input: ScorerInput): {
        passed: boolean;
        score: number;
    };
    /** Validates the format against the allowed formats. */
    private _validateFormat;
    /** Detects which format the text has. */
    private _detectFormat;
    /** Checks whether the text is valid JSON. */
    private _isValidJson;
    /** Checks whether the text appears to be YAML. */
    private _isYaml;
    /** Checks whether the text is XML. */
    private _isXml;
    /** Checks whether the text has code blocks. */
    private _hasCodeBlocks;
    /** Checks whether the text has markdown elements. */
    private _hasMarkdownElements;
    /** Checks whether the text is a numbered list. */
    private _isNumberedList;
    /** Checks whether the text is a bullet list. */
    private _isBulletList;
    /** Checks whether the text has a table. */
    private _hasTable;
    /** Checks the markdown-specific requirements. */
    private _checkMarkdownRequirements;
    /** Checks the list-specific requirements. */
    private _checkListRequirements;
    /** Validates JSON against a schema (basic validation). */
    private _validateJsonSchema;
    /** Overrides score to add detailed format analysis. */
    score(input: ScorerInput): Promise<ScoreResult>;
}
|
|
125
|
+
/**
 * Factory for FormatScorer instances.
 * Takes an optional configuration and returns a Promise of the scorer.
 */
export declare function createFormatScorer(
    config?: FormatScorerConfig,
): Promise<FormatScorer>;
|
|
129
|
+
/**
 * Ready-made FormatScorer presets.
 * Each entry is a zero-argument function returning a FormatScorer.
 */
export declare const FormatScorerPresets: {
    /** Preset for the JSON format. */
    readonly json: () => FormatScorer;
    /** Preset for the markdown format. */
    readonly markdown: () => FormatScorer;
    /** Preset for markdown in which headings are required. */
    readonly markdownWithHeadings: () => FormatScorer;
    /** Preset for the bullet-list format. */
    readonly bulletList: () => FormatScorer;
    /** Preset for the numbered-list format. */
    readonly numberedList: () => FormatScorer;
    /** Preset for a code response. */
    readonly code: () => FormatScorer;
    /** Preset for plain text only. */
    readonly plainText: () => FormatScorer;
};
|