@juspay/neurolink 9.36.1 → 9.37.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/dist/auth/errors.d.ts +1 -1
- package/dist/auth/middleware/AuthMiddleware.d.ts +1 -1
- package/dist/auth/providers/BaseAuthProvider.d.ts +1 -1
- package/dist/browser/neurolink.min.js +921 -423
- package/dist/cli/commands/evaluate.d.ts +48 -0
- package/dist/cli/commands/evaluate.js +955 -0
- package/dist/cli/parser.js +4 -1
- package/dist/evaluation/BatchEvaluator.d.ts +163 -0
- package/dist/evaluation/BatchEvaluator.js +267 -0
- package/dist/evaluation/EvaluationAggregator.d.ts +272 -0
- package/dist/evaluation/EvaluationAggregator.js +377 -0
- package/dist/evaluation/EvaluatorFactory.d.ts +113 -0
- package/dist/evaluation/EvaluatorFactory.js +280 -0
- package/dist/evaluation/EvaluatorRegistry.d.ts +160 -0
- package/dist/evaluation/EvaluatorRegistry.js +184 -0
- package/dist/evaluation/errors/EvaluationError.d.ts +189 -0
- package/dist/evaluation/errors/EvaluationError.js +206 -0
- package/dist/evaluation/errors/index.d.ts +4 -0
- package/dist/evaluation/errors/index.js +4 -0
- package/dist/evaluation/hooks/index.d.ts +6 -0
- package/dist/evaluation/hooks/index.js +6 -0
- package/dist/evaluation/hooks/langfuseAdapter.d.ts +99 -0
- package/dist/evaluation/hooks/langfuseAdapter.js +172 -0
- package/dist/evaluation/hooks/observabilityHooks.d.ts +129 -0
- package/dist/evaluation/hooks/observabilityHooks.js +181 -0
- package/dist/evaluation/index.d.ts +11 -2
- package/dist/evaluation/index.js +15 -0
- package/dist/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
- package/dist/evaluation/pipeline/evaluationPipeline.js +381 -0
- package/dist/evaluation/pipeline/index.d.ts +8 -0
- package/dist/evaluation/pipeline/index.js +8 -0
- package/dist/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
- package/dist/evaluation/pipeline/pipelineBuilder.js +260 -0
- package/dist/evaluation/pipeline/presets.d.ts +66 -0
- package/dist/evaluation/pipeline/presets.js +224 -0
- package/dist/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
- package/dist/evaluation/pipeline/strategies/batchStrategy.js +238 -0
- package/dist/evaluation/pipeline/strategies/index.d.ts +6 -0
- package/dist/evaluation/pipeline/strategies/index.js +6 -0
- package/dist/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
- package/dist/evaluation/pipeline/strategies/samplingStrategy.js +238 -0
- package/dist/evaluation/reporting/index.d.ts +6 -0
- package/dist/evaluation/reporting/index.js +6 -0
- package/dist/evaluation/reporting/metricsCollector.d.ts +147 -0
- package/dist/evaluation/reporting/metricsCollector.js +285 -0
- package/dist/evaluation/reporting/reportGenerator.d.ts +90 -0
- package/dist/evaluation/reporting/reportGenerator.js +374 -0
- package/dist/evaluation/scorers/baseScorer.d.ts +83 -0
- package/dist/evaluation/scorers/baseScorer.js +232 -0
- package/dist/evaluation/scorers/customScorerUtils.d.ts +95 -0
- package/dist/evaluation/scorers/customScorerUtils.js +381 -0
- package/dist/evaluation/scorers/index.d.ts +10 -0
- package/dist/evaluation/scorers/index.js +16 -0
- package/dist/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/answerRelevancyScorer.js +99 -0
- package/dist/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
- package/dist/evaluation/scorers/llm/baseLLMScorer.js +281 -0
- package/dist/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/biasDetectionScorer.js +127 -0
- package/dist/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/contextPrecisionScorer.js +92 -0
- package/dist/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/contextRelevancyScorer.js +107 -0
- package/dist/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/faithfulnessScorer.js +121 -0
- package/dist/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/hallucinationScorer.js +140 -0
- package/dist/evaluation/scorers/llm/index.d.ts +15 -0
- package/dist/evaluation/scorers/llm/index.js +16 -0
- package/dist/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/promptAlignmentScorer.js +106 -0
- package/dist/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/summarizationScorer.js +114 -0
- package/dist/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/toneConsistencyScorer.js +106 -0
- package/dist/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/toxicityScorer.js +121 -0
- package/dist/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
- package/dist/evaluation/scorers/rule/baseRuleScorer.js +233 -0
- package/dist/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
- package/dist/evaluation/scorers/rule/contentSimilarityScorer.js +350 -0
- package/dist/evaluation/scorers/rule/formatScorer.d.ts +147 -0
- package/dist/evaluation/scorers/rule/formatScorer.js +470 -0
- package/dist/evaluation/scorers/rule/index.d.ts +9 -0
- package/dist/evaluation/scorers/rule/index.js +10 -0
- package/dist/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
- package/dist/evaluation/scorers/rule/keywordCoverageScorer.js +347 -0
- package/dist/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
- package/dist/evaluation/scorers/rule/lengthScorer.js +351 -0
- package/dist/evaluation/scorers/scorerBuilder.d.ts +161 -0
- package/dist/evaluation/scorers/scorerBuilder.js +420 -0
- package/dist/evaluation/scorers/scorerRegistry.d.ts +62 -0
- package/dist/evaluation/scorers/scorerRegistry.js +467 -0
- package/dist/index.d.ts +37 -25
- package/dist/index.js +65 -26
- package/dist/lib/auth/providers/BaseAuthProvider.d.ts +1 -1
- package/dist/lib/evaluation/BatchEvaluator.d.ts +163 -0
- package/dist/lib/evaluation/BatchEvaluator.js +268 -0
- package/dist/lib/evaluation/EvaluationAggregator.d.ts +272 -0
- package/dist/lib/evaluation/EvaluationAggregator.js +378 -0
- package/dist/lib/evaluation/EvaluatorFactory.d.ts +113 -0
- package/dist/lib/evaluation/EvaluatorFactory.js +281 -0
- package/dist/lib/evaluation/EvaluatorRegistry.d.ts +160 -0
- package/dist/lib/evaluation/EvaluatorRegistry.js +185 -0
- package/dist/lib/evaluation/errors/EvaluationError.d.ts +189 -0
- package/dist/lib/evaluation/errors/EvaluationError.js +207 -0
- package/dist/lib/evaluation/errors/index.d.ts +4 -0
- package/dist/lib/evaluation/errors/index.js +5 -0
- package/dist/lib/evaluation/hooks/index.d.ts +6 -0
- package/dist/lib/evaluation/hooks/index.js +7 -0
- package/dist/lib/evaluation/hooks/langfuseAdapter.d.ts +99 -0
- package/dist/lib/evaluation/hooks/langfuseAdapter.js +173 -0
- package/dist/lib/evaluation/hooks/observabilityHooks.d.ts +129 -0
- package/dist/lib/evaluation/hooks/observabilityHooks.js +182 -0
- package/dist/lib/evaluation/index.d.ts +11 -2
- package/dist/lib/evaluation/index.js +15 -0
- package/dist/lib/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
- package/dist/lib/evaluation/pipeline/evaluationPipeline.js +382 -0
- package/dist/lib/evaluation/pipeline/index.d.ts +8 -0
- package/dist/lib/evaluation/pipeline/index.js +9 -0
- package/dist/lib/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
- package/dist/lib/evaluation/pipeline/pipelineBuilder.js +261 -0
- package/dist/lib/evaluation/pipeline/presets.d.ts +66 -0
- package/dist/lib/evaluation/pipeline/presets.js +225 -0
- package/dist/lib/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
- package/dist/lib/evaluation/pipeline/strategies/batchStrategy.js +239 -0
- package/dist/lib/evaluation/pipeline/strategies/index.d.ts +6 -0
- package/dist/lib/evaluation/pipeline/strategies/index.js +7 -0
- package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
- package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.js +239 -0
- package/dist/lib/evaluation/reporting/index.d.ts +6 -0
- package/dist/lib/evaluation/reporting/index.js +7 -0
- package/dist/lib/evaluation/reporting/metricsCollector.d.ts +147 -0
- package/dist/lib/evaluation/reporting/metricsCollector.js +286 -0
- package/dist/lib/evaluation/reporting/reportGenerator.d.ts +90 -0
- package/dist/lib/evaluation/reporting/reportGenerator.js +375 -0
- package/dist/lib/evaluation/scorers/baseScorer.d.ts +83 -0
- package/dist/lib/evaluation/scorers/baseScorer.js +233 -0
- package/dist/lib/evaluation/scorers/customScorerUtils.d.ts +95 -0
- package/dist/lib/evaluation/scorers/customScorerUtils.js +382 -0
- package/dist/lib/evaluation/scorers/index.d.ts +10 -0
- package/dist/lib/evaluation/scorers/index.js +17 -0
- package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.js +100 -0
- package/dist/lib/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
- package/dist/lib/evaluation/scorers/llm/baseLLMScorer.js +282 -0
- package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.js +128 -0
- package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.js +93 -0
- package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.js +108 -0
- package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.js +122 -0
- package/dist/lib/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/hallucinationScorer.js +141 -0
- package/dist/lib/evaluation/scorers/llm/index.d.ts +15 -0
- package/dist/lib/evaluation/scorers/llm/index.js +17 -0
- package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.js +107 -0
- package/dist/lib/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/summarizationScorer.js +115 -0
- package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.js +107 -0
- package/dist/lib/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/toxicityScorer.js +122 -0
- package/dist/lib/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
- package/dist/lib/evaluation/scorers/rule/baseRuleScorer.js +234 -0
- package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
- package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.js +351 -0
- package/dist/lib/evaluation/scorers/rule/formatScorer.d.ts +147 -0
- package/dist/lib/evaluation/scorers/rule/formatScorer.js +471 -0
- package/dist/lib/evaluation/scorers/rule/index.d.ts +9 -0
- package/dist/lib/evaluation/scorers/rule/index.js +11 -0
- package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
- package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.js +348 -0
- package/dist/lib/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
- package/dist/lib/evaluation/scorers/rule/lengthScorer.js +352 -0
- package/dist/lib/evaluation/scorers/scorerBuilder.d.ts +161 -0
- package/dist/lib/evaluation/scorers/scorerBuilder.js +421 -0
- package/dist/lib/evaluation/scorers/scorerRegistry.d.ts +62 -0
- package/dist/lib/evaluation/scorers/scorerRegistry.js +468 -0
- package/dist/lib/index.d.ts +37 -25
- package/dist/lib/index.js +65 -26
- package/dist/lib/neurolink.d.ts +204 -0
- package/dist/lib/neurolink.js +296 -0
- package/dist/lib/types/index.d.ts +3 -1
- package/dist/lib/types/index.js +3 -2
- package/dist/lib/types/scorerTypes.d.ts +423 -0
- package/dist/lib/types/scorerTypes.js +6 -0
- package/dist/lib/utils/errorHandling.d.ts +20 -0
- package/dist/lib/utils/errorHandling.js +60 -0
- package/dist/neurolink.d.ts +204 -0
- package/dist/neurolink.js +296 -0
- package/dist/types/index.d.ts +3 -1
- package/dist/types/index.js +3 -2
- package/dist/types/scorerTypes.d.ts +423 -0
- package/dist/types/scorerTypes.js +5 -0
- package/dist/utils/errorHandling.d.ts +20 -0
- package/dist/utils/errorHandling.js +60 -0
- package/package.json +1 -1
|
@@ -0,0 +1,470 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Format Scorer
|
|
3
|
+
* Evaluates response format compliance (JSON, markdown, code, etc.)
|
|
4
|
+
*/
|
|
5
|
+
import { logger } from "../../../utils/logger.js";
|
|
6
|
+
import { BaseRuleScorer, DEFAULT_RULE_SCORER_CONFIG, } from "./baseRuleScorer.js";
|
|
7
|
+
/**
 * Static metadata describing the format scorer.
 *
 * The default configuration starts from DEFAULT_RULE_SCORER_CONFIG and sets
 * the threshold to 0.8. Only the `response` input is required; `custom` is
 * optional.
 */
const FORMAT_METADATA = {
    id: "format",
    name: "Format Validator",
    description: "Evaluates response format compliance (JSON, markdown, code, lists, etc.)",
    type: "rule",
    category: "quality",
    version: "1.0.0",
    defaultConfig: { ...DEFAULT_RULE_SCORER_CONFIG, threshold: 0.8 },
    requiredInputs: ["response"],
    optionalInputs: ["custom"],
};
|
|
24
|
+
/**
 * FormatScorer evaluates a response against one or more expected formats.
 *
 * The format of a response is inferred heuristically (see `_detectFormat`)
 * and compared with the configured `allowedFormats`, or with
 * `expectedFormat` when no list is given. Optional markdown, list and JSON
 * requirement rules are added when the matching configuration is present.
 */
export class FormatScorer extends BaseRuleScorer {
    _formatConfig;
    /**
     * @param config Scorer configuration. Values supplied here override the
     *   defaults `expectedFormat: "plain"` and `strictFormat: false`.
     */
    constructor(config) {
        super(FORMAT_METADATA, config);
        this._formatConfig = {
            expectedFormat: "plain",
            strictFormat: false,
            ...config,
        };
    }
    /**
     * Get format-specific configuration
     */
    get formatConfig() {
        return this._formatConfig;
    }
    /**
     * Build the rules for this scorer.
     *
     * A "format-check" rule (weight 1.0) is always present. The
     * "markdown-requirements", "list-requirements" and "json-schema" rules
     * (weight 0.5 each) are added only when the corresponding format is
     * allowed and the matching configuration entry is set.
     *
     * @returns The list of rules to evaluate.
     */
    getRules() {
        const rules = [];
        const formats = this._formatConfig.allowedFormats ?? [
            this._formatConfig.expectedFormat ?? "plain",
        ];
        // Main format rule
        rules.push({
            id: "format-check",
            description: `Check format is one of: ${formats.join(", ")}`,
            type: "custom",
            params: {
                formats,
                strict: this._formatConfig.strictFormat ?? false,
            },
            weight: 1.0,
        });
        // Add specific requirement rules based on format
        if (formats.includes("markdown") &&
            this._formatConfig.markdownRequirements) {
            rules.push({
                id: "markdown-requirements",
                description: "Check markdown structure requirements",
                type: "custom",
                params: {
                    requirements: this._formatConfig.markdownRequirements,
                },
                weight: 0.5,
            });
        }
        if ((formats.includes("list") ||
            formats.includes("numbered-list") ||
            formats.includes("bullet-list")) &&
            this._formatConfig.listRequirements) {
            rules.push({
                id: "list-requirements",
                description: "Check list structure requirements",
                type: "custom",
                params: {
                    requirements: this._formatConfig.listRequirements,
                },
                weight: 0.5,
            });
        }
        if (formats.includes("json") && this._formatConfig.jsonSchema) {
            rules.push({
                id: "json-schema",
                description: "Validate JSON against schema",
                type: "custom",
                params: {
                    schema: this._formatConfig
                        .jsonSchema,
                },
                weight: 0.5,
            });
        }
        return rules;
    }
    /**
     * Evaluate a single format rule against `input.response`.
     *
     * Rule ids that this scorer does not know pass with a score of 1.0.
     * NOTE: `rule.params.strict` is carried on the "format-check" rule but is
     * not consulted here.
     *
     * @param rule Rule produced by `getRules`.
     * @param input Scorer input; only `response` is read.
     * @returns `{ passed, score }` for the rule.
     */
    evaluateRule(rule, input) {
        switch (rule.id) {
            case "format-check": {
                const formats = rule.params.formats;
                const result = this._validateFormat(input.response, formats);
                return {
                    passed: result.isValid,
                    score: result.isValid ? 1.0 : 0.0,
                };
            }
            case "markdown-requirements": {
                const requirements = rule.params
                    .requirements;
                const result = this._checkMarkdownRequirements(input.response, requirements ?? {});
                return result;
            }
            case "list-requirements": {
                const requirements = rule.params
                    .requirements;
                const result = this._checkListRequirements(input.response, requirements ?? {});
                return result;
            }
            case "json-schema": {
                const schema = rule.params.schema;
                const result = this._validateJsonSchema(input.response, schema);
                return result;
            }
            default:
                return { passed: true, score: 1.0 };
        }
    }
    /**
     * Validate the detected format of `text` against the allowed formats.
     *
     * @param text Response text.
     * @param allowedFormats Formats that count as valid.
     * @returns `isValid`, the `detectedFormat`, and a list of `issues`
     *   (empty when valid).
     */
    _validateFormat(text, allowedFormats) {
        const issues = [];
        // Detect format
        const detectedFormat = this._detectFormat(text);
        // "list" is a generic format that detection never reports directly;
        // either concrete list flavour satisfies it.
        const matchesGenericList = allowedFormats.includes("list") &&
            (detectedFormat === "numbered-list" ||
                detectedFormat === "bullet-list");
        // Check if detected format is allowed
        const isValid = allowedFormats.includes(detectedFormat) || matchesGenericList;
        if (!isValid) {
            issues.push(`Expected format(s): ${allowedFormats.join(", ")}, but detected: ${detectedFormat}`);
        }
        return { isValid, detectedFormat, issues };
    }
    /**
     * Detect the format of the text.
     *
     * Checks run in a fixed order and the first match wins: JSON, YAML,
     * XML/HTML, code, markdown, numbered list, bullet list, table. Text that
     * matches none of them is reported as "plain".
     *
     * @param text Response text.
     * @returns The detected format name.
     */
    _detectFormat(text) {
        const trimmed = text.trim();
        // Check JSON
        if (this._isValidJson(trimmed)) {
            return "json";
        }
        // Check YAML (basic detection)
        if (this._isYaml(trimmed)) {
            return "yaml";
        }
        // Check XML/HTML
        if (this._isXml(trimmed)) {
            // Compare case-insensitively so "<HTML>" is treated like "<html>".
            const lowered = trimmed.toLowerCase();
            return lowered.includes("<!doctype html") || lowered.includes("<html")
                ? "html"
                : "xml";
        }
        // Check code blocks
        if (this._hasCodeBlocks(trimmed)) {
            return "code";
        }
        // Check markdown elements
        if (this._hasMarkdownElements(trimmed)) {
            return "markdown";
        }
        // Check lists
        if (this._isNumberedList(trimmed)) {
            return "numbered-list";
        }
        if (this._isBulletList(trimmed)) {
            return "bullet-list";
        }
        // Check tables (markdown style)
        if (this._hasTable(trimmed)) {
            return "table";
        }
        return "plain";
    }
    /**
     * Check if text is valid JSON.
     *
     * Only parsed objects and arrays count; JSON primitives such as `42` or
     * `"text"` return false.
     */
    _isValidJson(text) {
        try {
            const parsed = JSON.parse(text);
            return typeof parsed === "object" && parsed !== null;
        }
        catch {
            return false;
        }
    }
    /**
     * Check if text appears to be YAML.
     *
     * Heuristic: at least two non-empty, non-comment lines look like
     * `key: value` and do not contain "://".
     */
    _isYaml(text) {
        // Basic YAML detection: key: value patterns
        const lines = text.split("\n");
        let yamlPatternCount = 0;
        for (const line of lines) {
            // Skip empty lines and comments
            if (line.trim() === "" || line.trim().startsWith("#")) {
                continue;
            }
            // Check for key: value pattern (but not URLs)
            if (/^[\s-]*[\w_]+:\s*.+/.test(line) && !line.includes("://")) {
                yamlPatternCount++;
            }
        }
        return yamlPatternCount >= 2;
    }
    /**
     * Check if text is XML.
     *
     * The text must start with "<", end with ">" and contain a tag-like
     * token within its first 10000 characters.
     */
    _isXml(text) {
        return ((text.startsWith("<?xml") || text.startsWith("<")) &&
            text.endsWith(">") &&
            /<\/?[a-zA-Z][a-zA-Z0-9]*[^>]{0,1000}>/.test(text.slice(0, 10000)));
    }
    /**
     * Check if text has code blocks.
     *
     * Both fenced blocks and inline backtick spans count.
     */
    _hasCodeBlocks(text) {
        return /```[\s\S]{0,10000}?```/.test(text) || /`[^`]{1,1000}`/.test(text);
    }
    /**
     * Check if text has markdown elements (headings, emphasis, links,
     * images, blockquotes or horizontal rules).
     */
    _hasMarkdownElements(text) {
        const markdownPatterns = [
            /^#{1,6}\s+.+/m, // Headings
            /\*\*[^*]+\*\*/, // Bold
            // Italic: the opening "*" must be followed by a non-space
            // character, so "* item" bullets and "2 * 3 * 4" do not match.
            /\*[^*\s][^*]*\*/,
            /__[^_]+__/, // Bold (underscore)
            /_[^_]+_/, // Italic (underscore)
            /\[[^\]]{1,500}\]\([^)]{1,2000}\)/, // Links (bounded)
            /!\[[^\]]{0,500}\]\([^)]{1,2000}\)/, // Images (bounded)
            /^>\s+.+/m, // Blockquotes
            /^-{3,}$/m, // Horizontal rule
            /^\*{3,}$/m, // Horizontal rule
        ];
        return markdownPatterns.some((pattern) => pattern.test(text));
    }
    /**
     * Check if text is a numbered list.
     *
     * True when at least two non-empty lines start with "N." or "N)" and
     * they make up more than half of the non-empty lines.
     */
    _isNumberedList(text) {
        const lines = text
            .split("\n")
            .map((l) => l.trim())
            .filter((l) => l.length > 0);
        const numberedLines = lines.filter((l) => /^\d+[.)]\s+/.test(l));
        return (numberedLines.length >= 2 && numberedLines.length / lines.length > 0.5);
    }
    /**
     * Check if text is a bullet list.
     *
     * True when at least two non-empty lines start with "-", "*" or "+" and
     * they make up more than half of the non-empty lines.
     */
    _isBulletList(text) {
        const lines = text
            .split("\n")
            .map((l) => l.trim())
            .filter((l) => l.length > 0);
        const bulletLines = lines.filter((l) => /^[-*+]\s+/.test(l));
        return bulletLines.length >= 2 && bulletLines.length / lines.length > 0.5;
    }
    /**
     * Check if text has a markdown table.
     *
     * Requires a pipe-delimited row and a delimiter cell made of dashes and
     * colons, with optional spaces or tabs around it ("|---|" or "| --- |").
     */
    _hasTable(text) {
        return /\|.+\|/.test(text) && /\|[ \t]*[-:]+[ \t]*\|/.test(text);
    }
    /**
     * Check markdown-specific requirements.
     *
     * Each requirement that is set counts as one check. The score is the
     * fraction of checks that hold, and the rule passes at 0.8 or above.
     * With no requirements set the rule passes with score 1.0.
     *
     * @param text Response text.
     * @param requirements Optional `hasHeadings`, `hasCodeBlocks`,
     *   `hasLinks`, `hasLists`, `minHeadingLevel`, `maxHeadingLevel`.
     * @returns `{ passed, score }`.
     */
    _checkMarkdownRequirements(text, requirements) {
        let totalChecks = 0;
        let passedChecks = 0;
        if (requirements.hasHeadings !== undefined) {
            totalChecks++;
            if (/^#{1,6}\s+.+/m.test(text) === requirements.hasHeadings) {
                passedChecks++;
            }
        }
        if (requirements.hasCodeBlocks !== undefined) {
            totalChecks++;
            if (this._hasCodeBlocks(text) === requirements.hasCodeBlocks) {
                passedChecks++;
            }
        }
        if (requirements.hasLinks !== undefined) {
            totalChecks++;
            if (/\[[^\]]{1,500}\]\([^)]{1,2000}\)/.test(text) === requirements.hasLinks) {
                passedChecks++;
            }
        }
        if (requirements.hasLists !== undefined) {
            totalChecks++;
            const hasList = this._isNumberedList(text) || this._isBulletList(text);
            if (hasList === requirements.hasLists) {
                passedChecks++;
            }
        }
        if (requirements.minHeadingLevel !== undefined ||
            requirements.maxHeadingLevel !== undefined) {
            totalChecks++;
            const headingMatches = text.match(/^(#{1,6})\s+/gm);
            // When the text has no headings this check stays failed.
            if (headingMatches) {
                // Each match is the run of "#" plus trailing whitespace, so
                // the trimmed length is the heading level (1-6).
                const levels = headingMatches.map((m) => m.trim().length);
                const minLevel = Math.min(...levels);
                const maxLevel = Math.max(...levels);
                const minOk = requirements.minHeadingLevel === undefined ||
                    minLevel >= requirements.minHeadingLevel;
                const maxOk = requirements.maxHeadingLevel === undefined ||
                    maxLevel <= requirements.maxHeadingLevel;
                if (minOk && maxOk) {
                    passedChecks++;
                }
            }
        }
        if (totalChecks === 0) {
            return { passed: true, score: 1.0 };
        }
        const score = passedChecks / totalChecks;
        return { passed: score >= 0.8, score };
    }
    /**
     * Check list-specific requirements.
     *
     * List items are the non-empty lines that start with a number or bullet
     * marker. Each requirement that is set counts as one check; the rule
     * passes when at least 80% of the checks hold.
     *
     * @param text Response text.
     * @param requirements Optional `minItems`, `maxItems`, `itemPattern`
     *   (a regular-expression source string).
     * @returns `{ passed, score }`.
     */
    _checkListRequirements(text, requirements) {
        const lines = text
            .split("\n")
            .map((l) => l.trim())
            .filter((l) => l.length > 0);
        const listItems = lines.filter((l) => /^(\d+[.)]\s+|[-*+]\s+)/.test(l));
        let totalChecks = 0;
        let passedChecks = 0;
        if (requirements.minItems !== undefined) {
            totalChecks++;
            if (listItems.length >= requirements.minItems) {
                passedChecks++;
            }
        }
        if (requirements.maxItems !== undefined) {
            totalChecks++;
            if (listItems.length <= requirements.maxItems) {
                passedChecks++;
            }
        }
        if (requirements.itemPattern !== undefined) {
            totalChecks++;
            // A pattern that is rejected below is only logged; the check is
            // still counted and stays failed.
            if (requirements.itemPattern.length > 100) {
                logger.warn("[FormatScorer] itemPattern too long, using default");
            }
            else if (/(\+|\*|\?)\s*\).*?(\+|\*|\?)/.test(requirements.itemPattern) ||
                /\(\?.*?\)\s*(\+|\*|\{)/.test(requirements.itemPattern)) {
                // Detect nested quantifiers that can cause catastrophic backtracking
                logger.warn("[FormatScorer] itemPattern contains potentially unsafe nested quantifiers");
            }
            else {
                try {
                    const pattern = new RegExp(requirements.itemPattern);
                    // Items are tested with their list marker still attached.
                    const matchingItems = listItems.filter((item) => pattern.test(item));
                    if (matchingItems.length === listItems.length) {
                        passedChecks++;
                    }
                }
                catch {
                    logger.warn("[FormatScorer] Invalid itemPattern, using default");
                }
            }
        }
        if (totalChecks === 0) {
            return { passed: true, score: 1.0 };
        }
        const score = passedChecks / totalChecks;
        return { passed: score >= 0.8, score };
    }
    /**
     * Validate JSON against schema (basic validation).
     *
     * The schema argument is currently ignored: any text that `JSON.parse`
     * accepts passes.
     *
     * @param text Response text.
     * @param _schema Unused until schema validation is implemented.
     * @returns `{ passed, score }`.
     */
    _validateJsonSchema(text, _schema) {
        // First check if it's valid JSON
        try {
            JSON.parse(text);
            // TODO: Implement full JSON Schema validation
            // For now, just check it's valid JSON
            return { passed: true, score: 1.0 };
        }
        catch {
            return { passed: false, score: 0.0 };
        }
    }
    /**
     * Score the input with the base scorer and add format details
     * (`detectedFormat`, `expectedFormat`, `allowedFormats`, `strictFormat`)
     * to the result metadata.
     *
     * @param input Scorer input; `response` is used for format detection.
     * @returns The base score result with the extended metadata.
     */
    async score(input) {
        const result = await super.score(input);
        const detectedFormat = this._detectFormat(input.response);
        return {
            ...result,
            metadata: {
                ...result.metadata,
                detectedFormat,
                expectedFormat: this._formatConfig.expectedFormat ?? "plain",
                allowedFormats: this._formatConfig.allowedFormats ?? [],
                strictFormat: this._formatConfig.strictFormat ?? false,
            },
        };
    }
}
|
|
425
|
+
/**
 * Factory for FormatScorer instances.
 *
 * @param config Configuration passed straight to the FormatScorer constructor.
 * @returns A promise that resolves to the new FormatScorer.
 */
export async function createFormatScorer(config) {
    const scorer = new FormatScorer(config);
    return scorer;
}
|
|
431
|
+
/**
 * Ready-made FormatScorer factories for common formats.
 * Every call builds a fresh scorer instance.
 */
export const FormatScorerPresets = {
    /** Expects JSON, with strictFormat enabled */
    json: () => {
        return new FormatScorer({ expectedFormat: "json", strictFormat: true });
    },
    /** Expects markdown */
    markdown: () => {
        return new FormatScorer({ expectedFormat: "markdown" });
    },
    /** Expects markdown that has headings, at levels 1 through 3 */
    markdownWithHeadings: () => {
        const markdownRequirements = {
            hasHeadings: true,
            minHeadingLevel: 1,
            maxHeadingLevel: 3,
        };
        return new FormatScorer({ expectedFormat: "markdown", markdownRequirements });
    },
    /** Expects a bullet list */
    bulletList: () => {
        return new FormatScorer({ expectedFormat: "bullet-list" });
    },
    /** Expects a numbered list */
    numberedList: () => {
        return new FormatScorer({ expectedFormat: "numbered-list" });
    },
    /** Expects a code response */
    code: () => {
        return new FormatScorer({ expectedFormat: "code" });
    },
    /** Expects plain text, with strictFormat enabled */
    plainText: () => {
        return new FormatScorer({ expectedFormat: "plain", strictFormat: true });
    },
};
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Rule Scorers Index
|
|
3
|
+
* Export all rule-based scorers
|
|
4
|
+
*/
|
|
5
|
+
export { BaseRuleScorer, DEFAULT_RULE_SCORER_CONFIG, } from "./baseRuleScorer.js";
|
|
6
|
+
export { type ContentSimilarityConfig, ContentSimilarityScorer, createContentSimilarityScorer, type SimilarityMetric, } from "./contentSimilarityScorer.js";
|
|
7
|
+
export { type CodeLanguage, createFormatScorer, FormatScorer, type FormatScorerConfig, FormatScorerPresets, type FormatType, } from "./formatScorer.js";
|
|
8
|
+
export { createKeywordCoverageScorer, type KeywordCoverageConfig, KeywordCoverageScorer, } from "./keywordCoverageScorer.js";
|
|
9
|
+
export { createLengthScorer, type LengthConstraintType, LengthScorer, type LengthScorerConfig, LengthScorerPresets, type LengthUnit, } from "./lengthScorer.js";
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Rule Scorers Index
|
|
3
|
+
* Export all rule-based scorers
|
|
4
|
+
*/
|
|
5
|
+
export { BaseRuleScorer, DEFAULT_RULE_SCORER_CONFIG, } from "./baseRuleScorer.js";
|
|
6
|
+
export { ContentSimilarityScorer, createContentSimilarityScorer, } from "./contentSimilarityScorer.js";
|
|
7
|
+
export { createFormatScorer, FormatScorer, FormatScorerPresets, } from "./formatScorer.js";
|
|
8
|
+
// Rule Scorers
|
|
9
|
+
export { createKeywordCoverageScorer, KeywordCoverageScorer, } from "./keywordCoverageScorer.js";
|
|
10
|
+
export { createLengthScorer, LengthScorer, LengthScorerPresets, } from "./lengthScorer.js";
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Keyword Coverage Scorer
|
|
3
|
+
* Evaluates how well the response covers expected keywords or topics
|
|
4
|
+
*/
|
|
5
|
+
import type { RuleScorerConfig, ScoreResult, ScorerInput, ScorerRule } from "../../../types/scorerTypes.js";
|
|
6
|
+
import { BaseRuleScorer } from "./baseRuleScorer.js";
|
|
7
|
+
/**
 * Options for keyword coverage scoring, layered on top of the common
 * RuleScorerConfig. Every field is optional.
 */
export type KeywordCoverageConfig = RuleScorerConfig & {
    /** The keywords the response is checked for */
    keywords?: string[];
    /** Lowest coverage ratio (0-1) that still counts as a pass */
    minCoverage?: number;
    /** Match keywords without regard to letter case */
    caseInsensitive?: boolean;
    /** Match keywords on word boundaries */
    wordBoundary?: boolean;
    /** Synonym lists, keyed by string, for more flexible matching */
    synonyms?: Record<string, string[]>;
    /** Numeric weights, keyed by string, so keywords can count differently */
    keywordWeights?: Record<string, number>;
};
|
|
24
|
+
/**
 * Rule scorer that measures how well a response covers a set of expected
 * keywords.
 */
export declare class KeywordCoverageScorer extends BaseRuleScorer {
    private _keywordConfig;
    private _dynamicRules;
    constructor(config?: KeywordCoverageConfig);
    /**
     * Builds scorer rules from the keyword configuration.
     */
    private _buildRulesFromKeywords;
    /**
     * Builds scorer rules from a keyword list and returns them directly,
     * leaving instance state untouched so callers can use the rules locally.
     */
    private _buildRulesFromKeywordsList;
    /**
     * The keyword-specific configuration of this scorer.
     */
    get keywordConfig(): KeywordCoverageConfig;
    /**
     * The rules this scorer evaluates.
     */
    getRules(): ScorerRule[];
    /**
     * Replaces the keywords at runtime, optionally with per-keyword weights.
     */
    setKeywords(keywords: string[], weights?: Record<string, number>): void;
    /**
     * Derives keywords from the context or ground truth when none were
     * configured.
     */
    private _extractKeywordsFromInput;
    /**
     * Picks important words out of a text (simple extraction).
     */
    private _extractImportantWords;
    /**
     * Evaluates one keyword rule against the input.
     */
    evaluateRule(rule: ScorerRule, input: ScorerInput): {
        passed: boolean;
        score: number;
    };
    /**
     * Tests whether a keyword occurs in a text.
     */
    private _checkKeywordPresence;
    /**
     * Scores the input; overridden to support dynamic keyword extraction.
     */
    score(input: ScorerInput): Promise<ScoreResult>;
    /**
     * Computes detailed coverage information.
     */
    private _calculateCoverageDetails;
}
|
|
80
|
+
/**
 * Factory for KeywordCoverageScorer instances.
 *
 * @param config Optional keyword coverage configuration.
 * @returns A promise that resolves to a KeywordCoverageScorer.
 */
export declare function createKeywordCoverageScorer(config?: KeywordCoverageConfig): Promise<KeywordCoverageScorer>;
|