@juspay/neurolink 9.42.0 → 9.43.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +8 -0
- package/dist/auth/anthropicOAuth.js +12 -0
- package/dist/browser/neurolink.min.js +335 -334
- package/dist/cli/commands/mcp.d.ts +6 -0
- package/dist/cli/commands/mcp.js +200 -184
- package/dist/cli/commands/proxy.js +560 -518
- package/dist/core/baseProvider.d.ts +6 -1
- package/dist/core/baseProvider.js +219 -232
- package/dist/core/factory.d.ts +3 -0
- package/dist/core/factory.js +140 -190
- package/dist/core/modules/ToolsManager.d.ts +1 -0
- package/dist/core/modules/ToolsManager.js +40 -42
- package/dist/core/toolEvents.d.ts +3 -0
- package/dist/core/toolEvents.js +7 -0
- package/dist/evaluation/pipeline/evaluationPipeline.js +5 -2
- package/dist/evaluation/scorers/scorerRegistry.d.ts +3 -0
- package/dist/evaluation/scorers/scorerRegistry.js +356 -284
- package/dist/lib/auth/anthropicOAuth.js +12 -0
- package/dist/lib/core/baseProvider.d.ts +6 -1
- package/dist/lib/core/baseProvider.js +219 -232
- package/dist/lib/core/factory.d.ts +3 -0
- package/dist/lib/core/factory.js +140 -190
- package/dist/lib/core/modules/ToolsManager.d.ts +1 -0
- package/dist/lib/core/modules/ToolsManager.js +40 -42
- package/dist/lib/core/toolEvents.d.ts +3 -0
- package/dist/lib/core/toolEvents.js +8 -0
- package/dist/lib/evaluation/pipeline/evaluationPipeline.js +5 -2
- package/dist/lib/evaluation/scorers/scorerRegistry.d.ts +3 -0
- package/dist/lib/evaluation/scorers/scorerRegistry.js +356 -284
- package/dist/lib/mcp/toolRegistry.d.ts +2 -0
- package/dist/lib/mcp/toolRegistry.js +32 -31
- package/dist/lib/neurolink.d.ts +38 -0
- package/dist/lib/neurolink.js +1890 -1707
- package/dist/lib/providers/googleAiStudio.js +0 -5
- package/dist/lib/providers/googleNativeGemini3.d.ts +4 -0
- package/dist/lib/providers/googleNativeGemini3.js +39 -1
- package/dist/lib/providers/googleVertex.d.ts +10 -0
- package/dist/lib/providers/googleVertex.js +445 -445
- package/dist/lib/providers/litellm.d.ts +1 -0
- package/dist/lib/providers/litellm.js +73 -64
- package/dist/lib/providers/ollama.js +17 -4
- package/dist/lib/providers/openAI.d.ts +2 -0
- package/dist/lib/providers/openAI.js +139 -140
- package/dist/lib/proxy/claudeFormat.js +14 -5
- package/dist/lib/proxy/oauthFetch.js +298 -318
- package/dist/lib/proxy/proxyConfig.js +3 -1
- package/dist/lib/proxy/proxyFetch.js +250 -222
- package/dist/lib/proxy/proxyHealth.d.ts +17 -0
- package/dist/lib/proxy/proxyHealth.js +55 -0
- package/dist/lib/proxy/requestLogger.js +140 -48
- package/dist/lib/proxy/routingPolicy.d.ts +33 -0
- package/dist/lib/proxy/routingPolicy.js +255 -0
- package/dist/lib/proxy/snapshotPersistence.d.ts +2 -0
- package/dist/lib/proxy/snapshotPersistence.js +41 -0
- package/dist/lib/proxy/sseInterceptor.js +36 -11
- package/dist/lib/server/routes/claudeProxyRoutes.d.ts +2 -1
- package/dist/lib/server/routes/claudeProxyRoutes.js +2916 -2377
- package/dist/lib/services/server/ai/observability/instrumentation.js +194 -218
- package/dist/lib/tasks/backends/bullmqBackend.js +24 -18
- package/dist/lib/tasks/store/redisTaskStore.js +42 -17
- package/dist/lib/tasks/taskManager.d.ts +2 -0
- package/dist/lib/tasks/taskManager.js +100 -5
- package/dist/lib/telemetry/telemetryService.js +9 -5
- package/dist/lib/types/cli.d.ts +4 -0
- package/dist/lib/types/proxyTypes.d.ts +211 -1
- package/dist/lib/types/tools.d.ts +18 -0
- package/dist/lib/utils/providerHealth.d.ts +1 -0
- package/dist/lib/utils/providerHealth.js +46 -31
- package/dist/lib/utils/providerUtils.js +11 -22
- package/dist/lib/utils/schemaConversion.d.ts +1 -0
- package/dist/lib/utils/schemaConversion.js +3 -0
- package/dist/mcp/toolRegistry.d.ts +2 -0
- package/dist/mcp/toolRegistry.js +32 -31
- package/dist/neurolink.d.ts +38 -0
- package/dist/neurolink.js +1890 -1707
- package/dist/providers/googleAiStudio.js +0 -5
- package/dist/providers/googleNativeGemini3.d.ts +4 -0
- package/dist/providers/googleNativeGemini3.js +39 -1
- package/dist/providers/googleVertex.d.ts +10 -0
- package/dist/providers/googleVertex.js +445 -445
- package/dist/providers/litellm.d.ts +1 -0
- package/dist/providers/litellm.js +73 -64
- package/dist/providers/ollama.js +17 -4
- package/dist/providers/openAI.d.ts +2 -0
- package/dist/providers/openAI.js +139 -140
- package/dist/proxy/claudeFormat.js +14 -5
- package/dist/proxy/oauthFetch.js +298 -318
- package/dist/proxy/proxyConfig.js +3 -1
- package/dist/proxy/proxyFetch.js +250 -222
- package/dist/proxy/proxyHealth.d.ts +17 -0
- package/dist/proxy/proxyHealth.js +54 -0
- package/dist/proxy/requestLogger.js +140 -48
- package/dist/proxy/routingPolicy.d.ts +33 -0
- package/dist/proxy/routingPolicy.js +254 -0
- package/dist/proxy/snapshotPersistence.d.ts +2 -0
- package/dist/proxy/snapshotPersistence.js +40 -0
- package/dist/proxy/sseInterceptor.js +36 -11
- package/dist/server/routes/claudeProxyRoutes.d.ts +2 -1
- package/dist/server/routes/claudeProxyRoutes.js +2916 -2377
- package/dist/services/server/ai/observability/instrumentation.js +194 -218
- package/dist/tasks/backends/bullmqBackend.js +24 -18
- package/dist/tasks/store/redisTaskStore.js +42 -17
- package/dist/tasks/taskManager.d.ts +2 -0
- package/dist/tasks/taskManager.js +100 -5
- package/dist/telemetry/telemetryService.js +9 -5
- package/dist/types/cli.d.ts +4 -0
- package/dist/types/proxyTypes.d.ts +211 -1
- package/dist/types/tools.d.ts +18 -0
- package/dist/utils/providerHealth.d.ts +1 -0
- package/dist/utils/providerHealth.js +46 -31
- package/dist/utils/providerUtils.js +12 -22
- package/dist/utils/schemaConversion.d.ts +1 -0
- package/dist/utils/schemaConversion.js +3 -0
- package/package.json +3 -2
- package/scripts/observability/check-proxy-telemetry.mjs +1 -1
- package/scripts/observability/manage-local-openobserve.sh +36 -5
|
@@ -3,6 +3,346 @@
|
|
|
3
3
|
* Follows NeuroLink's factory + registry pattern with dynamic imports
|
|
4
4
|
*/
|
|
5
5
|
import { logger } from "../../utils/logger.js";
|
|
6
|
+
const BUILT_IN_LLM_SCORERS = [
|
|
7
|
+
{
|
|
8
|
+
metadata: {
|
|
9
|
+
id: "hallucination",
|
|
10
|
+
name: "Hallucination Detection",
|
|
11
|
+
description: "Detects factual errors, fabrications, and unsupported claims in responses",
|
|
12
|
+
type: "llm",
|
|
13
|
+
category: "accuracy",
|
|
14
|
+
version: "1.0.0",
|
|
15
|
+
defaultConfig: {
|
|
16
|
+
enabled: true,
|
|
17
|
+
threshold: 0.8,
|
|
18
|
+
weight: 1.5,
|
|
19
|
+
timeout: 30000,
|
|
20
|
+
retries: 2,
|
|
21
|
+
},
|
|
22
|
+
requiredInputs: ["query", "response"],
|
|
23
|
+
optionalInputs: ["context", "groundTruth"],
|
|
24
|
+
},
|
|
25
|
+
factory: async (config) => {
|
|
26
|
+
const { HallucinationScorer } = await import("./llm/hallucinationScorer.js");
|
|
27
|
+
return new HallucinationScorer(config);
|
|
28
|
+
},
|
|
29
|
+
aliases: ["hallucination-detection", "hallucinations"],
|
|
30
|
+
},
|
|
31
|
+
{
|
|
32
|
+
metadata: {
|
|
33
|
+
id: "toxicity",
|
|
34
|
+
name: "Toxicity Analysis",
|
|
35
|
+
description: "Detects harmful, offensive, or inappropriate content in responses",
|
|
36
|
+
type: "llm",
|
|
37
|
+
category: "safety",
|
|
38
|
+
version: "1.0.0",
|
|
39
|
+
defaultConfig: {
|
|
40
|
+
enabled: true,
|
|
41
|
+
threshold: 0.9,
|
|
42
|
+
weight: 2.0,
|
|
43
|
+
timeout: 20000,
|
|
44
|
+
retries: 1,
|
|
45
|
+
},
|
|
46
|
+
requiredInputs: ["response"],
|
|
47
|
+
optionalInputs: ["query"],
|
|
48
|
+
},
|
|
49
|
+
factory: async (config) => {
|
|
50
|
+
const { ToxicityScorer } = await import("./llm/toxicityScorer.js");
|
|
51
|
+
return new ToxicityScorer(config);
|
|
52
|
+
},
|
|
53
|
+
aliases: ["toxic", "safety"],
|
|
54
|
+
},
|
|
55
|
+
{
|
|
56
|
+
metadata: {
|
|
57
|
+
id: "faithfulness",
|
|
58
|
+
name: "Faithfulness",
|
|
59
|
+
description: "Evaluates if the response is faithfully grounded in provided context",
|
|
60
|
+
type: "llm",
|
|
61
|
+
category: "faithfulness",
|
|
62
|
+
version: "1.0.0",
|
|
63
|
+
defaultConfig: {
|
|
64
|
+
enabled: true,
|
|
65
|
+
threshold: 0.7,
|
|
66
|
+
weight: 1.2,
|
|
67
|
+
timeout: 30000,
|
|
68
|
+
retries: 2,
|
|
69
|
+
},
|
|
70
|
+
requiredInputs: ["response", "context"],
|
|
71
|
+
optionalInputs: ["query"],
|
|
72
|
+
},
|
|
73
|
+
factory: async (config) => {
|
|
74
|
+
const { FaithfulnessScorer } = await import("./llm/faithfulnessScorer.js");
|
|
75
|
+
return new FaithfulnessScorer(config);
|
|
76
|
+
},
|
|
77
|
+
aliases: ["faithful", "grounding"],
|
|
78
|
+
},
|
|
79
|
+
{
|
|
80
|
+
metadata: {
|
|
81
|
+
id: "context-relevancy",
|
|
82
|
+
name: "Context Relevancy",
|
|
83
|
+
description: "Evaluates how relevant the retrieved context is to the user query",
|
|
84
|
+
type: "llm",
|
|
85
|
+
category: "relevancy",
|
|
86
|
+
version: "1.0.0",
|
|
87
|
+
defaultConfig: {
|
|
88
|
+
enabled: true,
|
|
89
|
+
threshold: 0.6,
|
|
90
|
+
weight: 1.0,
|
|
91
|
+
timeout: 25000,
|
|
92
|
+
retries: 2,
|
|
93
|
+
},
|
|
94
|
+
requiredInputs: ["query", "context"],
|
|
95
|
+
optionalInputs: ["response"],
|
|
96
|
+
},
|
|
97
|
+
factory: async (config) => {
|
|
98
|
+
const { ContextRelevancyScorer } = await import("./llm/contextRelevancyScorer.js");
|
|
99
|
+
return new ContextRelevancyScorer(config);
|
|
100
|
+
},
|
|
101
|
+
aliases: ["context-relevance"],
|
|
102
|
+
},
|
|
103
|
+
{
|
|
104
|
+
metadata: {
|
|
105
|
+
id: "answer-relevancy",
|
|
106
|
+
name: "Answer Relevancy",
|
|
107
|
+
description: "Evaluates how relevant the AI response is to the user query",
|
|
108
|
+
type: "llm",
|
|
109
|
+
category: "relevancy",
|
|
110
|
+
version: "1.0.0",
|
|
111
|
+
defaultConfig: {
|
|
112
|
+
enabled: true,
|
|
113
|
+
threshold: 0.7,
|
|
114
|
+
weight: 1.0,
|
|
115
|
+
timeout: 25000,
|
|
116
|
+
retries: 2,
|
|
117
|
+
},
|
|
118
|
+
requiredInputs: ["query", "response"],
|
|
119
|
+
optionalInputs: ["context"],
|
|
120
|
+
},
|
|
121
|
+
factory: async (config) => {
|
|
122
|
+
const { AnswerRelevancyScorer } = await import("./llm/answerRelevancyScorer.js");
|
|
123
|
+
return new AnswerRelevancyScorer(config);
|
|
124
|
+
},
|
|
125
|
+
aliases: ["response-relevancy", "relevancy"],
|
|
126
|
+
},
|
|
127
|
+
{
|
|
128
|
+
metadata: {
|
|
129
|
+
id: "context-precision",
|
|
130
|
+
name: "Context Precision",
|
|
131
|
+
description: "Measures the precision of retrieved context - whether relevant chunks are ranked higher",
|
|
132
|
+
type: "llm",
|
|
133
|
+
category: "relevancy",
|
|
134
|
+
version: "1.0.0",
|
|
135
|
+
defaultConfig: {
|
|
136
|
+
enabled: true,
|
|
137
|
+
threshold: 0.6,
|
|
138
|
+
weight: 0.8,
|
|
139
|
+
timeout: 25000,
|
|
140
|
+
retries: 2,
|
|
141
|
+
},
|
|
142
|
+
requiredInputs: ["query", "context"],
|
|
143
|
+
optionalInputs: ["groundTruth"],
|
|
144
|
+
},
|
|
145
|
+
factory: async (config) => {
|
|
146
|
+
const { ContextPrecisionScorer } = await import("./llm/contextPrecisionScorer.js");
|
|
147
|
+
return new ContextPrecisionScorer(config);
|
|
148
|
+
},
|
|
149
|
+
aliases: ["precision"],
|
|
150
|
+
},
|
|
151
|
+
{
|
|
152
|
+
metadata: {
|
|
153
|
+
id: "bias-detection",
|
|
154
|
+
name: "Bias Detection",
|
|
155
|
+
description: "Identifies potential biases in AI responses",
|
|
156
|
+
type: "llm",
|
|
157
|
+
category: "safety",
|
|
158
|
+
version: "1.0.0",
|
|
159
|
+
defaultConfig: {
|
|
160
|
+
enabled: true,
|
|
161
|
+
threshold: 0.8,
|
|
162
|
+
weight: 1.0,
|
|
163
|
+
timeout: 25000,
|
|
164
|
+
retries: 2,
|
|
165
|
+
},
|
|
166
|
+
requiredInputs: ["response"],
|
|
167
|
+
optionalInputs: ["query", "context"],
|
|
168
|
+
},
|
|
169
|
+
factory: async (config) => {
|
|
170
|
+
const { BiasDetectionScorer } = await import("./llm/biasDetectionScorer.js");
|
|
171
|
+
return new BiasDetectionScorer(config);
|
|
172
|
+
},
|
|
173
|
+
aliases: ["bias", "fairness"],
|
|
174
|
+
},
|
|
175
|
+
{
|
|
176
|
+
metadata: {
|
|
177
|
+
id: "tone-consistency",
|
|
178
|
+
name: "Tone Consistency",
|
|
179
|
+
description: "Checks for consistent tone throughout the response",
|
|
180
|
+
type: "llm",
|
|
181
|
+
category: "quality",
|
|
182
|
+
version: "1.0.0",
|
|
183
|
+
defaultConfig: {
|
|
184
|
+
enabled: true,
|
|
185
|
+
threshold: 0.7,
|
|
186
|
+
weight: 0.8,
|
|
187
|
+
timeout: 20000,
|
|
188
|
+
retries: 1,
|
|
189
|
+
},
|
|
190
|
+
requiredInputs: ["response"],
|
|
191
|
+
optionalInputs: ["query"],
|
|
192
|
+
},
|
|
193
|
+
factory: async (config) => {
|
|
194
|
+
const { ToneConsistencyScorer } = await import("./llm/toneConsistencyScorer.js");
|
|
195
|
+
return new ToneConsistencyScorer(config);
|
|
196
|
+
},
|
|
197
|
+
aliases: ["tone"],
|
|
198
|
+
},
|
|
199
|
+
{
|
|
200
|
+
metadata: {
|
|
201
|
+
id: "prompt-alignment",
|
|
202
|
+
name: "Prompt Alignment",
|
|
203
|
+
description: "Measures how well the response aligns with prompt instructions",
|
|
204
|
+
type: "llm",
|
|
205
|
+
category: "quality",
|
|
206
|
+
version: "1.0.0",
|
|
207
|
+
defaultConfig: {
|
|
208
|
+
enabled: true,
|
|
209
|
+
threshold: 0.7,
|
|
210
|
+
weight: 1.0,
|
|
211
|
+
timeout: 25000,
|
|
212
|
+
retries: 2,
|
|
213
|
+
},
|
|
214
|
+
requiredInputs: ["query", "response"],
|
|
215
|
+
optionalInputs: [],
|
|
216
|
+
},
|
|
217
|
+
factory: async (config) => {
|
|
218
|
+
const { PromptAlignmentScorer } = await import("./llm/promptAlignmentScorer.js");
|
|
219
|
+
return new PromptAlignmentScorer(config);
|
|
220
|
+
},
|
|
221
|
+
aliases: ["alignment", "instruction-following"],
|
|
222
|
+
},
|
|
223
|
+
{
|
|
224
|
+
metadata: {
|
|
225
|
+
id: "summarization",
|
|
226
|
+
name: "Summarization Quality",
|
|
227
|
+
description: "Evaluates the quality of AI-generated summaries",
|
|
228
|
+
type: "llm",
|
|
229
|
+
category: "quality",
|
|
230
|
+
version: "1.0.0",
|
|
231
|
+
defaultConfig: {
|
|
232
|
+
enabled: true,
|
|
233
|
+
threshold: 0.7,
|
|
234
|
+
weight: 1.0,
|
|
235
|
+
timeout: 25000,
|
|
236
|
+
retries: 2,
|
|
237
|
+
},
|
|
238
|
+
requiredInputs: ["response", "context"],
|
|
239
|
+
optionalInputs: ["query"],
|
|
240
|
+
},
|
|
241
|
+
factory: async (config) => {
|
|
242
|
+
const { SummarizationScorer } = await import("./llm/summarizationScorer.js");
|
|
243
|
+
return new SummarizationScorer(config);
|
|
244
|
+
},
|
|
245
|
+
aliases: ["summary"],
|
|
246
|
+
},
|
|
247
|
+
];
|
|
248
|
+
const BUILT_IN_RULE_SCORERS = [
|
|
249
|
+
{
|
|
250
|
+
metadata: {
|
|
251
|
+
id: "keyword-coverage",
|
|
252
|
+
name: "Keyword Coverage",
|
|
253
|
+
description: "Checks if response covers expected keywords and concepts",
|
|
254
|
+
type: "rule",
|
|
255
|
+
category: "quality",
|
|
256
|
+
version: "1.0.0",
|
|
257
|
+
defaultConfig: {
|
|
258
|
+
enabled: true,
|
|
259
|
+
threshold: 0.6,
|
|
260
|
+
weight: 0.8,
|
|
261
|
+
timeout: 1000,
|
|
262
|
+
retries: 0,
|
|
263
|
+
},
|
|
264
|
+
requiredInputs: ["response"],
|
|
265
|
+
optionalInputs: ["query", "custom"],
|
|
266
|
+
},
|
|
267
|
+
factory: async (config) => {
|
|
268
|
+
const { KeywordCoverageScorer } = await import("./rule/keywordCoverageScorer.js");
|
|
269
|
+
return new KeywordCoverageScorer(config);
|
|
270
|
+
},
|
|
271
|
+
aliases: ["keywords"],
|
|
272
|
+
},
|
|
273
|
+
{
|
|
274
|
+
metadata: {
|
|
275
|
+
id: "content-similarity",
|
|
276
|
+
name: "Content Similarity",
|
|
277
|
+
description: "Measures text similarity between response and reference",
|
|
278
|
+
type: "rule",
|
|
279
|
+
category: "accuracy",
|
|
280
|
+
version: "1.0.0",
|
|
281
|
+
defaultConfig: {
|
|
282
|
+
enabled: true,
|
|
283
|
+
threshold: 0.5,
|
|
284
|
+
weight: 1.0,
|
|
285
|
+
timeout: 2000,
|
|
286
|
+
retries: 0,
|
|
287
|
+
},
|
|
288
|
+
requiredInputs: ["response", "groundTruth"],
|
|
289
|
+
optionalInputs: [],
|
|
290
|
+
},
|
|
291
|
+
factory: async (config) => {
|
|
292
|
+
const { ContentSimilarityScorer } = await import("./rule/contentSimilarityScorer.js");
|
|
293
|
+
return new ContentSimilarityScorer(config);
|
|
294
|
+
},
|
|
295
|
+
aliases: ["similarity", "text-similarity"],
|
|
296
|
+
},
|
|
297
|
+
{
|
|
298
|
+
metadata: {
|
|
299
|
+
id: "length",
|
|
300
|
+
name: "Response Length",
|
|
301
|
+
description: "Validates response length against configured bounds",
|
|
302
|
+
type: "rule",
|
|
303
|
+
category: "quality",
|
|
304
|
+
version: "1.0.0",
|
|
305
|
+
defaultConfig: {
|
|
306
|
+
enabled: true,
|
|
307
|
+
threshold: 0.8,
|
|
308
|
+
weight: 0.5,
|
|
309
|
+
timeout: 100,
|
|
310
|
+
retries: 0,
|
|
311
|
+
},
|
|
312
|
+
requiredInputs: ["response"],
|
|
313
|
+
optionalInputs: [],
|
|
314
|
+
},
|
|
315
|
+
factory: async (config) => {
|
|
316
|
+
const { LengthScorer } = await import("./rule/lengthScorer.js");
|
|
317
|
+
return new LengthScorer(config);
|
|
318
|
+
},
|
|
319
|
+
aliases: ["response-length"],
|
|
320
|
+
},
|
|
321
|
+
{
|
|
322
|
+
metadata: {
|
|
323
|
+
id: "format",
|
|
324
|
+
name: "Format Validation",
|
|
325
|
+
description: "Checks if response follows expected formatting requirements",
|
|
326
|
+
type: "rule",
|
|
327
|
+
category: "quality",
|
|
328
|
+
version: "1.0.0",
|
|
329
|
+
defaultConfig: {
|
|
330
|
+
enabled: true,
|
|
331
|
+
threshold: 0.8,
|
|
332
|
+
weight: 0.5,
|
|
333
|
+
timeout: 100,
|
|
334
|
+
retries: 0,
|
|
335
|
+
},
|
|
336
|
+
requiredInputs: ["response"],
|
|
337
|
+
optionalInputs: ["custom"],
|
|
338
|
+
},
|
|
339
|
+
factory: async (config) => {
|
|
340
|
+
const { FormatScorer } = await import("./rule/formatScorer.js");
|
|
341
|
+
return new FormatScorer(config);
|
|
342
|
+
},
|
|
343
|
+
aliases: ["formatting"],
|
|
344
|
+
},
|
|
345
|
+
];
|
|
6
346
|
/**
|
|
7
347
|
* Central registry for all scorers
|
|
8
348
|
* Manages registration, discovery, and instantiation
|
|
@@ -44,6 +384,17 @@ export class ScorerRegistry {
|
|
|
44
384
|
aliases,
|
|
45
385
|
});
|
|
46
386
|
}
|
|
387
|
+
static registerScorerDefinitions(definitions) {
|
|
388
|
+
for (const definition of definitions) {
|
|
389
|
+
ScorerRegistry.registerScorer(definition.metadata, definition.factory, definition.aliases || []);
|
|
390
|
+
}
|
|
391
|
+
}
|
|
392
|
+
static registerBuiltInLLMScorers() {
|
|
393
|
+
ScorerRegistry.registerScorerDefinitions(BUILT_IN_LLM_SCORERS);
|
|
394
|
+
}
|
|
395
|
+
static registerBuiltInRuleScorers() {
|
|
396
|
+
ScorerRegistry.registerScorerDefinitions(BUILT_IN_RULE_SCORERS);
|
|
397
|
+
}
|
|
47
398
|
/**
|
|
48
399
|
* Register built-in scorers using dynamic imports
|
|
49
400
|
*/
|
|
@@ -56,293 +407,14 @@ export class ScorerRegistry {
|
|
|
56
407
|
}
|
|
57
408
|
ScorerRegistry.initPromise = (async () => {
|
|
58
409
|
try {
|
|
59
|
-
|
|
60
|
-
ScorerRegistry.registerScorer({
|
|
61
|
-
id: "hallucination",
|
|
62
|
-
name: "Hallucination Detection",
|
|
63
|
-
description: "Detects factual errors, fabrications, and unsupported claims in responses",
|
|
64
|
-
type: "llm",
|
|
65
|
-
category: "accuracy",
|
|
66
|
-
version: "1.0.0",
|
|
67
|
-
defaultConfig: {
|
|
68
|
-
enabled: true,
|
|
69
|
-
threshold: 0.8,
|
|
70
|
-
weight: 1.5,
|
|
71
|
-
timeout: 30000,
|
|
72
|
-
retries: 2,
|
|
73
|
-
},
|
|
74
|
-
requiredInputs: ["query", "response"],
|
|
75
|
-
optionalInputs: ["context", "groundTruth"],
|
|
76
|
-
}, async (config) => {
|
|
77
|
-
const { HallucinationScorer } = await import("./llm/hallucinationScorer.js");
|
|
78
|
-
return new HallucinationScorer(config);
|
|
79
|
-
}, ["hallucination-detection", "hallucinations"]);
|
|
80
|
-
ScorerRegistry.registerScorer({
|
|
81
|
-
id: "toxicity",
|
|
82
|
-
name: "Toxicity Analysis",
|
|
83
|
-
description: "Detects harmful, offensive, or inappropriate content in responses",
|
|
84
|
-
type: "llm",
|
|
85
|
-
category: "safety",
|
|
86
|
-
version: "1.0.0",
|
|
87
|
-
defaultConfig: {
|
|
88
|
-
enabled: true,
|
|
89
|
-
threshold: 0.9,
|
|
90
|
-
weight: 2.0,
|
|
91
|
-
timeout: 20000,
|
|
92
|
-
retries: 1,
|
|
93
|
-
},
|
|
94
|
-
requiredInputs: ["response"],
|
|
95
|
-
optionalInputs: ["query"],
|
|
96
|
-
}, async (config) => {
|
|
97
|
-
const { ToxicityScorer } = await import("./llm/toxicityScorer.js");
|
|
98
|
-
return new ToxicityScorer(config);
|
|
99
|
-
}, ["toxic", "safety"]);
|
|
100
|
-
ScorerRegistry.registerScorer({
|
|
101
|
-
id: "faithfulness",
|
|
102
|
-
name: "Faithfulness",
|
|
103
|
-
description: "Evaluates if the response is faithfully grounded in provided context",
|
|
104
|
-
type: "llm",
|
|
105
|
-
category: "faithfulness",
|
|
106
|
-
version: "1.0.0",
|
|
107
|
-
defaultConfig: {
|
|
108
|
-
enabled: true,
|
|
109
|
-
threshold: 0.7,
|
|
110
|
-
weight: 1.2,
|
|
111
|
-
timeout: 30000,
|
|
112
|
-
retries: 2,
|
|
113
|
-
},
|
|
114
|
-
requiredInputs: ["response", "context"],
|
|
115
|
-
optionalInputs: ["query"],
|
|
116
|
-
}, async (config) => {
|
|
117
|
-
const { FaithfulnessScorer } = await import("./llm/faithfulnessScorer.js");
|
|
118
|
-
return new FaithfulnessScorer(config);
|
|
119
|
-
}, ["faithful", "grounding"]);
|
|
120
|
-
ScorerRegistry.registerScorer({
|
|
121
|
-
id: "context-relevancy",
|
|
122
|
-
name: "Context Relevancy",
|
|
123
|
-
description: "Evaluates how relevant the retrieved context is to the user query",
|
|
124
|
-
type: "llm",
|
|
125
|
-
category: "relevancy",
|
|
126
|
-
version: "1.0.0",
|
|
127
|
-
defaultConfig: {
|
|
128
|
-
enabled: true,
|
|
129
|
-
threshold: 0.6,
|
|
130
|
-
weight: 1.0,
|
|
131
|
-
timeout: 25000,
|
|
132
|
-
retries: 2,
|
|
133
|
-
},
|
|
134
|
-
requiredInputs: ["query", "context"],
|
|
135
|
-
optionalInputs: ["response"],
|
|
136
|
-
}, async (config) => {
|
|
137
|
-
const { ContextRelevancyScorer } = await import("./llm/contextRelevancyScorer.js");
|
|
138
|
-
return new ContextRelevancyScorer(config);
|
|
139
|
-
}, ["context-relevance"]);
|
|
140
|
-
ScorerRegistry.registerScorer({
|
|
141
|
-
id: "answer-relevancy",
|
|
142
|
-
name: "Answer Relevancy",
|
|
143
|
-
description: "Evaluates how relevant the AI response is to the user query",
|
|
144
|
-
type: "llm",
|
|
145
|
-
category: "relevancy",
|
|
146
|
-
version: "1.0.0",
|
|
147
|
-
defaultConfig: {
|
|
148
|
-
enabled: true,
|
|
149
|
-
threshold: 0.7,
|
|
150
|
-
weight: 1.0,
|
|
151
|
-
timeout: 25000,
|
|
152
|
-
retries: 2,
|
|
153
|
-
},
|
|
154
|
-
requiredInputs: ["query", "response"],
|
|
155
|
-
optionalInputs: ["context"],
|
|
156
|
-
}, async (config) => {
|
|
157
|
-
const { AnswerRelevancyScorer } = await import("./llm/answerRelevancyScorer.js");
|
|
158
|
-
return new AnswerRelevancyScorer(config);
|
|
159
|
-
}, ["response-relevancy", "relevancy"]);
|
|
160
|
-
ScorerRegistry.registerScorer({
|
|
161
|
-
id: "context-precision",
|
|
162
|
-
name: "Context Precision",
|
|
163
|
-
description: "Measures the precision of retrieved context - whether relevant chunks are ranked higher",
|
|
164
|
-
type: "llm",
|
|
165
|
-
category: "relevancy",
|
|
166
|
-
version: "1.0.0",
|
|
167
|
-
defaultConfig: {
|
|
168
|
-
enabled: true,
|
|
169
|
-
threshold: 0.6,
|
|
170
|
-
weight: 0.8,
|
|
171
|
-
timeout: 25000,
|
|
172
|
-
retries: 2,
|
|
173
|
-
},
|
|
174
|
-
requiredInputs: ["query", "context"],
|
|
175
|
-
optionalInputs: ["groundTruth"],
|
|
176
|
-
}, async (config) => {
|
|
177
|
-
const { ContextPrecisionScorer } = await import("./llm/contextPrecisionScorer.js");
|
|
178
|
-
return new ContextPrecisionScorer(config);
|
|
179
|
-
}, ["precision"]);
|
|
180
|
-
ScorerRegistry.registerScorer({
|
|
181
|
-
id: "bias-detection",
|
|
182
|
-
name: "Bias Detection",
|
|
183
|
-
description: "Identifies potential biases in AI responses",
|
|
184
|
-
type: "llm",
|
|
185
|
-
category: "safety",
|
|
186
|
-
version: "1.0.0",
|
|
187
|
-
defaultConfig: {
|
|
188
|
-
enabled: true,
|
|
189
|
-
threshold: 0.8,
|
|
190
|
-
weight: 1.0,
|
|
191
|
-
timeout: 25000,
|
|
192
|
-
retries: 2,
|
|
193
|
-
},
|
|
194
|
-
requiredInputs: ["response"],
|
|
195
|
-
optionalInputs: ["query", "context"],
|
|
196
|
-
}, async (config) => {
|
|
197
|
-
const { BiasDetectionScorer } = await import("./llm/biasDetectionScorer.js");
|
|
198
|
-
return new BiasDetectionScorer(config);
|
|
199
|
-
}, ["bias", "fairness"]);
|
|
200
|
-
ScorerRegistry.registerScorer({
|
|
201
|
-
id: "tone-consistency",
|
|
202
|
-
name: "Tone Consistency",
|
|
203
|
-
description: "Checks for consistent tone throughout the response",
|
|
204
|
-
type: "llm",
|
|
205
|
-
category: "quality",
|
|
206
|
-
version: "1.0.0",
|
|
207
|
-
defaultConfig: {
|
|
208
|
-
enabled: true,
|
|
209
|
-
threshold: 0.7,
|
|
210
|
-
weight: 0.8,
|
|
211
|
-
timeout: 20000,
|
|
212
|
-
retries: 1,
|
|
213
|
-
},
|
|
214
|
-
requiredInputs: ["response"],
|
|
215
|
-
optionalInputs: ["query"],
|
|
216
|
-
}, async (config) => {
|
|
217
|
-
const { ToneConsistencyScorer } = await import("./llm/toneConsistencyScorer.js");
|
|
218
|
-
return new ToneConsistencyScorer(config);
|
|
219
|
-
}, ["tone"]);
|
|
220
|
-
ScorerRegistry.registerScorer({
|
|
221
|
-
id: "prompt-alignment",
|
|
222
|
-
name: "Prompt Alignment",
|
|
223
|
-
description: "Measures how well the response aligns with prompt instructions",
|
|
224
|
-
type: "llm",
|
|
225
|
-
category: "quality",
|
|
226
|
-
version: "1.0.0",
|
|
227
|
-
defaultConfig: {
|
|
228
|
-
enabled: true,
|
|
229
|
-
threshold: 0.7,
|
|
230
|
-
weight: 1.0,
|
|
231
|
-
timeout: 25000,
|
|
232
|
-
retries: 2,
|
|
233
|
-
},
|
|
234
|
-
requiredInputs: ["query", "response"],
|
|
235
|
-
optionalInputs: [],
|
|
236
|
-
}, async (config) => {
|
|
237
|
-
const { PromptAlignmentScorer } = await import("./llm/promptAlignmentScorer.js");
|
|
238
|
-
return new PromptAlignmentScorer(config);
|
|
239
|
-
}, ["alignment", "instruction-following"]);
|
|
240
|
-
ScorerRegistry.registerScorer({
|
|
241
|
-
id: "summarization",
|
|
242
|
-
name: "Summarization Quality",
|
|
243
|
-
description: "Evaluates the quality of AI-generated summaries",
|
|
244
|
-
type: "llm",
|
|
245
|
-
category: "quality",
|
|
246
|
-
version: "1.0.0",
|
|
247
|
-
defaultConfig: {
|
|
248
|
-
enabled: true,
|
|
249
|
-
threshold: 0.7,
|
|
250
|
-
weight: 1.0,
|
|
251
|
-
timeout: 25000,
|
|
252
|
-
retries: 2,
|
|
253
|
-
},
|
|
254
|
-
requiredInputs: ["response", "context"],
|
|
255
|
-
optionalInputs: ["query"],
|
|
256
|
-
}, async (config) => {
|
|
257
|
-
const { SummarizationScorer } = await import("./llm/summarizationScorer.js");
|
|
258
|
-
return new SummarizationScorer(config);
|
|
259
|
-
}, ["summary"]);
|
|
260
|
-
// Register rule-based scorers
|
|
261
|
-
ScorerRegistry.registerScorer({
|
|
262
|
-
id: "keyword-coverage",
|
|
263
|
-
name: "Keyword Coverage",
|
|
264
|
-
description: "Checks if response covers expected keywords and concepts",
|
|
265
|
-
type: "rule",
|
|
266
|
-
category: "quality",
|
|
267
|
-
version: "1.0.0",
|
|
268
|
-
defaultConfig: {
|
|
269
|
-
enabled: true,
|
|
270
|
-
threshold: 0.6,
|
|
271
|
-
weight: 0.8,
|
|
272
|
-
timeout: 1000,
|
|
273
|
-
retries: 0,
|
|
274
|
-
},
|
|
275
|
-
requiredInputs: ["response"],
|
|
276
|
-
optionalInputs: ["query", "custom"],
|
|
277
|
-
}, async (config) => {
|
|
278
|
-
const { KeywordCoverageScorer } = await import("./rule/keywordCoverageScorer.js");
|
|
279
|
-
return new KeywordCoverageScorer(config);
|
|
280
|
-
}, ["keywords"]);
|
|
281
|
-
ScorerRegistry.registerScorer({
|
|
282
|
-
id: "content-similarity",
|
|
283
|
-
name: "Content Similarity",
|
|
284
|
-
description: "Measures text similarity between response and reference",
|
|
285
|
-
type: "rule",
|
|
286
|
-
category: "accuracy",
|
|
287
|
-
version: "1.0.0",
|
|
288
|
-
defaultConfig: {
|
|
289
|
-
enabled: true,
|
|
290
|
-
threshold: 0.5,
|
|
291
|
-
weight: 1.0,
|
|
292
|
-
timeout: 2000,
|
|
293
|
-
retries: 0,
|
|
294
|
-
},
|
|
295
|
-
requiredInputs: ["response", "groundTruth"],
|
|
296
|
-
optionalInputs: [],
|
|
297
|
-
}, async (config) => {
|
|
298
|
-
const { ContentSimilarityScorer } = await import("./rule/contentSimilarityScorer.js");
|
|
299
|
-
return new ContentSimilarityScorer(config);
|
|
300
|
-
}, ["similarity", "text-similarity"]);
|
|
301
|
-
ScorerRegistry.registerScorer({
|
|
302
|
-
id: "length",
|
|
303
|
-
name: "Response Length",
|
|
304
|
-
description: "Validates response length against configured bounds",
|
|
305
|
-
type: "rule",
|
|
306
|
-
category: "quality",
|
|
307
|
-
version: "1.0.0",
|
|
308
|
-
defaultConfig: {
|
|
309
|
-
enabled: true,
|
|
310
|
-
threshold: 0.8,
|
|
311
|
-
weight: 0.5,
|
|
312
|
-
timeout: 100,
|
|
313
|
-
retries: 0,
|
|
314
|
-
},
|
|
315
|
-
requiredInputs: ["response"],
|
|
316
|
-
optionalInputs: [],
|
|
317
|
-
}, async (config) => {
|
|
318
|
-
const { LengthScorer } = await import("./rule/lengthScorer.js");
|
|
319
|
-
return new LengthScorer(config);
|
|
320
|
-
}, ["response-length"]);
|
|
321
|
-
ScorerRegistry.registerScorer({
|
|
322
|
-
id: "format",
|
|
323
|
-
name: "Format Validation",
|
|
324
|
-
description: "Checks if response follows expected formatting requirements",
|
|
325
|
-
type: "rule",
|
|
326
|
-
category: "quality",
|
|
327
|
-
version: "1.0.0",
|
|
328
|
-
defaultConfig: {
|
|
329
|
-
enabled: true,
|
|
330
|
-
threshold: 0.8,
|
|
331
|
-
weight: 0.5,
|
|
332
|
-
timeout: 100,
|
|
333
|
-
retries: 0,
|
|
334
|
-
},
|
|
335
|
-
requiredInputs: ["response"],
|
|
336
|
-
optionalInputs: ["custom"],
|
|
337
|
-
}, async (config) => {
|
|
338
|
-
const { FormatScorer } = await import("./rule/formatScorer.js");
|
|
339
|
-
return new FormatScorer(config);
|
|
340
|
-
}, ["formatting"]);
|
|
410
|
+
ScorerRegistry.registerBuiltInLLMScorers();
|
|
411
|
+
ScorerRegistry.registerBuiltInRuleScorers();
|
|
341
412
|
ScorerRegistry.initialized = true;
|
|
342
413
|
logger.debug(`Registered ${ScorerRegistry.scorers.size} built-in scorers (including aliases)`);
|
|
343
414
|
}
|
|
344
|
-
|
|
345
|
-
|
|
415
|
+
catch (err) {
|
|
416
|
+
ScorerRegistry.initPromise = null; // allow retry on next call
|
|
417
|
+
throw err;
|
|
346
418
|
}
|
|
347
419
|
})();
|
|
348
420
|
return ScorerRegistry.initPromise;
|
|
@@ -69,6 +69,8 @@ export declare class MCPToolRegistry extends MCPRegistry {
|
|
|
69
69
|
permissions?: string[];
|
|
70
70
|
context?: ExecutionContext;
|
|
71
71
|
}): Promise<ToolInfo[]>;
|
|
72
|
+
private resolveToolExecutionTarget;
|
|
73
|
+
private createExecutionContext;
|
|
72
74
|
/**
|
|
73
75
|
* Get tool information with server details
|
|
74
76
|
*/
|