@juspay/neurolink 9.36.1 → 9.37.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (202)
  1. package/CHANGELOG.md +6 -0
  2. package/dist/auth/errors.d.ts +1 -1
  3. package/dist/auth/middleware/AuthMiddleware.d.ts +1 -1
  4. package/dist/auth/providers/BaseAuthProvider.d.ts +1 -1
  5. package/dist/browser/neurolink.min.js +921 -423
  6. package/dist/cli/commands/evaluate.d.ts +48 -0
  7. package/dist/cli/commands/evaluate.js +955 -0
  8. package/dist/cli/parser.js +4 -1
  9. package/dist/evaluation/BatchEvaluator.d.ts +163 -0
  10. package/dist/evaluation/BatchEvaluator.js +267 -0
  11. package/dist/evaluation/EvaluationAggregator.d.ts +272 -0
  12. package/dist/evaluation/EvaluationAggregator.js +377 -0
  13. package/dist/evaluation/EvaluatorFactory.d.ts +113 -0
  14. package/dist/evaluation/EvaluatorFactory.js +280 -0
  15. package/dist/evaluation/EvaluatorRegistry.d.ts +160 -0
  16. package/dist/evaluation/EvaluatorRegistry.js +184 -0
  17. package/dist/evaluation/errors/EvaluationError.d.ts +189 -0
  18. package/dist/evaluation/errors/EvaluationError.js +206 -0
  19. package/dist/evaluation/errors/index.d.ts +4 -0
  20. package/dist/evaluation/errors/index.js +4 -0
  21. package/dist/evaluation/hooks/index.d.ts +6 -0
  22. package/dist/evaluation/hooks/index.js +6 -0
  23. package/dist/evaluation/hooks/langfuseAdapter.d.ts +99 -0
  24. package/dist/evaluation/hooks/langfuseAdapter.js +172 -0
  25. package/dist/evaluation/hooks/observabilityHooks.d.ts +129 -0
  26. package/dist/evaluation/hooks/observabilityHooks.js +181 -0
  27. package/dist/evaluation/index.d.ts +11 -2
  28. package/dist/evaluation/index.js +15 -0
  29. package/dist/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
  30. package/dist/evaluation/pipeline/evaluationPipeline.js +381 -0
  31. package/dist/evaluation/pipeline/index.d.ts +8 -0
  32. package/dist/evaluation/pipeline/index.js +8 -0
  33. package/dist/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
  34. package/dist/evaluation/pipeline/pipelineBuilder.js +260 -0
  35. package/dist/evaluation/pipeline/presets.d.ts +66 -0
  36. package/dist/evaluation/pipeline/presets.js +224 -0
  37. package/dist/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
  38. package/dist/evaluation/pipeline/strategies/batchStrategy.js +238 -0
  39. package/dist/evaluation/pipeline/strategies/index.d.ts +6 -0
  40. package/dist/evaluation/pipeline/strategies/index.js +6 -0
  41. package/dist/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
  42. package/dist/evaluation/pipeline/strategies/samplingStrategy.js +238 -0
  43. package/dist/evaluation/reporting/index.d.ts +6 -0
  44. package/dist/evaluation/reporting/index.js +6 -0
  45. package/dist/evaluation/reporting/metricsCollector.d.ts +147 -0
  46. package/dist/evaluation/reporting/metricsCollector.js +285 -0
  47. package/dist/evaluation/reporting/reportGenerator.d.ts +90 -0
  48. package/dist/evaluation/reporting/reportGenerator.js +374 -0
  49. package/dist/evaluation/scorers/baseScorer.d.ts +83 -0
  50. package/dist/evaluation/scorers/baseScorer.js +232 -0
  51. package/dist/evaluation/scorers/customScorerUtils.d.ts +95 -0
  52. package/dist/evaluation/scorers/customScorerUtils.js +381 -0
  53. package/dist/evaluation/scorers/index.d.ts +10 -0
  54. package/dist/evaluation/scorers/index.js +16 -0
  55. package/dist/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
  56. package/dist/evaluation/scorers/llm/answerRelevancyScorer.js +99 -0
  57. package/dist/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
  58. package/dist/evaluation/scorers/llm/baseLLMScorer.js +281 -0
  59. package/dist/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
  60. package/dist/evaluation/scorers/llm/biasDetectionScorer.js +127 -0
  61. package/dist/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
  62. package/dist/evaluation/scorers/llm/contextPrecisionScorer.js +92 -0
  63. package/dist/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
  64. package/dist/evaluation/scorers/llm/contextRelevancyScorer.js +107 -0
  65. package/dist/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
  66. package/dist/evaluation/scorers/llm/faithfulnessScorer.js +121 -0
  67. package/dist/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
  68. package/dist/evaluation/scorers/llm/hallucinationScorer.js +140 -0
  69. package/dist/evaluation/scorers/llm/index.d.ts +15 -0
  70. package/dist/evaluation/scorers/llm/index.js +16 -0
  71. package/dist/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
  72. package/dist/evaluation/scorers/llm/promptAlignmentScorer.js +106 -0
  73. package/dist/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
  74. package/dist/evaluation/scorers/llm/summarizationScorer.js +114 -0
  75. package/dist/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
  76. package/dist/evaluation/scorers/llm/toneConsistencyScorer.js +106 -0
  77. package/dist/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
  78. package/dist/evaluation/scorers/llm/toxicityScorer.js +121 -0
  79. package/dist/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
  80. package/dist/evaluation/scorers/rule/baseRuleScorer.js +233 -0
  81. package/dist/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
  82. package/dist/evaluation/scorers/rule/contentSimilarityScorer.js +350 -0
  83. package/dist/evaluation/scorers/rule/formatScorer.d.ts +147 -0
  84. package/dist/evaluation/scorers/rule/formatScorer.js +470 -0
  85. package/dist/evaluation/scorers/rule/index.d.ts +9 -0
  86. package/dist/evaluation/scorers/rule/index.js +10 -0
  87. package/dist/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
  88. package/dist/evaluation/scorers/rule/keywordCoverageScorer.js +347 -0
  89. package/dist/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
  90. package/dist/evaluation/scorers/rule/lengthScorer.js +351 -0
  91. package/dist/evaluation/scorers/scorerBuilder.d.ts +161 -0
  92. package/dist/evaluation/scorers/scorerBuilder.js +420 -0
  93. package/dist/evaluation/scorers/scorerRegistry.d.ts +62 -0
  94. package/dist/evaluation/scorers/scorerRegistry.js +467 -0
  95. package/dist/index.d.ts +37 -25
  96. package/dist/index.js +65 -26
  97. package/dist/lib/auth/providers/BaseAuthProvider.d.ts +1 -1
  98. package/dist/lib/evaluation/BatchEvaluator.d.ts +163 -0
  99. package/dist/lib/evaluation/BatchEvaluator.js +268 -0
  100. package/dist/lib/evaluation/EvaluationAggregator.d.ts +272 -0
  101. package/dist/lib/evaluation/EvaluationAggregator.js +378 -0
  102. package/dist/lib/evaluation/EvaluatorFactory.d.ts +113 -0
  103. package/dist/lib/evaluation/EvaluatorFactory.js +281 -0
  104. package/dist/lib/evaluation/EvaluatorRegistry.d.ts +160 -0
  105. package/dist/lib/evaluation/EvaluatorRegistry.js +185 -0
  106. package/dist/lib/evaluation/errors/EvaluationError.d.ts +189 -0
  107. package/dist/lib/evaluation/errors/EvaluationError.js +207 -0
  108. package/dist/lib/evaluation/errors/index.d.ts +4 -0
  109. package/dist/lib/evaluation/errors/index.js +5 -0
  110. package/dist/lib/evaluation/hooks/index.d.ts +6 -0
  111. package/dist/lib/evaluation/hooks/index.js +7 -0
  112. package/dist/lib/evaluation/hooks/langfuseAdapter.d.ts +99 -0
  113. package/dist/lib/evaluation/hooks/langfuseAdapter.js +173 -0
  114. package/dist/lib/evaluation/hooks/observabilityHooks.d.ts +129 -0
  115. package/dist/lib/evaluation/hooks/observabilityHooks.js +182 -0
  116. package/dist/lib/evaluation/index.d.ts +11 -2
  117. package/dist/lib/evaluation/index.js +15 -0
  118. package/dist/lib/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
  119. package/dist/lib/evaluation/pipeline/evaluationPipeline.js +382 -0
  120. package/dist/lib/evaluation/pipeline/index.d.ts +8 -0
  121. package/dist/lib/evaluation/pipeline/index.js +9 -0
  122. package/dist/lib/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
  123. package/dist/lib/evaluation/pipeline/pipelineBuilder.js +261 -0
  124. package/dist/lib/evaluation/pipeline/presets.d.ts +66 -0
  125. package/dist/lib/evaluation/pipeline/presets.js +225 -0
  126. package/dist/lib/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
  127. package/dist/lib/evaluation/pipeline/strategies/batchStrategy.js +239 -0
  128. package/dist/lib/evaluation/pipeline/strategies/index.d.ts +6 -0
  129. package/dist/lib/evaluation/pipeline/strategies/index.js +7 -0
  130. package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
  131. package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.js +239 -0
  132. package/dist/lib/evaluation/reporting/index.d.ts +6 -0
  133. package/dist/lib/evaluation/reporting/index.js +7 -0
  134. package/dist/lib/evaluation/reporting/metricsCollector.d.ts +147 -0
  135. package/dist/lib/evaluation/reporting/metricsCollector.js +286 -0
  136. package/dist/lib/evaluation/reporting/reportGenerator.d.ts +90 -0
  137. package/dist/lib/evaluation/reporting/reportGenerator.js +375 -0
  138. package/dist/lib/evaluation/scorers/baseScorer.d.ts +83 -0
  139. package/dist/lib/evaluation/scorers/baseScorer.js +233 -0
  140. package/dist/lib/evaluation/scorers/customScorerUtils.d.ts +95 -0
  141. package/dist/lib/evaluation/scorers/customScorerUtils.js +382 -0
  142. package/dist/lib/evaluation/scorers/index.d.ts +10 -0
  143. package/dist/lib/evaluation/scorers/index.js +17 -0
  144. package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
  145. package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.js +100 -0
  146. package/dist/lib/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
  147. package/dist/lib/evaluation/scorers/llm/baseLLMScorer.js +282 -0
  148. package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
  149. package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.js +128 -0
  150. package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
  151. package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.js +93 -0
  152. package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
  153. package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.js +108 -0
  154. package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
  155. package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.js +122 -0
  156. package/dist/lib/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
  157. package/dist/lib/evaluation/scorers/llm/hallucinationScorer.js +141 -0
  158. package/dist/lib/evaluation/scorers/llm/index.d.ts +15 -0
  159. package/dist/lib/evaluation/scorers/llm/index.js +17 -0
  160. package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
  161. package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.js +107 -0
  162. package/dist/lib/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
  163. package/dist/lib/evaluation/scorers/llm/summarizationScorer.js +115 -0
  164. package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
  165. package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.js +107 -0
  166. package/dist/lib/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
  167. package/dist/lib/evaluation/scorers/llm/toxicityScorer.js +122 -0
  168. package/dist/lib/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
  169. package/dist/lib/evaluation/scorers/rule/baseRuleScorer.js +234 -0
  170. package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
  171. package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.js +351 -0
  172. package/dist/lib/evaluation/scorers/rule/formatScorer.d.ts +147 -0
  173. package/dist/lib/evaluation/scorers/rule/formatScorer.js +471 -0
  174. package/dist/lib/evaluation/scorers/rule/index.d.ts +9 -0
  175. package/dist/lib/evaluation/scorers/rule/index.js +11 -0
  176. package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
  177. package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.js +348 -0
  178. package/dist/lib/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
  179. package/dist/lib/evaluation/scorers/rule/lengthScorer.js +352 -0
  180. package/dist/lib/evaluation/scorers/scorerBuilder.d.ts +161 -0
  181. package/dist/lib/evaluation/scorers/scorerBuilder.js +421 -0
  182. package/dist/lib/evaluation/scorers/scorerRegistry.d.ts +62 -0
  183. package/dist/lib/evaluation/scorers/scorerRegistry.js +468 -0
  184. package/dist/lib/index.d.ts +37 -25
  185. package/dist/lib/index.js +65 -26
  186. package/dist/lib/neurolink.d.ts +204 -0
  187. package/dist/lib/neurolink.js +296 -0
  188. package/dist/lib/types/index.d.ts +3 -1
  189. package/dist/lib/types/index.js +3 -2
  190. package/dist/lib/types/scorerTypes.d.ts +423 -0
  191. package/dist/lib/types/scorerTypes.js +6 -0
  192. package/dist/lib/utils/errorHandling.d.ts +20 -0
  193. package/dist/lib/utils/errorHandling.js +60 -0
  194. package/dist/neurolink.d.ts +204 -0
  195. package/dist/neurolink.js +296 -0
  196. package/dist/types/index.d.ts +3 -1
  197. package/dist/types/index.js +3 -2
  198. package/dist/types/scorerTypes.d.ts +423 -0
  199. package/dist/types/scorerTypes.js +5 -0
  200. package/dist/utils/errorHandling.d.ts +20 -0
  201. package/dist/utils/errorHandling.js +60 -0
  202. package/package.json +1 -1
@@ -0,0 +1,423 @@
1
+ /**
2
+ * @file Scorer type definitions for NeuroLink evaluation system
3
+ * Mastra-style modular scorer interfaces and types
4
+ */
5
+ import type { JsonObject } from "./common.js";
6
+ import type { EnhancedEvaluationContext } from "./evaluationTypes.js";
7
+ import type { GenerateResult } from "./generateTypes.js";
8
+ /**
9
+ * Scorer type classification
10
+ */
11
+ export type ScorerType = "llm" | "rule" | "hybrid";
12
+ /**
13
+ * Scorer categories for organization
14
+ */
15
+ export type ScorerCategory = "accuracy" | "relevancy" | "safety" | "quality" | "faithfulness" | "custom";
16
+ /**
17
+ * Score scale configuration
18
+ */
19
+ export type ScoreScale = {
20
+ /** Minimum score value */
21
+ min: number;
22
+ /** Maximum score value */
23
+ max: number;
24
+ /** Decimal precision for scores */
25
+ precision: number;
26
+ };
27
+ /**
28
+ * Individual score result from a scorer
29
+ */
30
+ export type ScoreResult = {
31
+ /** Unique identifier for the scorer */
32
+ scorerId: string;
33
+ /** Display name of the scorer */
34
+ scorerName: string;
35
+ /** Numeric score value */
36
+ score: number;
37
+ /** Normalized score (0-1 scale) */
38
+ normalizedScore: number;
39
+ /** Score scale used */
40
+ scale: ScoreScale;
41
+ /** Human-readable reasoning for the score */
42
+ reasoning: string;
43
+ /** Whether the score passes the threshold */
44
+ passed: boolean;
45
+ /** Threshold used for pass/fail determination */
46
+ threshold: number;
47
+ /** Confidence level (0-1) for LLM-based scores */
48
+ confidence?: number;
49
+ /** Additional metadata from the scorer */
50
+ metadata?: JsonObject;
51
+ /** Time taken to compute the score (ms) */
52
+ computeTime: number;
53
+ /** Error if scoring failed */
54
+ error?: string;
55
+ };
56
+ /**
57
+ * Aggregated scores from multiple scorers
58
+ */
59
+ export type AggregatedScores = {
60
+ /** Individual score results */
61
+ scores: ScoreResult[];
62
+ /** Overall aggregated score */
63
+ overallScore: number;
64
+ /** Aggregation method used */
65
+ aggregationMethod: AggregationMethod;
66
+ /** Whether overall evaluation passed */
67
+ passed: boolean;
68
+ /** Total computation time (ms) */
69
+ totalComputeTime: number;
70
+ /** Timestamp of evaluation */
71
+ timestamp: number;
72
+ /** Session/request ID for correlation */
73
+ correlationId?: string;
74
+ };
75
+ /**
76
+ * Aggregation method for combining scores
77
+ */
78
+ export type AggregationMethod = "average" | "weighted" | "minimum" | "maximum" | "custom";
79
+ /**
80
+ * Scorer configuration options
81
+ */
82
+ export type ScorerConfig = {
83
+ /** Whether the scorer is enabled */
84
+ enabled?: boolean;
85
+ /** Pass/fail threshold (0-1 normalized) */
86
+ threshold?: number;
87
+ /** Weight for weighted aggregation */
88
+ weight?: number;
89
+ /** Custom scorer-specific configuration */
90
+ options?: JsonObject;
91
+ /** Timeout for scorer execution (ms) */
92
+ timeout?: number;
93
+ /** Number of retry attempts */
94
+ retries?: number;
95
+ };
96
+ /**
97
+ * Input context for scorer execution
98
+ */
99
+ export type ScorerInput = {
100
+ /** The user's original query/prompt */
101
+ query: string;
102
+ /** The AI-generated response to evaluate */
103
+ response: string;
104
+ /** Retrieved context (for RAG evaluations) */
105
+ context?: string[];
106
+ /** Ground truth/expected answer (for accuracy checks) */
107
+ groundTruth?: string;
108
+ /** Full generation result with metadata */
109
+ generationResult?: GenerateResult;
110
+ /** Enhanced evaluation context */
111
+ evaluationContext?: EnhancedEvaluationContext;
112
+ /** Conversation history for multi-turn evaluation */
113
+ conversationHistory?: Array<{
114
+ role: string;
115
+ content: string;
116
+ }>;
117
+ /** Custom input data for specific scorers */
118
+ custom?: JsonObject;
119
+ };
120
+ /**
121
+ * Scorer metadata for registration
122
+ */
123
+ export type ScorerMetadata = {
124
+ /** Unique scorer identifier */
125
+ id: string;
126
+ /** Human-readable name */
127
+ name: string;
128
+ /** Description of what the scorer evaluates */
129
+ description: string;
130
+ /** Scorer type (llm, rule, hybrid) */
131
+ type: ScorerType;
132
+ /** Category for grouping */
133
+ category: ScorerCategory;
134
+ /** Version string */
135
+ version: string;
136
+ /** Default configuration */
137
+ defaultConfig: ScorerConfig;
138
+ /** Required input fields */
139
+ requiredInputs: (keyof ScorerInput)[];
140
+ /** Optional input fields */
141
+ optionalInputs: (keyof ScorerInput)[];
142
+ };
143
+ /**
144
+ * LLM-based scorer configuration
145
+ */
146
+ export type LLMScorerConfig = ScorerConfig & {
147
+ /** Model to use for scoring */
148
+ model?: string;
149
+ /** Provider for the scoring model */
150
+ provider?: string;
151
+ /** Temperature for LLM scoring */
152
+ temperature?: number;
153
+ /** Custom prompt template */
154
+ promptTemplate?: string;
155
+ /** Output schema for structured scoring */
156
+ outputSchema?: JsonObject;
157
+ };
158
+ /**
159
+ * Rule-based scorer configuration
160
+ */
161
+ export type RuleScorerConfig = ScorerConfig & {
162
+ /** Rules to apply */
163
+ rules?: ScorerRule[];
164
+ /** How to combine rule results */
165
+ ruleCombination?: "all" | "any" | "weighted";
166
+ };
167
+ /**
168
+ * Individual rule for rule-based scorers
169
+ */
170
+ export type ScorerRule = {
171
+ /** Rule identifier */
172
+ id: string;
173
+ /** Rule description */
174
+ description: string;
175
+ /** Rule type */
176
+ type: "regex" | "keyword" | "length" | "custom";
177
+ /** Rule parameters */
178
+ params: JsonObject;
179
+ /** Weight for this rule */
180
+ weight?: number;
181
+ };
182
+ /**
183
+ * Rule evaluation result
184
+ */
185
+ export type RuleResult = {
186
+ /** Rule identifier */
187
+ ruleId: string;
188
+ /** Whether the rule passed */
189
+ passed: boolean;
190
+ /** Score from this rule */
191
+ score: number;
192
+ /** Reasoning for the result */
193
+ reasoning?: string;
194
+ };
195
+ /**
196
+ * Scorer execution events for observability
197
+ */
198
+ export type ScorerEvent = {
199
+ /** Event type */
200
+ type: "scorer:start" | "scorer:end" | "scorer:error";
201
+ /** Scorer identifier */
202
+ scorerId: string;
203
+ /** Event timestamp */
204
+ timestamp: number;
205
+ /** Duration (for end events) */
206
+ duration?: number;
207
+ /** Score result (for end events) */
208
+ score?: number;
209
+ /** Error message (for error events) */
210
+ error?: string;
211
+ /** Additional metadata */
212
+ metadata?: JsonObject;
213
+ };
214
+ /**
215
+ * Scorer registry entry
216
+ */
217
+ export type ScorerRegistryEntry = {
218
+ /** Scorer metadata */
219
+ metadata: ScorerMetadata;
220
+ /** Factory function for creating scorer instances */
221
+ factory: ScorerFactory;
222
+ /** Default configuration */
223
+ defaultConfig: ScorerConfig;
224
+ /** Aliases for this scorer */
225
+ aliases?: string[];
226
+ };
227
+ /**
228
+ * Factory function for creating scorer instances
229
+ */
230
+ export type ScorerFactory = (config?: ScorerConfig) => Promise<Scorer>;
231
+ /**
232
+ * Core Scorer interface - all scorers must implement this
233
+ */
234
+ export type Scorer = {
235
+ /** Scorer metadata */
236
+ readonly metadata: ScorerMetadata;
237
+ /** Current configuration */
238
+ readonly config: ScorerConfig;
239
+ /**
240
+ * Execute the scorer and return a score result
241
+ * @param input - Input context for scoring
242
+ * @returns Score result
243
+ */
244
+ score(input: ScorerInput): Promise<ScoreResult>;
245
+ /**
246
+ * Validate that required inputs are present
247
+ * @param input - Input to validate
248
+ * @returns Validation result
249
+ */
250
+ validateInput(input: ScorerInput): {
251
+ valid: boolean;
252
+ errors: string[];
253
+ };
254
+ /**
255
+ * Update scorer configuration
256
+ * @param config - New configuration
257
+ */
258
+ configure(config: Partial<ScorerConfig>): void;
259
+ };
260
+ /**
261
+ * Extended interface for LLM-based scorers
262
+ */
263
+ export type LLMScorer = Scorer & {
264
+ /** LLM-specific configuration */
265
+ readonly llmConfig: LLMScorerConfig;
266
+ /**
267
+ * Generate the prompt for LLM scoring
268
+ * @param input - Scorer input
269
+ * @returns Prompt string
270
+ */
271
+ generatePrompt(input: ScorerInput): string;
272
+ /**
273
+ * Parse LLM response into score result
274
+ * @param response - Raw LLM response
275
+ * @param input - Original input
276
+ * @returns Parsed score result
277
+ */
278
+ parseResponse(response: string, input: ScorerInput): Partial<ScoreResult>;
279
+ };
280
+ /**
281
+ * Extended interface for rule-based scorers
282
+ */
283
+ export type RuleScorer = Scorer & {
284
+ /** Rule-specific configuration */
285
+ readonly ruleConfig: RuleScorerConfig;
286
+ /**
287
+ * Get all rules for this scorer
288
+ * @returns Array of rules
289
+ */
290
+ getRules(): ScorerRule[];
291
+ /**
292
+ * Evaluate a single rule
293
+ * @param rule - Rule to evaluate
294
+ * @param input - Scorer input
295
+ * @returns Rule result
296
+ */
297
+ evaluateRule(rule: ScorerRule, input: ScorerInput): {
298
+ passed: boolean;
299
+ score: number;
300
+ };
301
+ };
302
+ /**
303
+ * Pipeline configuration for multi-scorer evaluation
304
+ */
305
+ export type PipelineConfig = {
306
+ /** Pipeline name */
307
+ name?: string;
308
+ /** Pipeline description */
309
+ description?: string;
310
+ /** Scorers to run in the pipeline */
311
+ scorers: Array<{
312
+ id: string;
313
+ config?: ScorerConfig;
314
+ }>;
315
+ /** Aggregation configuration */
316
+ aggregation?: AggregationConfig;
317
+ /** Overall pass threshold */
318
+ passThreshold?: number;
319
+ /** Execution mode */
320
+ executionMode?: "parallel" | "sequential";
321
+ /** Stop on first failure */
322
+ stopOnFailure?: boolean;
323
+ /** Timeout for entire pipeline (ms) */
324
+ timeout?: number;
325
+ /** Required scorers that must pass */
326
+ requiredScorers?: string[];
327
+ };
328
+ /**
329
+ * Aggregation configuration
330
+ */
331
+ export type AggregationConfig = {
332
+ /** Aggregation method */
333
+ method: AggregationMethod;
334
+ /** Weights for weighted aggregation */
335
+ weights?: Record<string, number>;
336
+ /** Custom aggregation function */
337
+ customFn?: (scores: ScoreResult[]) => number;
338
+ };
339
+ /**
340
+ * Sampling configuration for cost-efficient evaluation
341
+ */
342
+ export type SamplingConfig = {
343
+ /** Sampling rate (0-1) */
344
+ rate: number;
345
+ /** Always evaluate certain conditions */
346
+ alwaysEvaluate?: {
347
+ /** Always evaluate errors */
348
+ errors?: boolean;
349
+ /** Always evaluate for certain users */
350
+ users?: string[];
351
+ /** Always evaluate certain tags */
352
+ tags?: string[];
353
+ };
354
+ /** Adaptive sampling configuration */
355
+ adaptive?: {
356
+ /** Enable adaptive sampling */
357
+ enabled: boolean;
358
+ /** Adjust rate based on quality */
359
+ qualityThreshold: number;
360
+ /** Minimum sampling rate */
361
+ minRate: number;
362
+ /** Maximum sampling rate */
363
+ maxRate: number;
364
+ };
365
+ };
366
+ /**
367
+ * Sampling decision result
368
+ */
369
+ export type SamplingDecision = {
370
+ /** Whether to sample this request */
371
+ shouldSample: boolean;
372
+ /** Reason for decision */
373
+ reason: string;
374
+ /** Current sampling rate */
375
+ currentRate: number;
376
+ };
377
+ /**
378
+ * Sampling context for adaptive sampling
379
+ */
380
+ export type SamplingContext = {
381
+ /** Recent quality scores */
382
+ recentScores?: number[];
383
+ /** User ID if available */
384
+ userId?: string;
385
+ /** Tags for this request */
386
+ tags?: string[];
387
+ /** Whether this request errored */
388
+ hasError?: boolean;
389
+ };
390
+ /**
391
+ * Evaluation trace context for observability
392
+ */
393
+ export type EvaluationTraceContext = {
394
+ /** Trace ID */
395
+ traceId: string;
396
+ /** Span ID */
397
+ spanId?: string;
398
+ /** Parent span ID */
399
+ parentSpanId?: string;
400
+ /** Session ID */
401
+ sessionId?: string;
402
+ /** User ID */
403
+ userId?: string;
404
+ /** Custom attributes */
405
+ attributes?: Record<string, string | number | boolean>;
406
+ };
407
+ /**
408
+ * Report format options
409
+ */
410
+ export type ReportFormat = "text" | "json" | "markdown" | "html";
411
+ /**
412
+ * Report configuration
413
+ */
414
+ export type ReportConfig = {
415
+ /** Report format */
416
+ format: ReportFormat;
417
+ /** Include detailed reasoning */
418
+ includeReasoning?: boolean;
419
+ /** Include metadata */
420
+ includeMetadata?: boolean;
421
+ /** Include timing information */
422
+ includeTiming?: boolean;
423
+ };
@@ -0,0 +1,5 @@
1
+ /**
2
+ * @file Scorer type definitions for NeuroLink evaluation system
3
+ * Mastra-style modular scorer interfaces and types
4
+ */
5
+ export {};
@@ -36,6 +36,10 @@ export declare const ERROR_CODES: {
36
36
  readonly RATE_LIMITER_QUEUE_FULL: "RATE_LIMITER_QUEUE_FULL";
37
37
  readonly RATE_LIMITER_QUEUE_TIMEOUT: "RATE_LIMITER_QUEUE_TIMEOUT";
38
38
  readonly RATE_LIMITER_RESET: "RATE_LIMITER_RESET";
39
+ readonly SCORER_NOT_FOUND: "SCORER_NOT_FOUND";
40
+ readonly EVALUATION_VALIDATION_FAILED: "EVALUATION_VALIDATION_FAILED";
41
+ readonly EVALUATION_TIMEOUT: "EVALUATION_TIMEOUT";
42
+ readonly EVALUATION_EXECUTION_FAILED: "EVALUATION_EXECUTION_FAILED";
39
43
  readonly MISSING_PPT_PROPERTIES: "MISSING_PPT_PROPERTIES";
40
44
  readonly INVALID_PPT_PAGES: "INVALID_PPT_PAGES";
41
45
  readonly INVALID_PPT_FORMAT: "INVALID_PPT_FORMAT";
@@ -214,6 +218,22 @@ export declare class ErrorFactory {
214
218
  * Create an invalid PPT provider error
215
219
  */
216
220
  static invalidPPTProvider(provider: unknown): NeuroLinkError;
221
+ /**
222
+ * Create a scorer not found error
223
+ */
224
+ static scorerNotFound(scorerId: string, availableScorers?: string[]): NeuroLinkError;
225
+ /**
226
+ * Create an evaluation validation error
227
+ */
228
+ static evaluationValidationFailed(scorerId: string, errors: string[]): NeuroLinkError;
229
+ /**
230
+ * Create an evaluation timeout error
231
+ */
232
+ static evaluationTimeout(operation: string, timeoutMs: number): NeuroLinkError;
233
+ /**
234
+ * Create an evaluation execution failed error
235
+ */
236
+ static evaluationExecutionFailed(operation: string, originalError: Error): NeuroLinkError;
217
237
  }
218
238
  /**
219
239
  * Timeout wrapper for async operations
@@ -47,6 +47,11 @@ export const ERROR_CODES = {
47
47
  RATE_LIMITER_QUEUE_FULL: "RATE_LIMITER_QUEUE_FULL",
48
48
  RATE_LIMITER_QUEUE_TIMEOUT: "RATE_LIMITER_QUEUE_TIMEOUT",
49
49
  RATE_LIMITER_RESET: "RATE_LIMITER_RESET",
50
+ // Evaluation errors
51
+ SCORER_NOT_FOUND: "SCORER_NOT_FOUND",
52
+ EVALUATION_VALIDATION_FAILED: "EVALUATION_VALIDATION_FAILED",
53
+ EVALUATION_TIMEOUT: "EVALUATION_TIMEOUT",
54
+ EVALUATION_EXECUTION_FAILED: "EVALUATION_EXECUTION_FAILED",
50
55
  // PPT validation errors
51
56
  MISSING_PPT_PROPERTIES: "MISSING_PPT_PROPERTIES",
52
57
  INVALID_PPT_PAGES: "INVALID_PPT_PAGES",
@@ -731,6 +736,61 @@ export class ErrorFactory {
731
736
  },
732
737
  });
733
738
  }
739
+ // ============================================================================
740
+ // EVALUATION ERRORS
741
+ // ============================================================================
742
+ /**
743
+ * Create a scorer not found error
744
+ */
745
+ static scorerNotFound(scorerId, availableScorers) {
746
+ return new NeuroLinkError({
747
+ code: ERROR_CODES.SCORER_NOT_FOUND,
748
+ message: `Scorer '${scorerId}' not found. Use neurolink.getAvailableScorers() to see available scorers.`,
749
+ category: ErrorCategory.VALIDATION,
750
+ severity: ErrorSeverity.MEDIUM,
751
+ retriable: false,
752
+ context: { scorerId, availableScorers },
753
+ });
754
+ }
755
+ /**
756
+ * Create an evaluation validation error
757
+ */
758
+ static evaluationValidationFailed(scorerId, errors) {
759
+ return new NeuroLinkError({
760
+ code: ERROR_CODES.EVALUATION_VALIDATION_FAILED,
761
+ message: `Invalid input for scorer '${scorerId}': ${errors.join(", ")}`,
762
+ category: ErrorCategory.VALIDATION,
763
+ severity: ErrorSeverity.MEDIUM,
764
+ retriable: false,
765
+ context: { scorerId, validationErrors: errors },
766
+ });
767
+ }
768
+ /**
769
+ * Create an evaluation timeout error
770
+ */
771
+ static evaluationTimeout(operation, timeoutMs) {
772
+ return new NeuroLinkError({
773
+ code: ERROR_CODES.EVALUATION_TIMEOUT,
774
+ message: `Evaluation ${operation} timed out after ${timeoutMs}ms`,
775
+ category: ErrorCategory.TIMEOUT,
776
+ severity: ErrorSeverity.HIGH,
777
+ retriable: true,
778
+ context: { operation, timeoutMs },
779
+ });
780
+ }
781
+ /**
782
+ * Create an evaluation execution failed error
783
+ */
784
+ static evaluationExecutionFailed(operation, originalError) {
785
+ return new NeuroLinkError({
786
+ code: ERROR_CODES.EVALUATION_EXECUTION_FAILED,
787
+ message: `Evaluation ${operation} failed: ${originalError.message}`,
788
+ category: ErrorCategory.EXECUTION,
789
+ severity: ErrorSeverity.HIGH,
790
+ retriable: false,
791
+ originalError,
792
+ });
793
+ }
734
794
  }
735
795
  /**
736
796
  * Timeout wrapper for async operations
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@juspay/neurolink",
3
- "version": "9.36.1",
3
+ "version": "9.37.0",
4
4
  "description": "Universal AI Development Platform with working MCP integration, multi-provider support, and professional CLI. Built-in tools operational, 58+ external MCP servers discoverable. Connect to filesystem, GitHub, database operations, and more. Build, test, and deploy AI applications with 13 providers: OpenAI, Anthropic, Google AI, AWS Bedrock, Azure, Hugging Face, Ollama, and Mistral AI.",
5
5
  "author": {
6
6
  "name": "Juspay Technologies",