@juspay/neurolink 9.36.1 → 9.37.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202) hide show
  1. package/CHANGELOG.md +6 -0
  2. package/dist/auth/errors.d.ts +1 -1
  3. package/dist/auth/middleware/AuthMiddleware.d.ts +1 -1
  4. package/dist/auth/providers/BaseAuthProvider.d.ts +1 -1
  5. package/dist/browser/neurolink.min.js +921 -423
  6. package/dist/cli/commands/evaluate.d.ts +48 -0
  7. package/dist/cli/commands/evaluate.js +955 -0
  8. package/dist/cli/parser.js +4 -1
  9. package/dist/evaluation/BatchEvaluator.d.ts +163 -0
  10. package/dist/evaluation/BatchEvaluator.js +267 -0
  11. package/dist/evaluation/EvaluationAggregator.d.ts +272 -0
  12. package/dist/evaluation/EvaluationAggregator.js +377 -0
  13. package/dist/evaluation/EvaluatorFactory.d.ts +113 -0
  14. package/dist/evaluation/EvaluatorFactory.js +280 -0
  15. package/dist/evaluation/EvaluatorRegistry.d.ts +160 -0
  16. package/dist/evaluation/EvaluatorRegistry.js +184 -0
  17. package/dist/evaluation/errors/EvaluationError.d.ts +189 -0
  18. package/dist/evaluation/errors/EvaluationError.js +206 -0
  19. package/dist/evaluation/errors/index.d.ts +4 -0
  20. package/dist/evaluation/errors/index.js +4 -0
  21. package/dist/evaluation/hooks/index.d.ts +6 -0
  22. package/dist/evaluation/hooks/index.js +6 -0
  23. package/dist/evaluation/hooks/langfuseAdapter.d.ts +99 -0
  24. package/dist/evaluation/hooks/langfuseAdapter.js +172 -0
  25. package/dist/evaluation/hooks/observabilityHooks.d.ts +129 -0
  26. package/dist/evaluation/hooks/observabilityHooks.js +181 -0
  27. package/dist/evaluation/index.d.ts +11 -2
  28. package/dist/evaluation/index.js +15 -0
  29. package/dist/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
  30. package/dist/evaluation/pipeline/evaluationPipeline.js +381 -0
  31. package/dist/evaluation/pipeline/index.d.ts +8 -0
  32. package/dist/evaluation/pipeline/index.js +8 -0
  33. package/dist/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
  34. package/dist/evaluation/pipeline/pipelineBuilder.js +260 -0
  35. package/dist/evaluation/pipeline/presets.d.ts +66 -0
  36. package/dist/evaluation/pipeline/presets.js +224 -0
  37. package/dist/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
  38. package/dist/evaluation/pipeline/strategies/batchStrategy.js +238 -0
  39. package/dist/evaluation/pipeline/strategies/index.d.ts +6 -0
  40. package/dist/evaluation/pipeline/strategies/index.js +6 -0
  41. package/dist/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
  42. package/dist/evaluation/pipeline/strategies/samplingStrategy.js +238 -0
  43. package/dist/evaluation/reporting/index.d.ts +6 -0
  44. package/dist/evaluation/reporting/index.js +6 -0
  45. package/dist/evaluation/reporting/metricsCollector.d.ts +147 -0
  46. package/dist/evaluation/reporting/metricsCollector.js +285 -0
  47. package/dist/evaluation/reporting/reportGenerator.d.ts +90 -0
  48. package/dist/evaluation/reporting/reportGenerator.js +374 -0
  49. package/dist/evaluation/scorers/baseScorer.d.ts +83 -0
  50. package/dist/evaluation/scorers/baseScorer.js +232 -0
  51. package/dist/evaluation/scorers/customScorerUtils.d.ts +95 -0
  52. package/dist/evaluation/scorers/customScorerUtils.js +381 -0
  53. package/dist/evaluation/scorers/index.d.ts +10 -0
  54. package/dist/evaluation/scorers/index.js +16 -0
  55. package/dist/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
  56. package/dist/evaluation/scorers/llm/answerRelevancyScorer.js +99 -0
  57. package/dist/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
  58. package/dist/evaluation/scorers/llm/baseLLMScorer.js +281 -0
  59. package/dist/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
  60. package/dist/evaluation/scorers/llm/biasDetectionScorer.js +127 -0
  61. package/dist/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
  62. package/dist/evaluation/scorers/llm/contextPrecisionScorer.js +92 -0
  63. package/dist/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
  64. package/dist/evaluation/scorers/llm/contextRelevancyScorer.js +107 -0
  65. package/dist/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
  66. package/dist/evaluation/scorers/llm/faithfulnessScorer.js +121 -0
  67. package/dist/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
  68. package/dist/evaluation/scorers/llm/hallucinationScorer.js +140 -0
  69. package/dist/evaluation/scorers/llm/index.d.ts +15 -0
  70. package/dist/evaluation/scorers/llm/index.js +16 -0
  71. package/dist/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
  72. package/dist/evaluation/scorers/llm/promptAlignmentScorer.js +106 -0
  73. package/dist/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
  74. package/dist/evaluation/scorers/llm/summarizationScorer.js +114 -0
  75. package/dist/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
  76. package/dist/evaluation/scorers/llm/toneConsistencyScorer.js +106 -0
  77. package/dist/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
  78. package/dist/evaluation/scorers/llm/toxicityScorer.js +121 -0
  79. package/dist/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
  80. package/dist/evaluation/scorers/rule/baseRuleScorer.js +233 -0
  81. package/dist/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
  82. package/dist/evaluation/scorers/rule/contentSimilarityScorer.js +350 -0
  83. package/dist/evaluation/scorers/rule/formatScorer.d.ts +147 -0
  84. package/dist/evaluation/scorers/rule/formatScorer.js +470 -0
  85. package/dist/evaluation/scorers/rule/index.d.ts +9 -0
  86. package/dist/evaluation/scorers/rule/index.js +10 -0
  87. package/dist/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
  88. package/dist/evaluation/scorers/rule/keywordCoverageScorer.js +347 -0
  89. package/dist/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
  90. package/dist/evaluation/scorers/rule/lengthScorer.js +351 -0
  91. package/dist/evaluation/scorers/scorerBuilder.d.ts +161 -0
  92. package/dist/evaluation/scorers/scorerBuilder.js +420 -0
  93. package/dist/evaluation/scorers/scorerRegistry.d.ts +62 -0
  94. package/dist/evaluation/scorers/scorerRegistry.js +467 -0
  95. package/dist/index.d.ts +37 -25
  96. package/dist/index.js +65 -26
  97. package/dist/lib/auth/providers/BaseAuthProvider.d.ts +1 -1
  98. package/dist/lib/evaluation/BatchEvaluator.d.ts +163 -0
  99. package/dist/lib/evaluation/BatchEvaluator.js +268 -0
  100. package/dist/lib/evaluation/EvaluationAggregator.d.ts +272 -0
  101. package/dist/lib/evaluation/EvaluationAggregator.js +378 -0
  102. package/dist/lib/evaluation/EvaluatorFactory.d.ts +113 -0
  103. package/dist/lib/evaluation/EvaluatorFactory.js +281 -0
  104. package/dist/lib/evaluation/EvaluatorRegistry.d.ts +160 -0
  105. package/dist/lib/evaluation/EvaluatorRegistry.js +185 -0
  106. package/dist/lib/evaluation/errors/EvaluationError.d.ts +189 -0
  107. package/dist/lib/evaluation/errors/EvaluationError.js +207 -0
  108. package/dist/lib/evaluation/errors/index.d.ts +4 -0
  109. package/dist/lib/evaluation/errors/index.js +5 -0
  110. package/dist/lib/evaluation/hooks/index.d.ts +6 -0
  111. package/dist/lib/evaluation/hooks/index.js +7 -0
  112. package/dist/lib/evaluation/hooks/langfuseAdapter.d.ts +99 -0
  113. package/dist/lib/evaluation/hooks/langfuseAdapter.js +173 -0
  114. package/dist/lib/evaluation/hooks/observabilityHooks.d.ts +129 -0
  115. package/dist/lib/evaluation/hooks/observabilityHooks.js +182 -0
  116. package/dist/lib/evaluation/index.d.ts +11 -2
  117. package/dist/lib/evaluation/index.js +15 -0
  118. package/dist/lib/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
  119. package/dist/lib/evaluation/pipeline/evaluationPipeline.js +382 -0
  120. package/dist/lib/evaluation/pipeline/index.d.ts +8 -0
  121. package/dist/lib/evaluation/pipeline/index.js +9 -0
  122. package/dist/lib/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
  123. package/dist/lib/evaluation/pipeline/pipelineBuilder.js +261 -0
  124. package/dist/lib/evaluation/pipeline/presets.d.ts +66 -0
  125. package/dist/lib/evaluation/pipeline/presets.js +225 -0
  126. package/dist/lib/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
  127. package/dist/lib/evaluation/pipeline/strategies/batchStrategy.js +239 -0
  128. package/dist/lib/evaluation/pipeline/strategies/index.d.ts +6 -0
  129. package/dist/lib/evaluation/pipeline/strategies/index.js +7 -0
  130. package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
  131. package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.js +239 -0
  132. package/dist/lib/evaluation/reporting/index.d.ts +6 -0
  133. package/dist/lib/evaluation/reporting/index.js +7 -0
  134. package/dist/lib/evaluation/reporting/metricsCollector.d.ts +147 -0
  135. package/dist/lib/evaluation/reporting/metricsCollector.js +286 -0
  136. package/dist/lib/evaluation/reporting/reportGenerator.d.ts +90 -0
  137. package/dist/lib/evaluation/reporting/reportGenerator.js +375 -0
  138. package/dist/lib/evaluation/scorers/baseScorer.d.ts +83 -0
  139. package/dist/lib/evaluation/scorers/baseScorer.js +233 -0
  140. package/dist/lib/evaluation/scorers/customScorerUtils.d.ts +95 -0
  141. package/dist/lib/evaluation/scorers/customScorerUtils.js +382 -0
  142. package/dist/lib/evaluation/scorers/index.d.ts +10 -0
  143. package/dist/lib/evaluation/scorers/index.js +17 -0
  144. package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
  145. package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.js +100 -0
  146. package/dist/lib/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
  147. package/dist/lib/evaluation/scorers/llm/baseLLMScorer.js +282 -0
  148. package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
  149. package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.js +128 -0
  150. package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
  151. package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.js +93 -0
  152. package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
  153. package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.js +108 -0
  154. package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
  155. package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.js +122 -0
  156. package/dist/lib/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
  157. package/dist/lib/evaluation/scorers/llm/hallucinationScorer.js +141 -0
  158. package/dist/lib/evaluation/scorers/llm/index.d.ts +15 -0
  159. package/dist/lib/evaluation/scorers/llm/index.js +17 -0
  160. package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
  161. package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.js +107 -0
  162. package/dist/lib/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
  163. package/dist/lib/evaluation/scorers/llm/summarizationScorer.js +115 -0
  164. package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
  165. package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.js +107 -0
  166. package/dist/lib/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
  167. package/dist/lib/evaluation/scorers/llm/toxicityScorer.js +122 -0
  168. package/dist/lib/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
  169. package/dist/lib/evaluation/scorers/rule/baseRuleScorer.js +234 -0
  170. package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
  171. package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.js +351 -0
  172. package/dist/lib/evaluation/scorers/rule/formatScorer.d.ts +147 -0
  173. package/dist/lib/evaluation/scorers/rule/formatScorer.js +471 -0
  174. package/dist/lib/evaluation/scorers/rule/index.d.ts +9 -0
  175. package/dist/lib/evaluation/scorers/rule/index.js +11 -0
  176. package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
  177. package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.js +348 -0
  178. package/dist/lib/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
  179. package/dist/lib/evaluation/scorers/rule/lengthScorer.js +352 -0
  180. package/dist/lib/evaluation/scorers/scorerBuilder.d.ts +161 -0
  181. package/dist/lib/evaluation/scorers/scorerBuilder.js +421 -0
  182. package/dist/lib/evaluation/scorers/scorerRegistry.d.ts +62 -0
  183. package/dist/lib/evaluation/scorers/scorerRegistry.js +468 -0
  184. package/dist/lib/index.d.ts +37 -25
  185. package/dist/lib/index.js +65 -26
  186. package/dist/lib/neurolink.d.ts +204 -0
  187. package/dist/lib/neurolink.js +296 -0
  188. package/dist/lib/types/index.d.ts +3 -1
  189. package/dist/lib/types/index.js +3 -2
  190. package/dist/lib/types/scorerTypes.d.ts +423 -0
  191. package/dist/lib/types/scorerTypes.js +6 -0
  192. package/dist/lib/utils/errorHandling.d.ts +20 -0
  193. package/dist/lib/utils/errorHandling.js +60 -0
  194. package/dist/neurolink.d.ts +204 -0
  195. package/dist/neurolink.js +296 -0
  196. package/dist/types/index.d.ts +3 -1
  197. package/dist/types/index.js +3 -2
  198. package/dist/types/scorerTypes.d.ts +423 -0
  199. package/dist/types/scorerTypes.js +5 -0
  200. package/dist/utils/errorHandling.d.ts +20 -0
  201. package/dist/utils/errorHandling.js +60 -0
  202. package/package.json +1 -1
@@ -0,0 +1,261 @@
1
+ /**
2
+ * @file Pipeline Builder
3
+ * Fluent builder API for creating evaluation pipelines
4
+ */
5
+ import { EvaluationPipeline } from "./evaluationPipeline.js";
6
+ /**
7
+ * Fluent builder for creating evaluation pipelines
8
+ */
9
+ export class PipelineBuilder {
10
+ _name;
11
+ _description;
12
+ _scorers = [];
13
+ _aggregation = { method: "average" };
14
+ _passThreshold = 0.7;
15
+ _executionMode = "parallel";
16
+ _stopOnFailure = false;
17
+ _timeout;
18
+ _requiredScorers = [];
19
+ constructor(name) {
20
+ this._name = name;
21
+ }
22
+ /**
23
+ * Create a new pipeline builder
24
+ */
25
+ static create(name) {
26
+ return new PipelineBuilder(name);
27
+ }
28
+ /**
29
+ * Set pipeline name
30
+ */
31
+ name(name) {
32
+ this._name = name;
33
+ return this;
34
+ }
35
+ /**
36
+ * Set pipeline description
37
+ */
38
+ description(desc) {
39
+ this._description = desc;
40
+ return this;
41
+ }
42
+ /**
43
+ * Add a scorer by ID
44
+ */
45
+ addScorer(id, config) {
46
+ this._scorers.push({ id, config });
47
+ return this;
48
+ }
49
+ /**
50
+ * Add multiple scorers
51
+ */
52
+ addScorers(...ids) {
53
+ for (const id of ids) {
54
+ this._scorers.push({ id });
55
+ }
56
+ return this;
57
+ }
58
+ /**
59
+ * Add a scorer and mark it as required
60
+ */
61
+ requireScorer(id, config) {
62
+ const existing = this._scorers.find((scorer) => scorer.id === id);
63
+ if (existing) {
64
+ existing.config = {
65
+ ...existing.config,
66
+ ...config,
67
+ };
68
+ }
69
+ else {
70
+ this._scorers.push({ id, config });
71
+ }
72
+ if (!this._requiredScorers.includes(id)) {
73
+ this._requiredScorers.push(id);
74
+ }
75
+ return this;
76
+ }
77
+ /**
78
+ * Set aggregation method
79
+ */
80
+ aggregateWith(method) {
81
+ this._aggregation.method = method;
82
+ return this;
83
+ }
84
+ /**
85
+ * Set weights for weighted aggregation
86
+ */
87
+ withWeights(weights) {
88
+ this._aggregation.method = "weighted";
89
+ this._aggregation.weights = weights;
90
+ return this;
91
+ }
92
+ /**
93
+ * Set custom aggregation function
94
+ */
95
+ customAggregation(fn) {
96
+ this._aggregation.method = "custom";
97
+ this._aggregation.customFn = fn;
98
+ return this;
99
+ }
100
+ /**
101
+ * Set pass/fail threshold
102
+ */
103
+ passThreshold(threshold) {
104
+ this._passThreshold = threshold;
105
+ return this;
106
+ }
107
+ /**
108
+ * Run scorers in parallel (default)
109
+ */
110
+ parallel() {
111
+ this._executionMode = "parallel";
112
+ return this;
113
+ }
114
+ /**
115
+ * Run scorers sequentially
116
+ */
117
+ sequential() {
118
+ this._executionMode = "sequential";
119
+ return this;
120
+ }
121
+ /**
122
+ * Stop pipeline on first failure
123
+ */
124
+ stopOnFailure() {
125
+ this._stopOnFailure = true;
126
+ return this;
127
+ }
128
+ /**
129
+ * Continue pipeline on failures (default)
130
+ */
131
+ continueOnFailure() {
132
+ this._stopOnFailure = false;
133
+ return this;
134
+ }
135
+ /**
136
+ * Set pipeline timeout
137
+ */
138
+ timeout(ms) {
139
+ this._timeout = ms;
140
+ return this;
141
+ }
142
+ /**
143
+ * Build the pipeline configuration
144
+ */
145
+ buildConfig() {
146
+ return {
147
+ name: this._name,
148
+ description: this._description,
149
+ scorers: this._scorers.map((scorer) => ({
150
+ id: scorer.id,
151
+ config: scorer.config ? { ...scorer.config } : undefined,
152
+ })),
153
+ aggregation: {
154
+ ...this._aggregation,
155
+ weights: this._aggregation.weights
156
+ ? { ...this._aggregation.weights }
157
+ : undefined,
158
+ },
159
+ passThreshold: this._passThreshold,
160
+ executionMode: this._executionMode,
161
+ stopOnFailure: this._stopOnFailure,
162
+ timeout: this._timeout,
163
+ requiredScorers: this._requiredScorers.length > 0
164
+ ? [...this._requiredScorers]
165
+ : undefined,
166
+ };
167
+ }
168
+ /**
169
+ * Build the pipeline (not initialized)
170
+ */
171
+ build() {
172
+ return new EvaluationPipeline(this.buildConfig());
173
+ }
174
+ /**
175
+ * Build and initialize the pipeline
176
+ */
177
+ async buildAndInitialize() {
178
+ const pipeline = this.build();
179
+ await pipeline.initialize();
180
+ return pipeline;
181
+ }
182
+ }
183
+ /**
184
+ * Quick pipeline builder factory
185
+ */
186
+ export const Pipelines = {
187
+ /**
188
+ * Create a new pipeline builder
189
+ */
190
+ create: (name) => PipelineBuilder.create(name),
191
+ /**
192
+ * Create a safety-focused pipeline
193
+ */
194
+ safety: () => PipelineBuilder.create("safety")
195
+ .description("Safety evaluation pipeline")
196
+ .addScorers("toxicity", "bias-detection")
197
+ .requireScorer("toxicity")
198
+ .aggregateWith("minimum")
199
+ .passThreshold(0.8),
200
+ /**
201
+ * Create a RAG evaluation pipeline
202
+ */
203
+ rag: () => PipelineBuilder.create("rag")
204
+ .description("RAG evaluation pipeline")
205
+ .addScorers("faithfulness", "context-relevancy", "answer-relevancy", "hallucination")
206
+ .withWeights({
207
+ faithfulness: 1.5,
208
+ "context-relevancy": 1.0,
209
+ "answer-relevancy": 1.0,
210
+ hallucination: 1.5,
211
+ })
212
+ .passThreshold(0.7),
213
+ /**
214
+ * Create a quality-focused pipeline
215
+ */
216
+ quality: () => PipelineBuilder.create("quality")
217
+ .description("Quality evaluation pipeline")
218
+ .addScorers("tone-consistency", "prompt-alignment", "length", "format")
219
+ .aggregateWith("average")
220
+ .passThreshold(0.7),
221
+ /**
222
+ * Create a comprehensive pipeline with all scorers
223
+ */
224
+ comprehensive: () => PipelineBuilder.create("comprehensive")
225
+ .description("Comprehensive evaluation pipeline")
226
+ .addScorers("toxicity", "bias-detection", "hallucination", "faithfulness", "context-relevancy", "answer-relevancy", "tone-consistency", "prompt-alignment")
227
+ .requireScorer("toxicity")
228
+ .withWeights({
229
+ toxicity: 2.0,
230
+ "bias-detection": 1.5,
231
+ hallucination: 1.5,
232
+ faithfulness: 1.0,
233
+ "context-relevancy": 1.0,
234
+ "answer-relevancy": 1.0,
235
+ "tone-consistency": 0.8,
236
+ "prompt-alignment": 0.8,
237
+ })
238
+ .passThreshold(0.75),
239
+ /**
240
+ * Create a minimal fast pipeline
241
+ */
242
+ minimal: () => PipelineBuilder.create("minimal")
243
+ .description("Minimal fast evaluation pipeline")
244
+ .addScorers("toxicity", "hallucination")
245
+ .parallel()
246
+ .passThreshold(0.8),
247
+ /**
248
+ * Create a summarization evaluation pipeline
249
+ */
250
+ summarization: () => PipelineBuilder.create("summarization")
251
+ .description("Summarization quality evaluation pipeline")
252
+ .addScorers("summarization", "faithfulness", "content-similarity", "length")
253
+ .withWeights({
254
+ summarization: 1.5,
255
+ faithfulness: 1.2,
256
+ "content-similarity": 1.0,
257
+ length: 0.8,
258
+ })
259
+ .passThreshold(0.7),
260
+ };
261
+ //# sourceMappingURL=pipelineBuilder.js.map
@@ -0,0 +1,66 @@
1
+ /**
2
+ * @file Pipeline Presets
3
+ * Pre-configured evaluation pipelines for common use cases
4
+ */
5
+ import type { PipelineConfig } from "../../types/scorerTypes.js";
6
/**
 * Safety preset.
 * Content-safety checks: toxicity, bias, harmful content.
 */
export declare const SAFETY_PIPELINE: PipelineConfig;
/**
 * RAG preset.
 * Quality checks for Retrieval Augmented Generation.
 */
export declare const RAG_PIPELINE: PipelineConfig;
/**
 * Quality preset.
 * Response-quality checks: format, length, tone.
 */
export declare const QUALITY_PIPELINE: PipelineConfig;
/**
 * Comprehensive preset.
 * Full evaluation across all dimensions.
 */
export declare const COMPREHENSIVE_PIPELINE: PipelineConfig;
/**
 * Minimal/fast preset.
 * Quick checks for high-throughput scenarios.
 */
export declare const MINIMAL_PIPELINE: PipelineConfig;
/**
 * Summarization preset.
 * Checks summarization quality.
 */
export declare const SUMMARIZATION_PIPELINE: PipelineConfig;
/**
 * Customer support preset.
 * Tailored for customer service responses.
 */
export declare const CUSTOMER_SUPPORT_PIPELINE: PipelineConfig;
/**
 * Code generation preset.
 * Checks generated code quality.
 */
export declare const CODE_GENERATION_PIPELINE: PipelineConfig;
/**
 * Lookup table of every preset, keyed by preset name.
 */
export declare const PipelinePresets: Readonly<{
    safety: PipelineConfig;
    rag: PipelineConfig;
    quality: PipelineConfig;
    comprehensive: PipelineConfig;
    minimal: PipelineConfig;
    summarization: PipelineConfig;
    customerSupport: PipelineConfig;
    codeGeneration: PipelineConfig;
}>;
/**
 * Look up a preset pipeline configuration by its key in `PipelinePresets`.
 *
 * @param name - One of the `PipelinePresets` keys.
 */
export declare function getPreset(name: keyof typeof PipelinePresets): PipelineConfig;
/**
 * List the names of all available presets.
 */
export declare function getPresetNames(): string[];
@@ -0,0 +1,225 @@
1
+ /**
2
+ * @file Pipeline Presets
3
+ * Pre-configured evaluation pipelines for common use cases
4
+ */
5
/**
 * Safety preset: toxicity and bias-detection scorers. The overall score is
 * the minimum of the scorer results, and "toxicity" is a required scorer.
 */
export const SAFETY_PIPELINE = {
    name: "safety",
    description: "Safety evaluation pipeline for detecting harmful content",
    scorers: [
        {
            id: "toxicity",
            config: { threshold: 0.9, weight: 2.0 },
        },
        {
            id: "bias-detection",
            config: { threshold: 0.8, weight: 1.5 },
        },
    ],
    aggregation: { method: "minimum" },
    passThreshold: 0.8,
    executionMode: "parallel",
    requiredScorers: ["toxicity"],
};
23
/**
 * RAG preset: five retrieval-augmented-generation scorers combined by
 * weighted aggregation. "faithfulness" and "hallucination" are required.
 */
export const RAG_PIPELINE = {
    name: "rag",
    description: "RAG evaluation pipeline for retrieval-augmented generation",
    scorers: [
        {
            id: "faithfulness",
            config: { weight: 1.5 },
        },
        {
            id: "context-relevancy",
            config: { weight: 1.0 },
        },
        {
            id: "context-precision",
            config: { weight: 1.0 },
        },
        {
            id: "answer-relevancy",
            config: { weight: 1.2 },
        },
        {
            id: "hallucination",
            config: { weight: 1.5 },
        },
    ],
    aggregation: {
        method: "weighted",
        // Same values as the per-scorer `weight` entries above.
        weights: {
            faithfulness: 1.5,
            "context-relevancy": 1.0,
            "context-precision": 1.0,
            "answer-relevancy": 1.2,
            hallucination: 1.5,
        },
    },
    passThreshold: 0.7,
    executionMode: "parallel",
    requiredScorers: ["faithfulness", "hallucination"],
};
51
/**
 * Quality preset: tone, prompt-alignment, length and format scorers,
 * combined with "average" aggregation. No scorer is marked as required.
 */
export const QUALITY_PIPELINE = {
    name: "quality",
    description: "Quality evaluation pipeline for response assessment",
    scorers: [
        {
            id: "tone-consistency",
            config: { weight: 1.0 },
        },
        {
            id: "prompt-alignment",
            config: { weight: 1.2 },
        },
        {
            id: "length",
            config: { weight: 0.8 },
        },
        {
            id: "format",
            config: { weight: 0.8 },
        },
    ],
    aggregation: { method: "average" },
    passThreshold: 0.7,
    executionMode: "parallel",
};
70
/**
 * Comprehensive preset: safety, accuracy, relevancy and quality scorers
 * combined by weighted aggregation. "toxicity" is a required scorer.
 */
export const COMPREHENSIVE_PIPELINE = {
    name: "comprehensive",
    description: "Comprehensive evaluation pipeline covering all aspects",
    scorers: [
        // Safety
        {
            id: "toxicity",
            config: { threshold: 0.9, weight: 2.0 },
        },
        {
            id: "bias-detection",
            config: { threshold: 0.8, weight: 1.5 },
        },
        // Accuracy
        {
            id: "hallucination",
            config: { weight: 1.5 },
        },
        {
            id: "faithfulness",
            config: { weight: 1.2 },
        },
        // Relevancy
        {
            id: "context-relevancy",
            config: { weight: 1.0 },
        },
        {
            id: "answer-relevancy",
            config: { weight: 1.0 },
        },
        // Quality
        {
            id: "tone-consistency",
            config: { weight: 0.8 },
        },
        {
            id: "prompt-alignment",
            config: { weight: 1.0 },
        },
    ],
    aggregation: {
        method: "weighted",
        // Same values as the per-scorer `weight` entries above.
        weights: {
            toxicity: 2.0,
            "bias-detection": 1.5,
            hallucination: 1.5,
            faithfulness: 1.2,
            "context-relevancy": 1.0,
            "answer-relevancy": 1.0,
            "tone-consistency": 0.8,
            "prompt-alignment": 1.0,
        },
    },
    passThreshold: 0.75,
    executionMode: "parallel",
    requiredScorers: ["toxicity"],
};
108
/**
 * Minimal/fast preset: toxicity and hallucination only, scored by the
 * minimum, with a `timeout` of 10000.
 */
export const MINIMAL_PIPELINE = {
    name: "minimal",
    description: "Minimal fast evaluation for high-throughput scenarios",
    scorers: [
        {
            id: "toxicity",
            config: { threshold: 0.9 },
        },
        {
            id: "hallucination",
            config: { threshold: 0.8 },
        },
    ],
    aggregation: { method: "minimum" },
    passThreshold: 0.8,
    executionMode: "parallel",
    timeout: 10000,
};
126
/**
 * Summarization preset: summarization, faithfulness, content-similarity and
 * length scorers combined by weighted aggregation.
 */
export const SUMMARIZATION_PIPELINE = {
    name: "summarization",
    description: "Summarization quality evaluation pipeline",
    scorers: [
        {
            id: "summarization",
            config: { weight: 1.5 },
        },
        {
            id: "faithfulness",
            config: { weight: 1.2 },
        },
        {
            id: "content-similarity",
            config: { weight: 1.0 },
        },
        {
            id: "length",
            config: { weight: 0.8 },
        },
    ],
    aggregation: {
        method: "weighted",
        // Same values as the per-scorer `weight` entries above.
        weights: {
            summarization: 1.5,
            faithfulness: 1.2,
            "content-similarity": 1.0,
            length: 0.8,
        },
    },
    passThreshold: 0.7,
    executionMode: "parallel",
};
151
/**
 * Customer support preset: toxicity (required), tone, prompt-alignment and
 * answer-relevancy scorers combined by weighted aggregation.
 */
export const CUSTOMER_SUPPORT_PIPELINE = {
    name: "customerSupport",
    description: "Customer support response evaluation pipeline",
    scorers: [
        {
            id: "toxicity",
            config: { threshold: 0.95, weight: 2.0 },
        },
        {
            id: "tone-consistency",
            config: { weight: 1.5 },
        },
        {
            id: "prompt-alignment",
            config: { weight: 1.2 },
        },
        {
            id: "answer-relevancy",
            config: { weight: 1.0 },
        },
    ],
    aggregation: {
        method: "weighted",
        // Same values as the per-scorer `weight` entries above.
        weights: {
            toxicity: 2.0,
            "tone-consistency": 1.5,
            "prompt-alignment": 1.2,
            "answer-relevancy": 1.0,
        },
    },
    passThreshold: 0.8,
    executionMode: "parallel",
    requiredScorers: ["toxicity"],
};
177
/**
 * Code generation preset: format, prompt-alignment and hallucination scorers
 * combined by weighted aggregation. Its execution mode is "sequential".
 */
export const CODE_GENERATION_PIPELINE = {
    name: "codeGeneration",
    description: "Code generation quality evaluation pipeline",
    scorers: [
        {
            id: "format",
            config: { weight: 1.0 },
        },
        {
            id: "prompt-alignment",
            config: { weight: 1.5 },
        },
        {
            id: "hallucination",
            config: { weight: 1.2 },
        },
    ],
    aggregation: {
        method: "weighted",
        // Same values as the per-scorer `weight` entries above.
        weights: {
            format: 1.0,
            "prompt-alignment": 1.5,
            hallucination: 1.2,
        },
    },
    passThreshold: 0.75,
    executionMode: "sequential",
};
200
/**
 * Registry of every preset configuration, keyed by preset name.
 * The values are the preset constants themselves, not copies.
 */
export const PipelinePresets = {
    safety: SAFETY_PIPELINE,
    rag: RAG_PIPELINE,
    quality: QUALITY_PIPELINE,
    comprehensive: COMPREHENSIVE_PIPELINE,
    minimal: MINIMAL_PIPELINE,
    summarization: SUMMARIZATION_PIPELINE,
    customerSupport: CUSTOMER_SUPPORT_PIPELINE,
    codeGeneration: CODE_GENERATION_PIPELINE,
};
213
+ /**
214
+ * Get a preset pipeline configuration by name
215
+ */
216
+ export function getPreset(name) {
217
+ return PipelinePresets[name];
218
+ }
219
+ /**
220
+ * Get all available preset names
221
+ */
222
+ export function getPresetNames() {
223
+ return Object.keys(PipelinePresets);
224
+ }
225
+ //# sourceMappingURL=presets.js.map
@@ -0,0 +1,99 @@
1
+ /**
2
+ * @file Batch Strategy
3
+ * Batch processing for evaluation pipelines
4
+ */
5
+ import type { ScorerInput } from "../../../types/scorerTypes.js";
6
+ import type { EvaluationPipeline, PipelineExecutionOptions, PipelineResult } from "../evaluationPipeline.js";
7
/**
 * Options controlling how a batch of inputs is processed.
 * Every field is optional.
 */
export type BatchConfig = {
    /** Upper bound on evaluations running at the same time. */
    concurrency?: number;
    /** Pause between batches, in milliseconds. */
    batchDelay?: number;
    /** Whether to keep going when an individual item fails. */
    continueOnError?: boolean;
    /** Callback that receives progress information. */
    onProgress?: (progress: BatchProgress) => void;
    /** Callback that receives each individual item result. */
    onResult?: (result: BatchItemResult) => void;
};
22
/**
 * Progress information for a running batch, as passed to
 * `BatchConfig.onProgress`.
 */
export type BatchProgress = {
    /** Number of items in the batch. */
    total: number;
    /** Number of items counted as completed. */
    completed: number;
    /** Number of items counted as failed. */
    failed: number;
    /** Number of items still outstanding. */
    remaining: number;
    /** Completion percentage. */
    percentComplete: number;
    /** Estimate of the time left, when available. */
    estimatedTimeRemaining?: number;
};
33
/**
 * Result for one item of a batch.
 * `result` and `error` are both optional.
 */
export type BatchItemResult = {
    /** Index of the item. */
    index: number;
    /** The input that was evaluated. */
    input: ScorerInput;
    /** Pipeline result, when one is available. */
    result?: PipelineResult;
    /** Error text, when one is available. */
    error?: string;
    /** Time spent on this item. */
    duration: number;
};
43
/**
 * Result of a whole batch: the per-item results plus summary statistics.
 */
export type BatchResult = {
    /** One entry per evaluated item. */
    results: Array<BatchItemResult>;
    /** Summary statistics for the batch. */
    summary: {
        /** Number of items in the batch. */
        total: number;
        /** Number of items counted as successful. */
        successful: number;
        /** Number of items counted as failed. */
        failed: number;
        /** Average score. */
        averageScore: number;
        /** Pass rate. */
        passRate: number;
        /** Total duration of the batch. */
        totalDuration: number;
        /** Average duration per item. */
        averageDuration: number;
    };
};
60
/**
 * Batch evaluation strategy bound to one pipeline and one batch
 * configuration.
 */
export declare class BatchStrategy {
    private _pipeline;
    private _config;
    /**
     * @param pipeline - Pipeline used for the evaluations.
     * @param config - Optional batch options.
     */
    constructor(pipeline: EvaluationPipeline, config?: BatchConfig);
    /**
     * Evaluate a batch of inputs.
     *
     * @param inputs - Inputs to evaluate.
     * @param options - Optional pipeline execution options.
     * @returns A promise for the item results and summary.
     */
    evaluate(inputs: ScorerInput[], options?: PipelineExecutionOptions): Promise<BatchResult>;
    /**
     * Evaluates a single item.
     */
    private _evaluateItem;
    /**
     * Estimates the remaining time from the average duration.
     */
    private _estimateRemainingTime;
    /**
     * Delay helper.
     */
    private _delay;
    /**
     * Update the configuration.
     *
     * @param config - Options to change; every field is optional.
     */
    configure(config: Partial<BatchConfig>): void;
}
88
/**
 * Create a batch strategy for a pipeline.
 *
 * @param pipeline - Pipeline the strategy will evaluate with.
 * @param config - Optional batch options.
 */
export declare function createBatchStrategy(pipeline: EvaluationPipeline, config?: BatchConfig): BatchStrategy;
92
/**
 * Evaluate a batch of inputs using a pipeline.
 *
 * @param pipeline - Pipeline to evaluate with.
 * @param inputs - Inputs to evaluate.
 * @param config - Optional batch options.
 * @returns A promise for the item results and summary.
 */
export declare function evaluateBatch(pipeline: EvaluationPipeline, inputs: ScorerInput[], config?: BatchConfig): Promise<BatchResult>;
96
/**
 * Stream batch evaluation results.
 *
 * The async generator yields one `BatchItemResult` at a time and returns the
 * batch summary when it finishes. The `onResult` and `onProgress` callbacks
 * are excluded from the accepted options.
 *
 * @param pipeline - Pipeline to evaluate with.
 * @param inputs - Inputs to evaluate.
 * @param config - Optional batch options without the callbacks.
 */
export declare function streamBatchEvaluation(pipeline: EvaluationPipeline, inputs: ScorerInput[], config?: Omit<BatchConfig, "onResult" | "onProgress">): AsyncGenerator<BatchItemResult, BatchResult["summary"], void>;