@juspay/neurolink 9.36.1 → 9.38.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/auth/errors.d.ts +1 -1
- package/dist/auth/middleware/AuthMiddleware.d.ts +1 -1
- package/dist/auth/providers/BaseAuthProvider.d.ts +1 -1
- package/dist/browser/neurolink.min.js +1105 -556
- package/dist/cli/commands/evaluate.d.ts +48 -0
- package/dist/cli/commands/evaluate.js +955 -0
- package/dist/cli/parser.js +4 -1
- package/dist/evaluation/BatchEvaluator.d.ts +163 -0
- package/dist/evaluation/BatchEvaluator.js +267 -0
- package/dist/evaluation/EvaluationAggregator.d.ts +272 -0
- package/dist/evaluation/EvaluationAggregator.js +377 -0
- package/dist/evaluation/EvaluatorFactory.d.ts +113 -0
- package/dist/evaluation/EvaluatorFactory.js +280 -0
- package/dist/evaluation/EvaluatorRegistry.d.ts +160 -0
- package/dist/evaluation/EvaluatorRegistry.js +184 -0
- package/dist/evaluation/errors/EvaluationError.d.ts +189 -0
- package/dist/evaluation/errors/EvaluationError.js +206 -0
- package/dist/evaluation/errors/index.d.ts +4 -0
- package/dist/evaluation/errors/index.js +4 -0
- package/dist/evaluation/hooks/index.d.ts +6 -0
- package/dist/evaluation/hooks/index.js +6 -0
- package/dist/evaluation/hooks/langfuseAdapter.d.ts +99 -0
- package/dist/evaluation/hooks/langfuseAdapter.js +172 -0
- package/dist/evaluation/hooks/observabilityHooks.d.ts +129 -0
- package/dist/evaluation/hooks/observabilityHooks.js +181 -0
- package/dist/evaluation/index.d.ts +11 -2
- package/dist/evaluation/index.js +15 -0
- package/dist/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
- package/dist/evaluation/pipeline/evaluationPipeline.js +381 -0
- package/dist/evaluation/pipeline/index.d.ts +8 -0
- package/dist/evaluation/pipeline/index.js +8 -0
- package/dist/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
- package/dist/evaluation/pipeline/pipelineBuilder.js +260 -0
- package/dist/evaluation/pipeline/presets.d.ts +66 -0
- package/dist/evaluation/pipeline/presets.js +224 -0
- package/dist/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
- package/dist/evaluation/pipeline/strategies/batchStrategy.js +238 -0
- package/dist/evaluation/pipeline/strategies/index.d.ts +6 -0
- package/dist/evaluation/pipeline/strategies/index.js +6 -0
- package/dist/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
- package/dist/evaluation/pipeline/strategies/samplingStrategy.js +238 -0
- package/dist/evaluation/reporting/index.d.ts +6 -0
- package/dist/evaluation/reporting/index.js +6 -0
- package/dist/evaluation/reporting/metricsCollector.d.ts +147 -0
- package/dist/evaluation/reporting/metricsCollector.js +285 -0
- package/dist/evaluation/reporting/reportGenerator.d.ts +90 -0
- package/dist/evaluation/reporting/reportGenerator.js +374 -0
- package/dist/evaluation/scorers/baseScorer.d.ts +83 -0
- package/dist/evaluation/scorers/baseScorer.js +232 -0
- package/dist/evaluation/scorers/customScorerUtils.d.ts +95 -0
- package/dist/evaluation/scorers/customScorerUtils.js +381 -0
- package/dist/evaluation/scorers/index.d.ts +10 -0
- package/dist/evaluation/scorers/index.js +16 -0
- package/dist/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/answerRelevancyScorer.js +99 -0
- package/dist/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
- package/dist/evaluation/scorers/llm/baseLLMScorer.js +281 -0
- package/dist/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/biasDetectionScorer.js +127 -0
- package/dist/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/contextPrecisionScorer.js +92 -0
- package/dist/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/contextRelevancyScorer.js +107 -0
- package/dist/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/faithfulnessScorer.js +121 -0
- package/dist/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/hallucinationScorer.js +140 -0
- package/dist/evaluation/scorers/llm/index.d.ts +15 -0
- package/dist/evaluation/scorers/llm/index.js +16 -0
- package/dist/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/promptAlignmentScorer.js +106 -0
- package/dist/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/summarizationScorer.js +114 -0
- package/dist/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/toneConsistencyScorer.js +106 -0
- package/dist/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/toxicityScorer.js +121 -0
- package/dist/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
- package/dist/evaluation/scorers/rule/baseRuleScorer.js +233 -0
- package/dist/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
- package/dist/evaluation/scorers/rule/contentSimilarityScorer.js +350 -0
- package/dist/evaluation/scorers/rule/formatScorer.d.ts +147 -0
- package/dist/evaluation/scorers/rule/formatScorer.js +470 -0
- package/dist/evaluation/scorers/rule/index.d.ts +9 -0
- package/dist/evaluation/scorers/rule/index.js +10 -0
- package/dist/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
- package/dist/evaluation/scorers/rule/keywordCoverageScorer.js +347 -0
- package/dist/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
- package/dist/evaluation/scorers/rule/lengthScorer.js +351 -0
- package/dist/evaluation/scorers/scorerBuilder.d.ts +161 -0
- package/dist/evaluation/scorers/scorerBuilder.js +420 -0
- package/dist/evaluation/scorers/scorerRegistry.d.ts +62 -0
- package/dist/evaluation/scorers/scorerRegistry.js +467 -0
- package/dist/index.d.ts +37 -25
- package/dist/index.js +65 -26
- package/dist/lib/auth/providers/BaseAuthProvider.d.ts +1 -1
- package/dist/lib/evaluation/BatchEvaluator.d.ts +163 -0
- package/dist/lib/evaluation/BatchEvaluator.js +268 -0
- package/dist/lib/evaluation/EvaluationAggregator.d.ts +272 -0
- package/dist/lib/evaluation/EvaluationAggregator.js +378 -0
- package/dist/lib/evaluation/EvaluatorFactory.d.ts +113 -0
- package/dist/lib/evaluation/EvaluatorFactory.js +281 -0
- package/dist/lib/evaluation/EvaluatorRegistry.d.ts +160 -0
- package/dist/lib/evaluation/EvaluatorRegistry.js +185 -0
- package/dist/lib/evaluation/errors/EvaluationError.d.ts +189 -0
- package/dist/lib/evaluation/errors/EvaluationError.js +207 -0
- package/dist/lib/evaluation/errors/index.d.ts +4 -0
- package/dist/lib/evaluation/errors/index.js +5 -0
- package/dist/lib/evaluation/hooks/index.d.ts +6 -0
- package/dist/lib/evaluation/hooks/index.js +7 -0
- package/dist/lib/evaluation/hooks/langfuseAdapter.d.ts +99 -0
- package/dist/lib/evaluation/hooks/langfuseAdapter.js +173 -0
- package/dist/lib/evaluation/hooks/observabilityHooks.d.ts +129 -0
- package/dist/lib/evaluation/hooks/observabilityHooks.js +182 -0
- package/dist/lib/evaluation/index.d.ts +11 -2
- package/dist/lib/evaluation/index.js +15 -0
- package/dist/lib/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
- package/dist/lib/evaluation/pipeline/evaluationPipeline.js +382 -0
- package/dist/lib/evaluation/pipeline/index.d.ts +8 -0
- package/dist/lib/evaluation/pipeline/index.js +9 -0
- package/dist/lib/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
- package/dist/lib/evaluation/pipeline/pipelineBuilder.js +261 -0
- package/dist/lib/evaluation/pipeline/presets.d.ts +66 -0
- package/dist/lib/evaluation/pipeline/presets.js +225 -0
- package/dist/lib/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
- package/dist/lib/evaluation/pipeline/strategies/batchStrategy.js +239 -0
- package/dist/lib/evaluation/pipeline/strategies/index.d.ts +6 -0
- package/dist/lib/evaluation/pipeline/strategies/index.js +7 -0
- package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
- package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.js +239 -0
- package/dist/lib/evaluation/reporting/index.d.ts +6 -0
- package/dist/lib/evaluation/reporting/index.js +7 -0
- package/dist/lib/evaluation/reporting/metricsCollector.d.ts +147 -0
- package/dist/lib/evaluation/reporting/metricsCollector.js +286 -0
- package/dist/lib/evaluation/reporting/reportGenerator.d.ts +90 -0
- package/dist/lib/evaluation/reporting/reportGenerator.js +375 -0
- package/dist/lib/evaluation/scorers/baseScorer.d.ts +83 -0
- package/dist/lib/evaluation/scorers/baseScorer.js +233 -0
- package/dist/lib/evaluation/scorers/customScorerUtils.d.ts +95 -0
- package/dist/lib/evaluation/scorers/customScorerUtils.js +382 -0
- package/dist/lib/evaluation/scorers/index.d.ts +10 -0
- package/dist/lib/evaluation/scorers/index.js +17 -0
- package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.js +100 -0
- package/dist/lib/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
- package/dist/lib/evaluation/scorers/llm/baseLLMScorer.js +282 -0
- package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.js +128 -0
- package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.js +93 -0
- package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.js +108 -0
- package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.js +122 -0
- package/dist/lib/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/hallucinationScorer.js +141 -0
- package/dist/lib/evaluation/scorers/llm/index.d.ts +15 -0
- package/dist/lib/evaluation/scorers/llm/index.js +17 -0
- package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.js +107 -0
- package/dist/lib/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/summarizationScorer.js +115 -0
- package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.js +107 -0
- package/dist/lib/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/toxicityScorer.js +122 -0
- package/dist/lib/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
- package/dist/lib/evaluation/scorers/rule/baseRuleScorer.js +234 -0
- package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
- package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.js +351 -0
- package/dist/lib/evaluation/scorers/rule/formatScorer.d.ts +147 -0
- package/dist/lib/evaluation/scorers/rule/formatScorer.js +471 -0
- package/dist/lib/evaluation/scorers/rule/index.d.ts +9 -0
- package/dist/lib/evaluation/scorers/rule/index.js +11 -0
- package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
- package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.js +348 -0
- package/dist/lib/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
- package/dist/lib/evaluation/scorers/rule/lengthScorer.js +352 -0
- package/dist/lib/evaluation/scorers/scorerBuilder.d.ts +161 -0
- package/dist/lib/evaluation/scorers/scorerBuilder.js +421 -0
- package/dist/lib/evaluation/scorers/scorerRegistry.d.ts +62 -0
- package/dist/lib/evaluation/scorers/scorerRegistry.js +468 -0
- package/dist/lib/index.d.ts +37 -25
- package/dist/lib/index.js +65 -26
- package/dist/lib/neurolink.d.ts +204 -0
- package/dist/lib/neurolink.js +296 -0
- package/dist/lib/processors/media/VideoProcessor.d.ts +8 -2
- package/dist/lib/processors/media/VideoProcessor.js +90 -41
- package/dist/lib/telemetry/telemetryService.d.ts +1 -1
- package/dist/lib/telemetry/telemetryService.js +27 -13
- package/dist/lib/types/index.d.ts +3 -1
- package/dist/lib/types/index.js +3 -2
- package/dist/lib/types/scorerTypes.d.ts +423 -0
- package/dist/lib/types/scorerTypes.js +6 -0
- package/dist/lib/utils/errorHandling.d.ts +20 -0
- package/dist/lib/utils/errorHandling.js +60 -0
- package/dist/neurolink.d.ts +204 -0
- package/dist/neurolink.js +296 -0
- package/dist/processors/media/VideoProcessor.d.ts +8 -2
- package/dist/processors/media/VideoProcessor.js +90 -41
- package/dist/telemetry/telemetryService.d.ts +1 -1
- package/dist/telemetry/telemetryService.js +27 -13
- package/dist/types/index.d.ts +3 -1
- package/dist/types/index.js +3 -2
- package/dist/types/scorerTypes.d.ts +423 -0
- package/dist/types/scorerTypes.js +5 -0
- package/dist/utils/errorHandling.d.ts +20 -0
- package/dist/utils/errorHandling.js +60 -0
- package/package.json +7 -7
- package/dist/processors/media/ffprobe-static.d.ts +0 -4
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Pipeline Builder
|
|
3
|
+
* Fluent builder API for creating evaluation pipelines
|
|
4
|
+
*/
|
|
5
|
+
import { EvaluationPipeline } from "./evaluationPipeline.js";
|
|
6
|
+
/**
|
|
7
|
+
* Fluent builder for creating evaluation pipelines
|
|
8
|
+
*/
|
|
9
|
+
export class PipelineBuilder {
|
|
10
|
+
_name;
|
|
11
|
+
_description;
|
|
12
|
+
_scorers = [];
|
|
13
|
+
_aggregation = { method: "average" };
|
|
14
|
+
_passThreshold = 0.7;
|
|
15
|
+
_executionMode = "parallel";
|
|
16
|
+
_stopOnFailure = false;
|
|
17
|
+
_timeout;
|
|
18
|
+
_requiredScorers = [];
|
|
19
|
+
constructor(name) {
|
|
20
|
+
this._name = name;
|
|
21
|
+
}
|
|
22
|
+
/**
|
|
23
|
+
* Create a new pipeline builder
|
|
24
|
+
*/
|
|
25
|
+
static create(name) {
|
|
26
|
+
return new PipelineBuilder(name);
|
|
27
|
+
}
|
|
28
|
+
/**
|
|
29
|
+
* Set pipeline name
|
|
30
|
+
*/
|
|
31
|
+
name(name) {
|
|
32
|
+
this._name = name;
|
|
33
|
+
return this;
|
|
34
|
+
}
|
|
35
|
+
/**
|
|
36
|
+
* Set pipeline description
|
|
37
|
+
*/
|
|
38
|
+
description(desc) {
|
|
39
|
+
this._description = desc;
|
|
40
|
+
return this;
|
|
41
|
+
}
|
|
42
|
+
/**
|
|
43
|
+
* Add a scorer by ID
|
|
44
|
+
*/
|
|
45
|
+
addScorer(id, config) {
|
|
46
|
+
this._scorers.push({ id, config });
|
|
47
|
+
return this;
|
|
48
|
+
}
|
|
49
|
+
/**
|
|
50
|
+
* Add multiple scorers
|
|
51
|
+
*/
|
|
52
|
+
addScorers(...ids) {
|
|
53
|
+
for (const id of ids) {
|
|
54
|
+
this._scorers.push({ id });
|
|
55
|
+
}
|
|
56
|
+
return this;
|
|
57
|
+
}
|
|
58
|
+
/**
|
|
59
|
+
* Add a scorer and mark it as required
|
|
60
|
+
*/
|
|
61
|
+
requireScorer(id, config) {
|
|
62
|
+
const existing = this._scorers.find((scorer) => scorer.id === id);
|
|
63
|
+
if (existing) {
|
|
64
|
+
existing.config = {
|
|
65
|
+
...existing.config,
|
|
66
|
+
...config,
|
|
67
|
+
};
|
|
68
|
+
}
|
|
69
|
+
else {
|
|
70
|
+
this._scorers.push({ id, config });
|
|
71
|
+
}
|
|
72
|
+
if (!this._requiredScorers.includes(id)) {
|
|
73
|
+
this._requiredScorers.push(id);
|
|
74
|
+
}
|
|
75
|
+
return this;
|
|
76
|
+
}
|
|
77
|
+
/**
|
|
78
|
+
* Set aggregation method
|
|
79
|
+
*/
|
|
80
|
+
aggregateWith(method) {
|
|
81
|
+
this._aggregation.method = method;
|
|
82
|
+
return this;
|
|
83
|
+
}
|
|
84
|
+
/**
|
|
85
|
+
* Set weights for weighted aggregation
|
|
86
|
+
*/
|
|
87
|
+
withWeights(weights) {
|
|
88
|
+
this._aggregation.method = "weighted";
|
|
89
|
+
this._aggregation.weights = weights;
|
|
90
|
+
return this;
|
|
91
|
+
}
|
|
92
|
+
/**
|
|
93
|
+
* Set custom aggregation function
|
|
94
|
+
*/
|
|
95
|
+
customAggregation(fn) {
|
|
96
|
+
this._aggregation.method = "custom";
|
|
97
|
+
this._aggregation.customFn = fn;
|
|
98
|
+
return this;
|
|
99
|
+
}
|
|
100
|
+
/**
|
|
101
|
+
* Set pass/fail threshold
|
|
102
|
+
*/
|
|
103
|
+
passThreshold(threshold) {
|
|
104
|
+
this._passThreshold = threshold;
|
|
105
|
+
return this;
|
|
106
|
+
}
|
|
107
|
+
/**
|
|
108
|
+
* Run scorers in parallel (default)
|
|
109
|
+
*/
|
|
110
|
+
parallel() {
|
|
111
|
+
this._executionMode = "parallel";
|
|
112
|
+
return this;
|
|
113
|
+
}
|
|
114
|
+
/**
|
|
115
|
+
* Run scorers sequentially
|
|
116
|
+
*/
|
|
117
|
+
sequential() {
|
|
118
|
+
this._executionMode = "sequential";
|
|
119
|
+
return this;
|
|
120
|
+
}
|
|
121
|
+
/**
|
|
122
|
+
* Stop pipeline on first failure
|
|
123
|
+
*/
|
|
124
|
+
stopOnFailure() {
|
|
125
|
+
this._stopOnFailure = true;
|
|
126
|
+
return this;
|
|
127
|
+
}
|
|
128
|
+
/**
|
|
129
|
+
* Continue pipeline on failures (default)
|
|
130
|
+
*/
|
|
131
|
+
continueOnFailure() {
|
|
132
|
+
this._stopOnFailure = false;
|
|
133
|
+
return this;
|
|
134
|
+
}
|
|
135
|
+
/**
|
|
136
|
+
* Set pipeline timeout
|
|
137
|
+
*/
|
|
138
|
+
timeout(ms) {
|
|
139
|
+
this._timeout = ms;
|
|
140
|
+
return this;
|
|
141
|
+
}
|
|
142
|
+
/**
|
|
143
|
+
* Build the pipeline configuration
|
|
144
|
+
*/
|
|
145
|
+
buildConfig() {
|
|
146
|
+
return {
|
|
147
|
+
name: this._name,
|
|
148
|
+
description: this._description,
|
|
149
|
+
scorers: this._scorers.map((scorer) => ({
|
|
150
|
+
id: scorer.id,
|
|
151
|
+
config: scorer.config ? { ...scorer.config } : undefined,
|
|
152
|
+
})),
|
|
153
|
+
aggregation: {
|
|
154
|
+
...this._aggregation,
|
|
155
|
+
weights: this._aggregation.weights
|
|
156
|
+
? { ...this._aggregation.weights }
|
|
157
|
+
: undefined,
|
|
158
|
+
},
|
|
159
|
+
passThreshold: this._passThreshold,
|
|
160
|
+
executionMode: this._executionMode,
|
|
161
|
+
stopOnFailure: this._stopOnFailure,
|
|
162
|
+
timeout: this._timeout,
|
|
163
|
+
requiredScorers: this._requiredScorers.length > 0
|
|
164
|
+
? [...this._requiredScorers]
|
|
165
|
+
: undefined,
|
|
166
|
+
};
|
|
167
|
+
}
|
|
168
|
+
/**
|
|
169
|
+
* Build the pipeline (not initialized)
|
|
170
|
+
*/
|
|
171
|
+
build() {
|
|
172
|
+
return new EvaluationPipeline(this.buildConfig());
|
|
173
|
+
}
|
|
174
|
+
/**
|
|
175
|
+
* Build and initialize the pipeline
|
|
176
|
+
*/
|
|
177
|
+
async buildAndInitialize() {
|
|
178
|
+
const pipeline = this.build();
|
|
179
|
+
await pipeline.initialize();
|
|
180
|
+
return pipeline;
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
/**
 * Factory shortcuts that return pre-populated PipelineBuilder instances.
 * Each call creates a new builder, which can be customised further before
 * build() or buildConfig() is invoked on it.
 */
export const Pipelines = {
    /**
     * Start an empty builder with the given name.
     */
    create(name) {
        return PipelineBuilder.create(name);
    },
    /**
     * Safety builder: toxicity and bias-detection scorers, toxicity required,
     * "minimum" aggregation, pass threshold 0.8.
     */
    safety() {
        return PipelineBuilder.create("safety")
            .description("Safety evaluation pipeline")
            .addScorers("toxicity", "bias-detection")
            .requireScorer("toxicity")
            .aggregateWith("minimum")
            .passThreshold(0.8);
    },
    /**
     * RAG builder: faithfulness, context-relevancy, answer-relevancy and
     * hallucination scorers with weighted aggregation, pass threshold 0.7.
     */
    rag() {
        const ragWeights = {
            faithfulness: 1.5,
            "context-relevancy": 1.0,
            "answer-relevancy": 1.0,
            hallucination: 1.5,
        };
        return PipelineBuilder.create("rag")
            .description("RAG evaluation pipeline")
            .addScorers("faithfulness", "context-relevancy", "answer-relevancy", "hallucination")
            .withWeights(ragWeights)
            .passThreshold(0.7);
    },
    /**
     * Quality builder: tone-consistency, prompt-alignment, length and format
     * scorers, "average" aggregation, pass threshold 0.7.
     */
    quality() {
        return PipelineBuilder.create("quality")
            .description("Quality evaluation pipeline")
            .addScorers("tone-consistency", "prompt-alignment", "length", "format")
            .aggregateWith("average")
            .passThreshold(0.7);
    },
    /**
     * Comprehensive builder: eight scorers with weighted aggregation,
     * toxicity required, pass threshold 0.75.
     */
    comprehensive() {
        const comprehensiveWeights = {
            toxicity: 2.0,
            "bias-detection": 1.5,
            hallucination: 1.5,
            faithfulness: 1.0,
            "context-relevancy": 1.0,
            "answer-relevancy": 1.0,
            "tone-consistency": 0.8,
            "prompt-alignment": 0.8,
        };
        return PipelineBuilder.create("comprehensive")
            .description("Comprehensive evaluation pipeline")
            .addScorers("toxicity", "bias-detection", "hallucination", "faithfulness", "context-relevancy", "answer-relevancy", "tone-consistency", "prompt-alignment")
            .requireScorer("toxicity")
            .withWeights(comprehensiveWeights)
            .passThreshold(0.75);
    },
    /**
     * Minimal builder: toxicity and hallucination scorers in parallel mode,
     * pass threshold 0.8. The aggregation method is left at the builder default.
     */
    minimal() {
        return PipelineBuilder.create("minimal")
            .description("Minimal fast evaluation pipeline")
            .addScorers("toxicity", "hallucination")
            .parallel()
            .passThreshold(0.8);
    },
    /**
     * Summarization builder: summarization, faithfulness, content-similarity
     * and length scorers with weighted aggregation, pass threshold 0.7.
     */
    summarization() {
        const summarizationWeights = {
            summarization: 1.5,
            faithfulness: 1.2,
            "content-similarity": 1.0,
            length: 0.8,
        };
        return PipelineBuilder.create("summarization")
            .description("Summarization quality evaluation pipeline")
            .addScorers("summarization", "faithfulness", "content-similarity", "length")
            .withWeights(summarizationWeights)
            .passThreshold(0.7);
    },
};
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Pipeline Presets
|
|
3
|
+
* Pre-configured evaluation pipelines for common use cases
|
|
4
|
+
*/
|
|
5
|
+
import type { PipelineConfig } from "../../types/scorerTypes.js";
|
|
6
|
+
/**
 * Preset configuration for safety evaluation.
 * Targets content safety: toxicity, bias and harmful content.
 */
export declare const SAFETY_PIPELINE: PipelineConfig;
|
|
11
|
+
/**
 * Preset configuration for RAG evaluation.
 * Assesses the quality of Retrieval Augmented Generation output.
 */
export declare const RAG_PIPELINE: PipelineConfig;
|
|
16
|
+
/**
 * Preset configuration for quality evaluation.
 * Targets response quality: format, length and tone.
 */
export declare const QUALITY_PIPELINE: PipelineConfig;
|
|
21
|
+
/**
 * Preset configuration for comprehensive evaluation.
 * Covers every evaluation dimension in a single pipeline.
 */
export declare const COMPREHENSIVE_PIPELINE: PipelineConfig;
|
|
26
|
+
/**
 * Preset configuration for minimal, fast evaluation.
 * Intended as a quick check in high-throughput scenarios.
 */
export declare const MINIMAL_PIPELINE: PipelineConfig;
|
|
31
|
+
/**
 * Preset configuration for summarization evaluation.
 * Assesses the quality of generated summaries.
 */
export declare const SUMMARIZATION_PIPELINE: PipelineConfig;
|
|
36
|
+
/**
 * Preset configuration for customer support evaluation.
 * Tuned for customer service responses.
 */
export declare const CUSTOMER_SUPPORT_PIPELINE: PipelineConfig;
|
|
41
|
+
/**
 * Preset configuration for code generation evaluation.
 * Assesses the quality of generated code.
 */
export declare const CODE_GENERATION_PIPELINE: PipelineConfig;
|
|
46
|
+
/**
 * Lookup table of every available preset, keyed by preset name.
 * All entries are read-only properties.
 */
export declare const PipelinePresets: {
    readonly safety: PipelineConfig;
    readonly rag: PipelineConfig;
    readonly quality: PipelineConfig;
    readonly comprehensive: PipelineConfig;
    readonly minimal: PipelineConfig;
    readonly summarization: PipelineConfig;
    readonly customerSupport: PipelineConfig;
    readonly codeGeneration: PipelineConfig;
};
|
|
59
|
+
/**
 * Look up a preset pipeline configuration.
 *
 * @param name One of the keys of PipelinePresets.
 * @returns The configuration registered under that key.
 */
export declare function getPreset(name: keyof typeof PipelinePresets): PipelineConfig;
|
|
63
|
+
/**
 * List the names of all available presets.
 *
 * @returns The preset names as an array of strings.
 */
export declare function getPresetNames(): string[];
|
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Pipeline Presets
|
|
3
|
+
* Pre-configured evaluation pipelines for common use cases
|
|
4
|
+
*/
|
|
5
|
+
/**
 * Safety preset.
 *
 * Runs the toxicity and bias-detection scorers in parallel, uses the
 * "minimum" aggregation method with a 0.8 pass threshold, and lists
 * toxicity as a required scorer.
 */
export const SAFETY_PIPELINE = {
    name: "safety",
    description: "Safety evaluation pipeline for detecting harmful content",
    scorers: [
        {
            id: "toxicity",
            config: { threshold: 0.9, weight: 2.0 },
        },
        {
            id: "bias-detection",
            config: { threshold: 0.8, weight: 1.5 },
        },
    ],
    aggregation: { method: "minimum" },
    passThreshold: 0.8,
    executionMode: "parallel",
    requiredScorers: ["toxicity"],
};
|
|
23
|
+
/**
 * RAG (Retrieval Augmented Generation) preset.
 *
 * Five scorers combined with weighted aggregation and a 0.7 pass threshold;
 * faithfulness and hallucination are listed as required. The aggregation
 * weights repeat the per-scorer `weight` values.
 */
export const RAG_PIPELINE = {
    name: "rag",
    description: "RAG evaluation pipeline for retrieval-augmented generation",
    scorers: [
        {
            id: "faithfulness",
            config: { weight: 1.5 },
        },
        {
            id: "context-relevancy",
            config: { weight: 1.0 },
        },
        {
            id: "context-precision",
            config: { weight: 1.0 },
        },
        {
            id: "answer-relevancy",
            config: { weight: 1.2 },
        },
        {
            id: "hallucination",
            config: { weight: 1.5 },
        },
    ],
    aggregation: {
        method: "weighted",
        weights: {
            faithfulness: 1.5,
            "context-relevancy": 1.0,
            "context-precision": 1.0,
            "answer-relevancy": 1.2,
            hallucination: 1.5,
        },
    },
    passThreshold: 0.7,
    executionMode: "parallel",
    requiredScorers: ["faithfulness", "hallucination"],
};
|
|
51
|
+
/**
 * Quality preset.
 *
 * Scores tone consistency, prompt alignment, length and format in parallel,
 * using the "average" aggregation method and a 0.7 pass threshold. No scorer
 * is listed as required.
 */
export const QUALITY_PIPELINE = {
    name: "quality",
    description: "Quality evaluation pipeline for response assessment",
    scorers: [
        {
            id: "tone-consistency",
            config: { weight: 1.0 },
        },
        {
            id: "prompt-alignment",
            config: { weight: 1.2 },
        },
        {
            id: "length",
            config: { weight: 0.8 },
        },
        {
            id: "format",
            config: { weight: 0.8 },
        },
    ],
    aggregation: { method: "average" },
    passThreshold: 0.7,
    executionMode: "parallel",
};
|
|
70
|
+
/**
 * Comprehensive preset.
 *
 * Eight scorers spanning safety, accuracy, relevancy and quality, combined
 * with weighted aggregation and a 0.75 pass threshold; toxicity is listed as
 * required. The aggregation weights repeat the per-scorer `weight` values.
 */
export const COMPREHENSIVE_PIPELINE = {
    name: "comprehensive",
    description: "Comprehensive evaluation pipeline covering all aspects",
    scorers: [
        // Safety
        {
            id: "toxicity",
            config: { threshold: 0.9, weight: 2.0 },
        },
        {
            id: "bias-detection",
            config: { threshold: 0.8, weight: 1.5 },
        },
        // Accuracy
        {
            id: "hallucination",
            config: { weight: 1.5 },
        },
        {
            id: "faithfulness",
            config: { weight: 1.2 },
        },
        // Relevancy
        {
            id: "context-relevancy",
            config: { weight: 1.0 },
        },
        {
            id: "answer-relevancy",
            config: { weight: 1.0 },
        },
        // Quality
        {
            id: "tone-consistency",
            config: { weight: 0.8 },
        },
        {
            id: "prompt-alignment",
            config: { weight: 1.0 },
        },
    ],
    aggregation: {
        method: "weighted",
        weights: {
            toxicity: 2.0,
            "bias-detection": 1.5,
            hallucination: 1.5,
            faithfulness: 1.2,
            "context-relevancy": 1.0,
            "answer-relevancy": 1.0,
            "tone-consistency": 0.8,
            "prompt-alignment": 1.0,
        },
    },
    passThreshold: 0.75,
    executionMode: "parallel",
    requiredScorers: ["toxicity"],
};
|
|
108
|
+
/**
 * Minimal preset for high-throughput scenarios.
 *
 * Only the toxicity and hallucination scorers, run in parallel with the
 * "minimum" aggregation method, a 0.8 pass threshold and a timeout of 10000.
 */
export const MINIMAL_PIPELINE = {
    name: "minimal",
    description: "Minimal fast evaluation for high-throughput scenarios",
    scorers: [
        {
            id: "toxicity",
            config: { threshold: 0.9 },
        },
        {
            id: "hallucination",
            config: { threshold: 0.8 },
        },
    ],
    aggregation: { method: "minimum" },
    passThreshold: 0.8,
    executionMode: "parallel",
    timeout: 10000,
};
|
|
126
|
+
/**
 * Summarization preset.
 *
 * Scores summarization, faithfulness, content similarity and length, combined
 * with weighted aggregation and a 0.7 pass threshold. The aggregation weights
 * repeat the per-scorer `weight` values.
 */
export const SUMMARIZATION_PIPELINE = {
    name: "summarization",
    description: "Summarization quality evaluation pipeline",
    scorers: [
        {
            id: "summarization",
            config: { weight: 1.5 },
        },
        {
            id: "faithfulness",
            config: { weight: 1.2 },
        },
        {
            id: "content-similarity",
            config: { weight: 1.0 },
        },
        {
            id: "length",
            config: { weight: 0.8 },
        },
    ],
    aggregation: {
        method: "weighted",
        weights: {
            summarization: 1.5,
            faithfulness: 1.2,
            "content-similarity": 1.0,
            length: 0.8,
        },
    },
    passThreshold: 0.7,
    executionMode: "parallel",
};
|
|
151
|
+
/**
 * Customer support preset.
 *
 * Scores toxicity, tone consistency, prompt alignment and answer relevancy,
 * combined with weighted aggregation and a 0.8 pass threshold; toxicity is
 * listed as required.
 */
export const CUSTOMER_SUPPORT_PIPELINE = {
    name: "customerSupport",
    description: "Customer support response evaluation pipeline",
    scorers: [
        {
            id: "toxicity",
            config: { threshold: 0.95, weight: 2.0 },
        },
        {
            id: "tone-consistency",
            config: { weight: 1.5 },
        },
        {
            id: "prompt-alignment",
            config: { weight: 1.2 },
        },
        {
            id: "answer-relevancy",
            config: { weight: 1.0 },
        },
    ],
    aggregation: {
        method: "weighted",
        weights: {
            toxicity: 2.0,
            "tone-consistency": 1.5,
            "prompt-alignment": 1.2,
            "answer-relevancy": 1.0,
        },
    },
    passThreshold: 0.8,
    executionMode: "parallel",
    requiredScorers: ["toxicity"],
};
|
|
177
|
+
/**
 * Code generation preset.
 *
 * Scores format, prompt alignment and hallucination, combined with weighted
 * aggregation and a 0.75 pass threshold. Its execution mode is "sequential".
 */
export const CODE_GENERATION_PIPELINE = {
    name: "codeGeneration",
    description: "Code generation quality evaluation pipeline",
    scorers: [
        {
            id: "format",
            config: { weight: 1.0 },
        },
        {
            id: "prompt-alignment",
            config: { weight: 1.5 },
        },
        {
            id: "hallucination",
            config: { weight: 1.2 },
        },
    ],
    aggregation: {
        method: "weighted",
        weights: {
            format: 1.0,
            "prompt-alignment": 1.5,
            hallucination: 1.2,
        },
    },
    passThreshold: 0.75,
    executionMode: "sequential",
};
|
|
200
|
+
/**
 * Registry of every preset configuration, keyed by preset name.
 * The values are the preset objects themselves, not copies.
 */
export const PipelinePresets = {
    safety: SAFETY_PIPELINE,
    rag: RAG_PIPELINE,
    quality: QUALITY_PIPELINE,
    comprehensive: COMPREHENSIVE_PIPELINE,
    minimal: MINIMAL_PIPELINE,
    summarization: SUMMARIZATION_PIPELINE,
    customerSupport: CUSTOMER_SUPPORT_PIPELINE,
    codeGeneration: CODE_GENERATION_PIPELINE,
};
|
|
213
|
+
/**
 * Look up a preset pipeline configuration by name.
 *
 * Only presets registered directly on PipelinePresets are returned. Any other
 * key, including names inherited from Object.prototype such as "constructor"
 * or "toString", yields undefined.
 *
 * @param name Preset key, e.g. "safety" or "rag".
 * @returns The preset object registered under that key (the shared object,
 *          not a copy), or undefined when no such preset exists.
 */
export function getPreset(name) {
    // A bare PipelinePresets[name] would also resolve inherited members
    // (e.g. "constructor"), so restrict the lookup to own properties.
    return Object.prototype.hasOwnProperty.call(PipelinePresets, name)
        ? PipelinePresets[name]
        : undefined;
}
|
|
219
|
+
/**
|
|
220
|
+
* Get all available preset names
|
|
221
|
+
*/
|
|
222
|
+
export function getPresetNames() {
|
|
223
|
+
return Object.keys(PipelinePresets);
|
|
224
|
+
}
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Batch Strategy
|
|
3
|
+
* Batch processing for evaluation pipelines
|
|
4
|
+
*/
|
|
5
|
+
import type { ScorerInput } from "../../../types/scorerTypes.js";
|
|
6
|
+
import type { EvaluationPipeline, PipelineExecutionOptions, PipelineResult } from "../evaluationPipeline.js";
|
|
7
|
+
/**
 * Options controlling how a batch of inputs is evaluated.
 * Every field is optional.
 */
export type BatchConfig = {
  /** Upper bound on how many evaluations run concurrently. */
  concurrency?: number;

  /** Pause inserted between batches, in milliseconds. */
  batchDelay?: number;

  /** Whether to keep going when an individual evaluation fails. */
  continueOnError?: boolean;

  /** Callback that receives batch progress information. */
  onProgress?: (progress: BatchProgress) => void;

  /** Callback that receives each individual item result. */
  onResult?: (result: BatchItemResult) => void;
};
|
|
22
|
+
/**
 * Snapshot of how far a batch evaluation has progressed.
 */
export type BatchProgress = {
  /** Total item count for the batch. */
  total: number;

  /** Count of completed items. */
  completed: number;

  /** Count of failed items. */
  failed: number;

  /** Count of items still remaining. */
  remaining: number;

  /** Completion percentage. NOTE(review): scale (0-100 vs 0-1) is not visible in this declaration — confirm. */
  percentComplete: number;

  /** Estimated time remaining, when available. NOTE(review): presumably milliseconds — confirm against the implementation. */
  estimatedTimeRemaining?: number;
};
|
|
33
|
+
/**
 * Outcome of evaluating one item of a batch.
 */
export type BatchItemResult = {
  /** Index of the item. NOTE(review): presumably its position in the input array — confirm. */
  index: number;

  /** The scorer input this result belongs to. */
  input: ScorerInput;

  /** Pipeline result for the item; optional, so it may be absent. */
  result?: PipelineResult;

  /** Error text for the item; optional, so it may be absent. */
  error?: string;

  /** Duration recorded for this item. NOTE(review): unit not shown here, presumably milliseconds — confirm. */
  duration: number;
};
|
|
43
|
+
/**
 * Result of evaluating a whole batch: the per-item results plus summary
 * statistics.
 */
export type BatchResult = {
  /** All individual item results. */
  results: BatchItemResult[];

  /** Summary statistics for the batch. */
  summary: {
    /** Total item count. */
    total: number;
    /** Count of successful items. */
    successful: number;
    /** Count of failed items. */
    failed: number;
    /** Average score. NOTE(review): which items contribute is not visible here — confirm. */
    averageScore: number;
    /** Pass rate. NOTE(review): scale (fraction vs percentage) is not visible here — confirm. */
    passRate: number;
    /** Total duration. NOTE(review): presumably milliseconds — confirm. */
    totalDuration: number;
    /** Average duration. NOTE(review): presumably milliseconds per item — confirm. */
    averageDuration: number;
  };
};
|
|
60
|
+
/**
 * Batch evaluation strategy: evaluates an array of scorer inputs with an
 * evaluation pipeline.
 */
export declare class BatchStrategy {
  /** Private pipeline slot. NOTE(review): presumably the pipeline given to the constructor — confirm. */
  private _pipeline;

  /** Private configuration slot. NOTE(review): presumably the effective batch config — confirm. */
  private _config;

  /**
   * @param pipeline - Evaluation pipeline for this strategy.
   * @param config - Optional batch processing configuration.
   */
  constructor(pipeline: EvaluationPipeline, config?: BatchConfig);

  /**
   * Evaluate a batch of inputs.
   *
   * @param inputs - Scorer inputs to evaluate.
   * @param options - Optional pipeline execution options.
   * @returns A promise for the batch result (item results and summary).
   */
  evaluate(inputs: ScorerInput[], options?: PipelineExecutionOptions): Promise<BatchResult>;

  /** Evaluates a single item. */
  private _evaluateItem;

  /** Estimates the remaining time from the average duration. */
  private _estimateRemainingTime;

  /** Delay helper. */
  private _delay;

  /**
   * Update configuration.
   *
   * @param config - Any subset of the batch configuration fields.
   */
  configure(config: Partial<BatchConfig>): void;
}
|
|
88
|
+
/**
 * Factory for a batch strategy for the given pipeline.
 *
 * @param pipeline - Evaluation pipeline the strategy is created for.
 * @param config - Optional batch processing configuration.
 * @returns A `BatchStrategy` instance.
 */
export declare function createBatchStrategy(
  pipeline: EvaluationPipeline,
  config?: BatchConfig,
): BatchStrategy;
|
|
92
|
+
/**
 * Evaluate a batch of inputs using a pipeline.
 *
 * @param pipeline - Evaluation pipeline to use.
 * @param inputs - Scorer inputs to evaluate.
 * @param config - Optional batch processing configuration.
 * @returns A promise for the batch result (item results and summary).
 */
export declare function evaluateBatch(
  pipeline: EvaluationPipeline,
  inputs: ScorerInput[],
  config?: BatchConfig,
): Promise<BatchResult>;
|
|
96
|
+
/**
 * Stream batch evaluation results.
 *
 * The returned async generator yields one `BatchItemResult` per yielded value
 * and its final return value is the batch summary.
 *
 * @param pipeline - Evaluation pipeline to use.
 * @param inputs - Scorer inputs to evaluate.
 * @param config - Optional batch configuration; the `onProgress` and
 *   `onResult` callbacks are excluded from the accepted type.
 * @returns An async generator of item results that returns the summary.
 */
export declare function streamBatchEvaluation(
  pipeline: EvaluationPipeline,
  inputs: ScorerInput[],
  config?: Omit<BatchConfig, "onProgress" | "onResult">,
): AsyncGenerator<BatchItemResult, BatchResult["summary"], void>;
|