@juspay/neurolink 9.36.0 → 9.37.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (203) hide show
  1. package/CHANGELOG.md +12 -0
  2. package/dist/auth/errors.d.ts +1 -1
  3. package/dist/auth/middleware/AuthMiddleware.d.ts +1 -1
  4. package/dist/auth/providers/BaseAuthProvider.d.ts +1 -1
  5. package/dist/browser/neurolink.min.js +921 -423
  6. package/dist/cli/commands/evaluate.d.ts +48 -0
  7. package/dist/cli/commands/evaluate.js +955 -0
  8. package/dist/cli/commands/proxy.js +6 -6
  9. package/dist/cli/parser.js +4 -1
  10. package/dist/evaluation/BatchEvaluator.d.ts +163 -0
  11. package/dist/evaluation/BatchEvaluator.js +267 -0
  12. package/dist/evaluation/EvaluationAggregator.d.ts +272 -0
  13. package/dist/evaluation/EvaluationAggregator.js +377 -0
  14. package/dist/evaluation/EvaluatorFactory.d.ts +113 -0
  15. package/dist/evaluation/EvaluatorFactory.js +280 -0
  16. package/dist/evaluation/EvaluatorRegistry.d.ts +160 -0
  17. package/dist/evaluation/EvaluatorRegistry.js +184 -0
  18. package/dist/evaluation/errors/EvaluationError.d.ts +189 -0
  19. package/dist/evaluation/errors/EvaluationError.js +206 -0
  20. package/dist/evaluation/errors/index.d.ts +4 -0
  21. package/dist/evaluation/errors/index.js +4 -0
  22. package/dist/evaluation/hooks/index.d.ts +6 -0
  23. package/dist/evaluation/hooks/index.js +6 -0
  24. package/dist/evaluation/hooks/langfuseAdapter.d.ts +99 -0
  25. package/dist/evaluation/hooks/langfuseAdapter.js +172 -0
  26. package/dist/evaluation/hooks/observabilityHooks.d.ts +129 -0
  27. package/dist/evaluation/hooks/observabilityHooks.js +181 -0
  28. package/dist/evaluation/index.d.ts +11 -2
  29. package/dist/evaluation/index.js +15 -0
  30. package/dist/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
  31. package/dist/evaluation/pipeline/evaluationPipeline.js +381 -0
  32. package/dist/evaluation/pipeline/index.d.ts +8 -0
  33. package/dist/evaluation/pipeline/index.js +8 -0
  34. package/dist/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
  35. package/dist/evaluation/pipeline/pipelineBuilder.js +260 -0
  36. package/dist/evaluation/pipeline/presets.d.ts +66 -0
  37. package/dist/evaluation/pipeline/presets.js +224 -0
  38. package/dist/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
  39. package/dist/evaluation/pipeline/strategies/batchStrategy.js +238 -0
  40. package/dist/evaluation/pipeline/strategies/index.d.ts +6 -0
  41. package/dist/evaluation/pipeline/strategies/index.js +6 -0
  42. package/dist/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
  43. package/dist/evaluation/pipeline/strategies/samplingStrategy.js +238 -0
  44. package/dist/evaluation/reporting/index.d.ts +6 -0
  45. package/dist/evaluation/reporting/index.js +6 -0
  46. package/dist/evaluation/reporting/metricsCollector.d.ts +147 -0
  47. package/dist/evaluation/reporting/metricsCollector.js +285 -0
  48. package/dist/evaluation/reporting/reportGenerator.d.ts +90 -0
  49. package/dist/evaluation/reporting/reportGenerator.js +374 -0
  50. package/dist/evaluation/scorers/baseScorer.d.ts +83 -0
  51. package/dist/evaluation/scorers/baseScorer.js +232 -0
  52. package/dist/evaluation/scorers/customScorerUtils.d.ts +95 -0
  53. package/dist/evaluation/scorers/customScorerUtils.js +381 -0
  54. package/dist/evaluation/scorers/index.d.ts +10 -0
  55. package/dist/evaluation/scorers/index.js +16 -0
  56. package/dist/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
  57. package/dist/evaluation/scorers/llm/answerRelevancyScorer.js +99 -0
  58. package/dist/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
  59. package/dist/evaluation/scorers/llm/baseLLMScorer.js +281 -0
  60. package/dist/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
  61. package/dist/evaluation/scorers/llm/biasDetectionScorer.js +127 -0
  62. package/dist/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
  63. package/dist/evaluation/scorers/llm/contextPrecisionScorer.js +92 -0
  64. package/dist/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
  65. package/dist/evaluation/scorers/llm/contextRelevancyScorer.js +107 -0
  66. package/dist/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
  67. package/dist/evaluation/scorers/llm/faithfulnessScorer.js +121 -0
  68. package/dist/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
  69. package/dist/evaluation/scorers/llm/hallucinationScorer.js +140 -0
  70. package/dist/evaluation/scorers/llm/index.d.ts +15 -0
  71. package/dist/evaluation/scorers/llm/index.js +16 -0
  72. package/dist/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
  73. package/dist/evaluation/scorers/llm/promptAlignmentScorer.js +106 -0
  74. package/dist/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
  75. package/dist/evaluation/scorers/llm/summarizationScorer.js +114 -0
  76. package/dist/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
  77. package/dist/evaluation/scorers/llm/toneConsistencyScorer.js +106 -0
  78. package/dist/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
  79. package/dist/evaluation/scorers/llm/toxicityScorer.js +121 -0
  80. package/dist/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
  81. package/dist/evaluation/scorers/rule/baseRuleScorer.js +233 -0
  82. package/dist/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
  83. package/dist/evaluation/scorers/rule/contentSimilarityScorer.js +350 -0
  84. package/dist/evaluation/scorers/rule/formatScorer.d.ts +147 -0
  85. package/dist/evaluation/scorers/rule/formatScorer.js +470 -0
  86. package/dist/evaluation/scorers/rule/index.d.ts +9 -0
  87. package/dist/evaluation/scorers/rule/index.js +10 -0
  88. package/dist/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
  89. package/dist/evaluation/scorers/rule/keywordCoverageScorer.js +347 -0
  90. package/dist/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
  91. package/dist/evaluation/scorers/rule/lengthScorer.js +351 -0
  92. package/dist/evaluation/scorers/scorerBuilder.d.ts +161 -0
  93. package/dist/evaluation/scorers/scorerBuilder.js +420 -0
  94. package/dist/evaluation/scorers/scorerRegistry.d.ts +62 -0
  95. package/dist/evaluation/scorers/scorerRegistry.js +467 -0
  96. package/dist/index.d.ts +37 -25
  97. package/dist/index.js +65 -26
  98. package/dist/lib/auth/providers/BaseAuthProvider.d.ts +1 -1
  99. package/dist/lib/evaluation/BatchEvaluator.d.ts +163 -0
  100. package/dist/lib/evaluation/BatchEvaluator.js +268 -0
  101. package/dist/lib/evaluation/EvaluationAggregator.d.ts +272 -0
  102. package/dist/lib/evaluation/EvaluationAggregator.js +378 -0
  103. package/dist/lib/evaluation/EvaluatorFactory.d.ts +113 -0
  104. package/dist/lib/evaluation/EvaluatorFactory.js +281 -0
  105. package/dist/lib/evaluation/EvaluatorRegistry.d.ts +160 -0
  106. package/dist/lib/evaluation/EvaluatorRegistry.js +185 -0
  107. package/dist/lib/evaluation/errors/EvaluationError.d.ts +189 -0
  108. package/dist/lib/evaluation/errors/EvaluationError.js +207 -0
  109. package/dist/lib/evaluation/errors/index.d.ts +4 -0
  110. package/dist/lib/evaluation/errors/index.js +5 -0
  111. package/dist/lib/evaluation/hooks/index.d.ts +6 -0
  112. package/dist/lib/evaluation/hooks/index.js +7 -0
  113. package/dist/lib/evaluation/hooks/langfuseAdapter.d.ts +99 -0
  114. package/dist/lib/evaluation/hooks/langfuseAdapter.js +173 -0
  115. package/dist/lib/evaluation/hooks/observabilityHooks.d.ts +129 -0
  116. package/dist/lib/evaluation/hooks/observabilityHooks.js +182 -0
  117. package/dist/lib/evaluation/index.d.ts +11 -2
  118. package/dist/lib/evaluation/index.js +15 -0
  119. package/dist/lib/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
  120. package/dist/lib/evaluation/pipeline/evaluationPipeline.js +382 -0
  121. package/dist/lib/evaluation/pipeline/index.d.ts +8 -0
  122. package/dist/lib/evaluation/pipeline/index.js +9 -0
  123. package/dist/lib/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
  124. package/dist/lib/evaluation/pipeline/pipelineBuilder.js +261 -0
  125. package/dist/lib/evaluation/pipeline/presets.d.ts +66 -0
  126. package/dist/lib/evaluation/pipeline/presets.js +225 -0
  127. package/dist/lib/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
  128. package/dist/lib/evaluation/pipeline/strategies/batchStrategy.js +239 -0
  129. package/dist/lib/evaluation/pipeline/strategies/index.d.ts +6 -0
  130. package/dist/lib/evaluation/pipeline/strategies/index.js +7 -0
  131. package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
  132. package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.js +239 -0
  133. package/dist/lib/evaluation/reporting/index.d.ts +6 -0
  134. package/dist/lib/evaluation/reporting/index.js +7 -0
  135. package/dist/lib/evaluation/reporting/metricsCollector.d.ts +147 -0
  136. package/dist/lib/evaluation/reporting/metricsCollector.js +286 -0
  137. package/dist/lib/evaluation/reporting/reportGenerator.d.ts +90 -0
  138. package/dist/lib/evaluation/reporting/reportGenerator.js +375 -0
  139. package/dist/lib/evaluation/scorers/baseScorer.d.ts +83 -0
  140. package/dist/lib/evaluation/scorers/baseScorer.js +233 -0
  141. package/dist/lib/evaluation/scorers/customScorerUtils.d.ts +95 -0
  142. package/dist/lib/evaluation/scorers/customScorerUtils.js +382 -0
  143. package/dist/lib/evaluation/scorers/index.d.ts +10 -0
  144. package/dist/lib/evaluation/scorers/index.js +17 -0
  145. package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
  146. package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.js +100 -0
  147. package/dist/lib/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
  148. package/dist/lib/evaluation/scorers/llm/baseLLMScorer.js +282 -0
  149. package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
  150. package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.js +128 -0
  151. package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
  152. package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.js +93 -0
  153. package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
  154. package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.js +108 -0
  155. package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
  156. package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.js +122 -0
  157. package/dist/lib/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
  158. package/dist/lib/evaluation/scorers/llm/hallucinationScorer.js +141 -0
  159. package/dist/lib/evaluation/scorers/llm/index.d.ts +15 -0
  160. package/dist/lib/evaluation/scorers/llm/index.js +17 -0
  161. package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
  162. package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.js +107 -0
  163. package/dist/lib/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
  164. package/dist/lib/evaluation/scorers/llm/summarizationScorer.js +115 -0
  165. package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
  166. package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.js +107 -0
  167. package/dist/lib/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
  168. package/dist/lib/evaluation/scorers/llm/toxicityScorer.js +122 -0
  169. package/dist/lib/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
  170. package/dist/lib/evaluation/scorers/rule/baseRuleScorer.js +234 -0
  171. package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
  172. package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.js +351 -0
  173. package/dist/lib/evaluation/scorers/rule/formatScorer.d.ts +147 -0
  174. package/dist/lib/evaluation/scorers/rule/formatScorer.js +471 -0
  175. package/dist/lib/evaluation/scorers/rule/index.d.ts +9 -0
  176. package/dist/lib/evaluation/scorers/rule/index.js +11 -0
  177. package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
  178. package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.js +348 -0
  179. package/dist/lib/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
  180. package/dist/lib/evaluation/scorers/rule/lengthScorer.js +352 -0
  181. package/dist/lib/evaluation/scorers/scorerBuilder.d.ts +161 -0
  182. package/dist/lib/evaluation/scorers/scorerBuilder.js +421 -0
  183. package/dist/lib/evaluation/scorers/scorerRegistry.d.ts +62 -0
  184. package/dist/lib/evaluation/scorers/scorerRegistry.js +468 -0
  185. package/dist/lib/index.d.ts +37 -25
  186. package/dist/lib/index.js +65 -26
  187. package/dist/lib/neurolink.d.ts +204 -0
  188. package/dist/lib/neurolink.js +296 -0
  189. package/dist/lib/types/index.d.ts +3 -1
  190. package/dist/lib/types/index.js +3 -2
  191. package/dist/lib/types/scorerTypes.d.ts +423 -0
  192. package/dist/lib/types/scorerTypes.js +6 -0
  193. package/dist/lib/utils/errorHandling.d.ts +20 -0
  194. package/dist/lib/utils/errorHandling.js +60 -0
  195. package/dist/neurolink.d.ts +204 -0
  196. package/dist/neurolink.js +296 -0
  197. package/dist/types/index.d.ts +3 -1
  198. package/dist/types/index.js +3 -2
  199. package/dist/types/scorerTypes.d.ts +423 -0
  200. package/dist/types/scorerTypes.js +5 -0
  201. package/dist/utils/errorHandling.d.ts +20 -0
  202. package/dist/utils/errorHandling.js +60 -0
  203. package/package.json +1 -1
@@ -0,0 +1,114 @@
1
+ /**
2
+ * @file Evaluation Pipeline
3
+ * Multi-scorer orchestration with configurable execution
4
+ */
5
+ import type { JsonObject } from "../../types/common.js";
6
+ import type { AggregatedScores, PipelineConfig, Scorer, ScorerInput } from "../../types/scorerTypes.js";
7
+ /**
8
+ * Per-call options accepted by EvaluationPipeline.execute()
9
+ */
10
+ export type PipelineExecutionOptions = {
11
+ /** Correlation ID for tracing; execute() generates `pipeline-<timestamp>` when omitted */
12
+ correlationId?: string;
13
+ /** Per-scorer timeout in ms; takes precedence over the scorer's and the pipeline's configured timeout */
14
+ timeout?: number;
15
+ /** Scorer IDs to exclude from this run; ignored when onlyScorers is non-empty */
16
+ skipScorers?: string[];
17
+ /** Restrict this run to the listed scorer IDs; takes precedence over skipScorers */
18
+ onlyScorers?: string[];
19
+ /** Additional metadata to attach; returned unchanged via PipelineResult.executionOptions */
20
+ metadata?: JsonObject;
21
+ };
22
+ /**
23
+ * Result of EvaluationPipeline.execute(): the aggregated scores plus bookkeeping about the run
24
+ */
25
+ export type PipelineResult = AggregatedScores & {
26
+ /** Pipeline configuration used */
27
+ pipelineConfig: PipelineConfig;
28
+ /** Execution options that were passed to execute(), if any */
29
+ executionOptions?: PipelineExecutionOptions;
30
+ /** One entry per scorer whose run failed or whose result carried an error message */
31
+ errors: Array<{
32
+ scorerId: string;
33
+ error: string;
34
+ }>;
35
+ /** Loaded scorers excluded via onlyScorers/skipScorers; scorers not reached after stopOnFailure are not listed */
36
+ skippedScorers: string[];
37
+ };
38
/**
 * Evaluation Pipeline for running multiple scorers.
 *
 * Scorers are loaded from the scorer registry on initialize() and stored by
 * ID; execute() runs them in parallel or sequentially and aggregates the
 * individual results into a single pass/fail outcome.
 */
export declare class EvaluationPipeline {
    private _config;
    private _scorers;
    private _initialized;
    constructor(config: PipelineConfig);
    /**
     * Get pipeline configuration
     */
    get config(): PipelineConfig;
    /**
     * Check if pipeline is initialized
     */
    get initialized(): boolean;
    /**
     * Initialize the pipeline by loading all scorers.
     * Rejects when a scorer listed in `requiredScorers` could not be loaded.
     * Calling it again after a successful initialization is a no-op.
     */
    initialize(): Promise<void>;
    /**
     * Execute the pipeline on input, initializing it first if necessary
     */
    execute(input: ScorerInput, options?: PipelineExecutionOptions): Promise<PipelineResult>;
    /**
     * Get scorers to run based on options
     */
    private _getScorersToRun;
    /**
     * Get list of skipped scorers
     */
    private _getSkippedScorers;
    /**
     * Execute a single scorer with timeout
     */
    private _executeScorer;
    /**
     * Rescale a result's score to the default 0-MAX scale using its own scale info
     */
    private _rescaleToDefault;
    /**
     * Aggregate scores based on configuration
     */
    private _aggregateScores;
    /**
     * Add a scorer instance to the pipeline under the given ID
     */
    addScorer(id: string, scorer: Scorer): void;
    /**
     * Remove a scorer from the pipeline.
     * Returns true when a scorer was registered under the ID and has been removed.
     */
    removeScorer(id: string): boolean;
    /**
     * Get a scorer by ID
     */
    getScorer(id: string): Scorer | undefined;
    /**
     * Get all scorer IDs
     */
    getScorerIds(): string[];
    /**
     * Update pipeline configuration (shallow merge into the current configuration)
     */
    configure(config: Partial<PipelineConfig>): void;
    /**
     * Create a clone of this pipeline.
     * The configuration is copied; scorer instances are not, so the clone
     * starts uninitialized.
     */
    clone(): EvaluationPipeline;
}
107
+ /**
108
+ * Create a new evaluation pipeline (not yet initialized; execute() initializes it on first use)
109
+ */
110
+ export declare function createPipeline(config: PipelineConfig): EvaluationPipeline;
111
+ /**
112
+ * Create a pipeline and await its initialize() before returning it
113
+ */
114
+ export declare function createAndInitializePipeline(config: PipelineConfig): Promise<EvaluationPipeline>;
@@ -0,0 +1,381 @@
1
+ /**
2
+ * @file Evaluation Pipeline
3
+ * Multi-scorer orchestration with configurable execution
4
+ */
5
+ import { logger } from "../../utils/logger.js";
6
+ import { withTimeout } from "../../utils/errorHandling.js";
7
+ import { DEFAULT_SCORE_SCALE } from "../scorers/baseScorer.js";
8
+ import { ScorerRegistry } from "../scorers/scorerRegistry.js";
9
+ /**
10
+ * Evaluation Pipeline for running multiple scorers
11
+ */
12
+ export class EvaluationPipeline {
13
+ _config;
14
+ _scorers = new Map();
15
+ _initialized = false;
16
+ constructor(config) {
17
+ this._config = {
18
+ executionMode: "parallel",
19
+ stopOnFailure: false,
20
+ passThreshold: 0.7,
21
+ ...config,
22
+ };
23
+ }
24
+ /**
25
+ * Get pipeline configuration
26
+ */
27
+ get config() {
28
+ return this._config;
29
+ }
30
+ /**
31
+ * Check if pipeline is initialized
32
+ */
33
+ get initialized() {
34
+ return this._initialized;
35
+ }
36
+ /**
37
+ * Initialize the pipeline by loading all scorers
38
+ */
39
+ async initialize() {
40
+ if (this._initialized) {
41
+ return;
42
+ }
43
+ logger.debug(`Initializing evaluation pipeline: ${this._config.name ?? "unnamed"}`);
44
+ // Initialize registry
45
+ await ScorerRegistry.registerBuiltInScorers();
46
+ // Load all configured scorers using canonical IDs as map keys
47
+ for (const scorerDef of this._config.scorers) {
48
+ try {
49
+ const scorer = await ScorerRegistry.getScorer(scorerDef.id, scorerDef.config);
50
+ if (scorer) {
51
+ const canonicalId = scorer.metadata.id;
52
+ this._scorers.set(canonicalId, scorer);
53
+ logger.debug(`Loaded scorer: ${scorerDef.id} (canonical: ${canonicalId})`);
54
+ }
55
+ else {
56
+ logger.warn(`Scorer not found: ${scorerDef.id}`);
57
+ }
58
+ }
59
+ catch (error) {
60
+ logger.error(`Failed to load scorer: ${scorerDef.id}`, {
61
+ error: error instanceof Error ? error.message : String(error),
62
+ });
63
+ }
64
+ }
65
+ // Normalize requiredScorers to canonical IDs
66
+ if (this._config.requiredScorers) {
67
+ this._config.requiredScorers = this._config.requiredScorers.map((id) => {
68
+ // Look up by alias first, then try canonical
69
+ for (const [canonicalId, scorer] of this._scorers) {
70
+ if (scorer.metadata.id === id || canonicalId === id) {
71
+ return canonicalId;
72
+ }
73
+ }
74
+ return id;
75
+ });
76
+ }
77
+ // Validate required scorers are actually loaded
78
+ if (this._config.requiredScorers) {
79
+ const missing = this._config.requiredScorers.filter((id) => !this._scorers.has(id));
80
+ if (missing.length > 0) {
81
+ throw new Error(`Required scorers could not be loaded: ${missing.join(", ")}`);
82
+ }
83
+ }
84
+ this._initialized = true;
85
+ logger.debug(`Pipeline initialized with ${this._scorers.size} scorers`);
86
+ }
87
+ /**
88
+ * Execute the pipeline on input
89
+ */
90
+ async execute(input, options) {
91
+ if (!this._initialized) {
92
+ await this.initialize();
93
+ }
94
+ const startTime = Date.now();
95
+ const correlationId = options?.correlationId ?? `pipeline-${Date.now()}`;
96
+ logger.debug(`Executing pipeline: ${this._config.name ?? "unnamed"}`, {
97
+ correlationId,
98
+ scorerCount: this._scorers.size,
99
+ });
100
+ // Determine which scorers to run
101
+ const scorersToRun = this._getScorersToRun(options);
102
+ const skippedScorers = this._getSkippedScorers(options);
103
+ // Execute scorers
104
+ const results = [];
105
+ const errors = [];
106
+ if (this._config.executionMode === "parallel") {
107
+ // Parallel execution
108
+ const promises = scorersToRun.map(([id, scorer]) => this._executeScorer(id, scorer, input, options?.timeout));
109
+ const settledResults = await Promise.allSettled(promises);
110
+ for (let i = 0; i < settledResults.length; i++) {
111
+ const result = settledResults[i];
112
+ const [id] = scorersToRun[i];
113
+ if (result.status === "fulfilled") {
114
+ results.push(result.value);
115
+ if (result.value.error) {
116
+ errors.push({ scorerId: id, error: result.value.error });
117
+ }
118
+ }
119
+ else {
120
+ errors.push({
121
+ scorerId: id,
122
+ error: result.reason?.message ?? "Unknown error",
123
+ });
124
+ }
125
+ }
126
+ }
127
+ else {
128
+ // Sequential execution
129
+ for (const [id, scorer] of scorersToRun) {
130
+ try {
131
+ const result = await this._executeScorer(id, scorer, input, options?.timeout);
132
+ results.push(result);
133
+ if (result.error) {
134
+ errors.push({ scorerId: id, error: result.error });
135
+ }
136
+ // Check for stop on failure
137
+ if (this._config.stopOnFailure && !result.passed) {
138
+ logger.debug(`Stopping pipeline execution: scorer ${id} failed`);
139
+ break;
140
+ }
141
+ }
142
+ catch (error) {
143
+ const errorMessage = error instanceof Error ? error.message : String(error);
144
+ errors.push({ scorerId: id, error: errorMessage });
145
+ if (this._config.stopOnFailure) {
146
+ break;
147
+ }
148
+ }
149
+ }
150
+ }
151
+ // Aggregate results
152
+ const aggregated = this._aggregateScores(results);
153
+ const totalComputeTime = Date.now() - startTime;
154
+ // Check required scorers
155
+ const requiredScorers = this._config.requiredScorers ?? [];
156
+ const allRequiredPassed = requiredScorers.every((id) => {
157
+ const result = results.find((r) => r.scorerId === id);
158
+ return result?.passed ?? false;
159
+ });
160
+ const overallPassed = aggregated.normalizedScore >= (this._config.passThreshold ?? 0.7) &&
161
+ allRequiredPassed;
162
+ return {
163
+ scores: results,
164
+ overallScore: aggregated.score,
165
+ aggregationMethod: this._config.aggregation?.method ?? "average",
166
+ passed: overallPassed,
167
+ totalComputeTime,
168
+ timestamp: Date.now(),
169
+ correlationId,
170
+ pipelineConfig: this._config,
171
+ executionOptions: options,
172
+ errors,
173
+ skippedScorers,
174
+ };
175
+ }
176
+ /**
177
+ * Get scorers to run based on options
178
+ */
179
+ _getScorersToRun(options) {
180
+ const allScorers = Array.from(this._scorers.entries());
181
+ if (options?.onlyScorers && options.onlyScorers.length > 0) {
182
+ return allScorers.filter(([id]) => options.onlyScorers.includes(id));
183
+ }
184
+ if (options?.skipScorers && options.skipScorers.length > 0) {
185
+ return allScorers.filter(([id]) => !options.skipScorers.includes(id));
186
+ }
187
+ return allScorers;
188
+ }
189
+ /**
190
+ * Get list of skipped scorers
191
+ */
192
+ _getSkippedScorers(options) {
193
+ const allIds = Array.from(this._scorers.keys());
194
+ if (options?.onlyScorers && options.onlyScorers.length > 0) {
195
+ return allIds.filter((id) => !options.onlyScorers.includes(id));
196
+ }
197
+ if (options?.skipScorers && options.skipScorers.length > 0) {
198
+ return options.skipScorers.filter((id) => allIds.includes(id));
199
+ }
200
+ return [];
201
+ }
202
+ /**
203
+ * Execute a single scorer with timeout
204
+ */
205
+ async _executeScorer(id, scorer, input, timeout) {
206
+ const scorerTimeout = timeout ?? scorer.config.timeout ?? this._config.timeout ?? 30000;
207
+ try {
208
+ const result = await withTimeout(scorer.score(input), scorerTimeout, new Error(`Scorer ${id} timed out after ${scorerTimeout}ms`));
209
+ return result;
210
+ }
211
+ catch (error) {
212
+ const errorMessage = error instanceof Error ? error.message : String(error);
213
+ return {
214
+ scorerId: id,
215
+ scorerName: scorer.metadata.name,
216
+ score: 0,
217
+ normalizedScore: 0,
218
+ scale: DEFAULT_SCORE_SCALE,
219
+ reasoning: `Scorer execution failed: ${errorMessage}`,
220
+ passed: false,
221
+ threshold: scorer.config.threshold ?? 0.7,
222
+ computeTime: 0,
223
+ error: errorMessage,
224
+ };
225
+ }
226
+ }
227
+ /**
228
+ * Aggregate scores based on configuration
229
+ */
230
+ /**
231
+ * Rescale a result's score to the default 0-MAX scale using its own scale info
232
+ */
233
+ _rescaleToDefault(result) {
234
+ const scale = result.scale ?? DEFAULT_SCORE_SCALE;
235
+ if (scale.max === scale.min) {
236
+ return 0;
237
+ }
238
+ // Normalize to 0-1 then rescale to default
239
+ const normalized = (result.score - scale.min) / (scale.max - scale.min);
240
+ return normalized * DEFAULT_SCORE_SCALE.max;
241
+ }
242
+ _aggregateScores(results) {
243
+ if (results.length === 0) {
244
+ return { score: 0, normalizedScore: 0 };
245
+ }
246
+ const aggregation = this._config.aggregation ?? { method: "average" };
247
+ const weights = aggregation.weights ?? {};
248
+ // Rescale all results to the common default scale before aggregation
249
+ const rescaled = results.map((r) => this._rescaleToDefault(r));
250
+ let score;
251
+ switch (aggregation.method) {
252
+ case "minimum":
253
+ score = Math.min(...rescaled);
254
+ break;
255
+ case "maximum":
256
+ score = Math.max(...rescaled);
257
+ break;
258
+ case "weighted": {
259
+ let totalWeight = 0;
260
+ let weightedSum = 0;
261
+ // Build a reverse map from canonical scorer ID to configured key
262
+ const configuredKeyMap = new Map();
263
+ for (const scorerDef of this._config.scorers) {
264
+ const scorer = this._scorers.get(scorerDef.id);
265
+ if (scorer) {
266
+ configuredKeyMap.set(scorer.metadata.id, scorerDef.id);
267
+ }
268
+ }
269
+ for (let i = 0; i < results.length; i++) {
270
+ const result = results[i];
271
+ const configuredKey = configuredKeyMap.get(result.scorerId);
272
+ const weight = weights[result.scorerId] ??
273
+ (configuredKey ? weights[configuredKey] : undefined) ??
274
+ 1.0;
275
+ totalWeight += weight;
276
+ weightedSum += rescaled[i] * weight;
277
+ }
278
+ score = totalWeight > 0 ? weightedSum / totalWeight : 0;
279
+ break;
280
+ }
281
+ case "custom":
282
+ if (aggregation.customFn) {
283
+ score = aggregation.customFn(results);
284
+ // Clamp custom output to valid range
285
+ score = Math.max(0, Math.min(DEFAULT_SCORE_SCALE.max, score));
286
+ }
287
+ else {
288
+ score = rescaled.reduce((sum, s) => sum + s, 0) / rescaled.length;
289
+ }
290
+ break;
291
+ case "average":
292
+ default:
293
+ score = rescaled.reduce((sum, s) => sum + s, 0) / rescaled.length;
294
+ break;
295
+ }
296
+ const normalizedScore = score / DEFAULT_SCORE_SCALE.max;
297
+ return { score, normalizedScore };
298
+ }
299
+ /**
300
+ * Add a scorer to the pipeline
301
+ */
302
+ addScorer(id, scorer) {
303
+ this._scorers.set(id, scorer);
304
+ // Update config
305
+ if (!this._config.scorers.some((s) => s.id === id)) {
306
+ this._config.scorers.push({ id });
307
+ }
308
+ }
309
+ /**
310
+ * Remove a scorer from the pipeline
311
+ */
312
+ removeScorer(id) {
313
+ const removed = this._scorers.delete(id);
314
+ if (removed) {
315
+ this._config.scorers = this._config.scorers.filter((s) => s.id !== id);
316
+ this._config.requiredScorers = this._config.requiredScorers?.filter((requiredId) => requiredId !== id);
317
+ }
318
+ return removed;
319
+ }
320
+ /**
321
+ * Get a scorer by ID
322
+ */
323
+ getScorer(id) {
324
+ return this._scorers.get(id);
325
+ }
326
+ /**
327
+ * Get all scorer IDs
328
+ */
329
+ getScorerIds() {
330
+ return Array.from(this._scorers.keys());
331
+ }
332
+ /**
333
+ * Update pipeline configuration
334
+ */
335
+ configure(config) {
336
+ this._config = { ...this._config, ...config };
337
+ }
338
+ /**
339
+ * Create a clone of this pipeline
340
+ */
341
+ clone() {
342
+ const clonedConfig = {
343
+ ...this._config,
344
+ scorers: this._config.scorers.map((s) => ({
345
+ id: s.id,
346
+ config: s.config ? { ...s.config } : undefined,
347
+ })),
348
+ aggregation: this._config.aggregation
349
+ ? {
350
+ ...this._config.aggregation,
351
+ weights: this._config.aggregation.weights
352
+ ? { ...this._config.aggregation.weights }
353
+ : undefined,
354
+ }
355
+ : undefined,
356
+ requiredScorers: this._config.requiredScorers
357
+ ? [...this._config.requiredScorers]
358
+ : undefined,
359
+ };
360
+ const cloned = new EvaluationPipeline(clonedConfig);
361
+ // Do not copy scorer instances to avoid shared mutable state
362
+ // (e.g., BaseLLMScorer.provider, initializationPromise).
363
+ // The cloned pipeline will create fresh scorers on initialize().
364
+ cloned._initialized = false;
365
+ return cloned;
366
+ }
367
+ }
368
/**
 * Construct an evaluation pipeline from the given configuration.
 * The pipeline is returned as constructed; initialize() is not called here.
 */
export function createPipeline(config) {
    const pipeline = new EvaluationPipeline(config);
    return pipeline;
}
374
/**
 * Construct an evaluation pipeline and wait for its initialize() to finish
 * before handing it back to the caller.
 */
export async function createAndInitializePipeline(config) {
    const ready = new EvaluationPipeline(config);
    // Resolve only once the pipeline has finished initializing
    return ready.initialize().then(() => ready);
}
@@ -0,0 +1,8 @@
1
+ /**
2
+ * @file Pipeline Index
3
+ * Export all pipeline components
4
+ */
5
+ export { createAndInitializePipeline, createPipeline, EvaluationPipeline, type PipelineExecutionOptions, type PipelineResult, } from "./evaluationPipeline.js";
6
+ export { PipelineBuilder, Pipelines } from "./pipelineBuilder.js";
7
+ export { CODE_GENERATION_PIPELINE, COMPREHENSIVE_PIPELINE, CUSTOMER_SUPPORT_PIPELINE, getPreset, getPresetNames, MINIMAL_PIPELINE, PipelinePresets, QUALITY_PIPELINE, RAG_PIPELINE, SAFETY_PIPELINE, SUMMARIZATION_PIPELINE, } from "./presets.js";
8
+ export * from "./strategies/index.js";
@@ -0,0 +1,8 @@
1
+ /**
2
+ * @file Pipeline Index
3
+ * Export all pipeline components
4
+ */
5
+ export { createAndInitializePipeline, createPipeline, EvaluationPipeline, } from "./evaluationPipeline.js";
6
+ export { PipelineBuilder, Pipelines } from "./pipelineBuilder.js";
7
+ export { CODE_GENERATION_PIPELINE, COMPREHENSIVE_PIPELINE, CUSTOMER_SUPPORT_PIPELINE, getPreset, getPresetNames, MINIMAL_PIPELINE, PipelinePresets, QUALITY_PIPELINE, RAG_PIPELINE, SAFETY_PIPELINE, SUMMARIZATION_PIPELINE, } from "./presets.js";
8
+ export * from "./strategies/index.js";
@@ -0,0 +1,126 @@
1
+ /**
2
+ * @file Pipeline Builder
3
+ * Fluent builder API for creating evaluation pipelines
4
+ */
5
+ import type { AggregationMethod, PipelineConfig, ScoreResult, ScorerConfig } from "../../types/scorerTypes.js";
6
+ import { EvaluationPipeline } from "./evaluationPipeline.js";
7
+ /**
8
+ * Fluent builder for creating evaluation pipelines; every setter returns `this` so calls can be chained
9
+ */
10
+ export declare class PipelineBuilder {
11
+ private _name?;
12
+ private _description?;
13
+ private _scorers;
14
+ private _aggregation;
15
+ private _passThreshold;
16
+ private _executionMode;
17
+ private _stopOnFailure;
18
+ private _timeout?;
19
+ private _requiredScorers;
20
+ constructor(name?: string);
21
+ /**
22
+ * Create a new pipeline builder, optionally with a pipeline name
23
+ */
24
+ static create(name?: string): PipelineBuilder;
25
+ /**
26
+ * Set pipeline name
27
+ */
28
+ name(name: string): this;
29
+ /**
30
+ * Set pipeline description
31
+ */
32
+ description(desc: string): this;
33
+ /**
34
+ * Add a scorer by ID, with an optional scorer configuration
35
+ */
36
+ addScorer(id: string, config?: ScorerConfig): this;
37
+ /**
38
+ * Add multiple scorers by ID
39
+ */
40
+ addScorers(...ids: string[]): this;
41
+ /**
42
+ * Add a scorer and mark it as required
43
+ */
44
+ requireScorer(id: string, config?: ScorerConfig): this;
45
+ /**
46
+ * Set aggregation method
47
+ */
48
+ aggregateWith(method: AggregationMethod): this;
49
+ /**
50
+ * Set weights for weighted aggregation (EvaluationPipeline looks weights up by scorer ID)
51
+ */
52
+ withWeights(weights: Record<string, number>): this;
53
+ /**
54
+ * Set custom aggregation function: receives the score results and returns the aggregate score
55
+ */
56
+ customAggregation(fn: (scores: ScoreResult[]) => number): this;
57
+ /**
58
+ * Set pass/fail threshold (EvaluationPipeline compares it against the normalized overall score)
59
+ */
60
+ passThreshold(threshold: number): this;
61
+ /**
62
+ * Run scorers in parallel (default)
63
+ */
64
+ parallel(): this;
65
+ /**
66
+ * Run scorers sequentially
67
+ */
68
+ sequential(): this;
69
+ /**
70
+ * Stop pipeline on first failure (EvaluationPipeline only checks this during sequential execution)
71
+ */
72
+ stopOnFailure(): this;
73
+ /**
74
+ * Continue pipeline on failures (default)
75
+ */
76
+ continueOnFailure(): this;
77
+ /**
78
+ * Set pipeline timeout in milliseconds
79
+ */
80
+ timeout(ms: number): this;
81
+ /**
82
+ * Build the pipeline configuration
83
+ */
84
+ buildConfig(): PipelineConfig;
85
+ /**
86
+ * Build the pipeline (not initialized)
87
+ */
88
+ build(): EvaluationPipeline;
89
+ /**
90
+ * Build and initialize the pipeline
91
+ */
92
+ buildAndInitialize(): Promise<EvaluationPipeline>;
93
+ }
94
+ /**
95
+ * Quick pipeline builder factory: every member returns a PipelineBuilder, not a built pipeline
96
+ */
97
+ export declare const Pipelines: {
98
+ /**
99
+ * Create a new pipeline builder, optionally with a pipeline name
100
+ */
101
+ create: (name?: string) => PipelineBuilder;
102
+ /**
103
+ * Create a builder for a safety-focused pipeline
104
+ */
105
+ safety: () => PipelineBuilder;
106
+ /**
107
+ * Create a builder for a RAG evaluation pipeline
108
+ */
109
+ rag: () => PipelineBuilder;
110
+ /**
111
+ * Create a builder for a quality-focused pipeline
112
+ */
113
+ quality: () => PipelineBuilder;
114
+ /**
115
+ * Create a builder for a comprehensive pipeline with all scorers
116
+ */
117
+ comprehensive: () => PipelineBuilder;
118
+ /**
119
+ * Create a builder for a minimal fast pipeline
120
+ */
121
+ minimal: () => PipelineBuilder;
122
+ /**
123
+ * Create a builder for a summarization evaluation pipeline
124
+ */
125
+ summarization: () => PipelineBuilder;
126
+ };