@juspay/neurolink 9.36.1 → 9.38.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211) hide show
  1. package/CHANGELOG.md +12 -0
  2. package/dist/auth/errors.d.ts +1 -1
  3. package/dist/auth/middleware/AuthMiddleware.d.ts +1 -1
  4. package/dist/auth/providers/BaseAuthProvider.d.ts +1 -1
  5. package/dist/browser/neurolink.min.js +1105 -556
  6. package/dist/cli/commands/evaluate.d.ts +48 -0
  7. package/dist/cli/commands/evaluate.js +955 -0
  8. package/dist/cli/parser.js +4 -1
  9. package/dist/evaluation/BatchEvaluator.d.ts +163 -0
  10. package/dist/evaluation/BatchEvaluator.js +267 -0
  11. package/dist/evaluation/EvaluationAggregator.d.ts +272 -0
  12. package/dist/evaluation/EvaluationAggregator.js +377 -0
  13. package/dist/evaluation/EvaluatorFactory.d.ts +113 -0
  14. package/dist/evaluation/EvaluatorFactory.js +280 -0
  15. package/dist/evaluation/EvaluatorRegistry.d.ts +160 -0
  16. package/dist/evaluation/EvaluatorRegistry.js +184 -0
  17. package/dist/evaluation/errors/EvaluationError.d.ts +189 -0
  18. package/dist/evaluation/errors/EvaluationError.js +206 -0
  19. package/dist/evaluation/errors/index.d.ts +4 -0
  20. package/dist/evaluation/errors/index.js +4 -0
  21. package/dist/evaluation/hooks/index.d.ts +6 -0
  22. package/dist/evaluation/hooks/index.js +6 -0
  23. package/dist/evaluation/hooks/langfuseAdapter.d.ts +99 -0
  24. package/dist/evaluation/hooks/langfuseAdapter.js +172 -0
  25. package/dist/evaluation/hooks/observabilityHooks.d.ts +129 -0
  26. package/dist/evaluation/hooks/observabilityHooks.js +181 -0
  27. package/dist/evaluation/index.d.ts +11 -2
  28. package/dist/evaluation/index.js +15 -0
  29. package/dist/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
  30. package/dist/evaluation/pipeline/evaluationPipeline.js +381 -0
  31. package/dist/evaluation/pipeline/index.d.ts +8 -0
  32. package/dist/evaluation/pipeline/index.js +8 -0
  33. package/dist/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
  34. package/dist/evaluation/pipeline/pipelineBuilder.js +260 -0
  35. package/dist/evaluation/pipeline/presets.d.ts +66 -0
  36. package/dist/evaluation/pipeline/presets.js +224 -0
  37. package/dist/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
  38. package/dist/evaluation/pipeline/strategies/batchStrategy.js +238 -0
  39. package/dist/evaluation/pipeline/strategies/index.d.ts +6 -0
  40. package/dist/evaluation/pipeline/strategies/index.js +6 -0
  41. package/dist/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
  42. package/dist/evaluation/pipeline/strategies/samplingStrategy.js +238 -0
  43. package/dist/evaluation/reporting/index.d.ts +6 -0
  44. package/dist/evaluation/reporting/index.js +6 -0
  45. package/dist/evaluation/reporting/metricsCollector.d.ts +147 -0
  46. package/dist/evaluation/reporting/metricsCollector.js +285 -0
  47. package/dist/evaluation/reporting/reportGenerator.d.ts +90 -0
  48. package/dist/evaluation/reporting/reportGenerator.js +374 -0
  49. package/dist/evaluation/scorers/baseScorer.d.ts +83 -0
  50. package/dist/evaluation/scorers/baseScorer.js +232 -0
  51. package/dist/evaluation/scorers/customScorerUtils.d.ts +95 -0
  52. package/dist/evaluation/scorers/customScorerUtils.js +381 -0
  53. package/dist/evaluation/scorers/index.d.ts +10 -0
  54. package/dist/evaluation/scorers/index.js +16 -0
  55. package/dist/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
  56. package/dist/evaluation/scorers/llm/answerRelevancyScorer.js +99 -0
  57. package/dist/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
  58. package/dist/evaluation/scorers/llm/baseLLMScorer.js +281 -0
  59. package/dist/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
  60. package/dist/evaluation/scorers/llm/biasDetectionScorer.js +127 -0
  61. package/dist/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
  62. package/dist/evaluation/scorers/llm/contextPrecisionScorer.js +92 -0
  63. package/dist/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
  64. package/dist/evaluation/scorers/llm/contextRelevancyScorer.js +107 -0
  65. package/dist/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
  66. package/dist/evaluation/scorers/llm/faithfulnessScorer.js +121 -0
  67. package/dist/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
  68. package/dist/evaluation/scorers/llm/hallucinationScorer.js +140 -0
  69. package/dist/evaluation/scorers/llm/index.d.ts +15 -0
  70. package/dist/evaluation/scorers/llm/index.js +16 -0
  71. package/dist/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
  72. package/dist/evaluation/scorers/llm/promptAlignmentScorer.js +106 -0
  73. package/dist/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
  74. package/dist/evaluation/scorers/llm/summarizationScorer.js +114 -0
  75. package/dist/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
  76. package/dist/evaluation/scorers/llm/toneConsistencyScorer.js +106 -0
  77. package/dist/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
  78. package/dist/evaluation/scorers/llm/toxicityScorer.js +121 -0
  79. package/dist/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
  80. package/dist/evaluation/scorers/rule/baseRuleScorer.js +233 -0
  81. package/dist/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
  82. package/dist/evaluation/scorers/rule/contentSimilarityScorer.js +350 -0
  83. package/dist/evaluation/scorers/rule/formatScorer.d.ts +147 -0
  84. package/dist/evaluation/scorers/rule/formatScorer.js +470 -0
  85. package/dist/evaluation/scorers/rule/index.d.ts +9 -0
  86. package/dist/evaluation/scorers/rule/index.js +10 -0
  87. package/dist/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
  88. package/dist/evaluation/scorers/rule/keywordCoverageScorer.js +347 -0
  89. package/dist/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
  90. package/dist/evaluation/scorers/rule/lengthScorer.js +351 -0
  91. package/dist/evaluation/scorers/scorerBuilder.d.ts +161 -0
  92. package/dist/evaluation/scorers/scorerBuilder.js +420 -0
  93. package/dist/evaluation/scorers/scorerRegistry.d.ts +62 -0
  94. package/dist/evaluation/scorers/scorerRegistry.js +467 -0
  95. package/dist/index.d.ts +37 -25
  96. package/dist/index.js +65 -26
  97. package/dist/lib/auth/providers/BaseAuthProvider.d.ts +1 -1
  98. package/dist/lib/evaluation/BatchEvaluator.d.ts +163 -0
  99. package/dist/lib/evaluation/BatchEvaluator.js +268 -0
  100. package/dist/lib/evaluation/EvaluationAggregator.d.ts +272 -0
  101. package/dist/lib/evaluation/EvaluationAggregator.js +378 -0
  102. package/dist/lib/evaluation/EvaluatorFactory.d.ts +113 -0
  103. package/dist/lib/evaluation/EvaluatorFactory.js +281 -0
  104. package/dist/lib/evaluation/EvaluatorRegistry.d.ts +160 -0
  105. package/dist/lib/evaluation/EvaluatorRegistry.js +185 -0
  106. package/dist/lib/evaluation/errors/EvaluationError.d.ts +189 -0
  107. package/dist/lib/evaluation/errors/EvaluationError.js +207 -0
  108. package/dist/lib/evaluation/errors/index.d.ts +4 -0
  109. package/dist/lib/evaluation/errors/index.js +5 -0
  110. package/dist/lib/evaluation/hooks/index.d.ts +6 -0
  111. package/dist/lib/evaluation/hooks/index.js +7 -0
  112. package/dist/lib/evaluation/hooks/langfuseAdapter.d.ts +99 -0
  113. package/dist/lib/evaluation/hooks/langfuseAdapter.js +173 -0
  114. package/dist/lib/evaluation/hooks/observabilityHooks.d.ts +129 -0
  115. package/dist/lib/evaluation/hooks/observabilityHooks.js +182 -0
  116. package/dist/lib/evaluation/index.d.ts +11 -2
  117. package/dist/lib/evaluation/index.js +15 -0
  118. package/dist/lib/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
  119. package/dist/lib/evaluation/pipeline/evaluationPipeline.js +382 -0
  120. package/dist/lib/evaluation/pipeline/index.d.ts +8 -0
  121. package/dist/lib/evaluation/pipeline/index.js +9 -0
  122. package/dist/lib/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
  123. package/dist/lib/evaluation/pipeline/pipelineBuilder.js +261 -0
  124. package/dist/lib/evaluation/pipeline/presets.d.ts +66 -0
  125. package/dist/lib/evaluation/pipeline/presets.js +225 -0
  126. package/dist/lib/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
  127. package/dist/lib/evaluation/pipeline/strategies/batchStrategy.js +239 -0
  128. package/dist/lib/evaluation/pipeline/strategies/index.d.ts +6 -0
  129. package/dist/lib/evaluation/pipeline/strategies/index.js +7 -0
  130. package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
  131. package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.js +239 -0
  132. package/dist/lib/evaluation/reporting/index.d.ts +6 -0
  133. package/dist/lib/evaluation/reporting/index.js +7 -0
  134. package/dist/lib/evaluation/reporting/metricsCollector.d.ts +147 -0
  135. package/dist/lib/evaluation/reporting/metricsCollector.js +286 -0
  136. package/dist/lib/evaluation/reporting/reportGenerator.d.ts +90 -0
  137. package/dist/lib/evaluation/reporting/reportGenerator.js +375 -0
  138. package/dist/lib/evaluation/scorers/baseScorer.d.ts +83 -0
  139. package/dist/lib/evaluation/scorers/baseScorer.js +233 -0
  140. package/dist/lib/evaluation/scorers/customScorerUtils.d.ts +95 -0
  141. package/dist/lib/evaluation/scorers/customScorerUtils.js +382 -0
  142. package/dist/lib/evaluation/scorers/index.d.ts +10 -0
  143. package/dist/lib/evaluation/scorers/index.js +17 -0
  144. package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
  145. package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.js +100 -0
  146. package/dist/lib/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
  147. package/dist/lib/evaluation/scorers/llm/baseLLMScorer.js +282 -0
  148. package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
  149. package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.js +128 -0
  150. package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
  151. package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.js +93 -0
  152. package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
  153. package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.js +108 -0
  154. package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
  155. package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.js +122 -0
  156. package/dist/lib/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
  157. package/dist/lib/evaluation/scorers/llm/hallucinationScorer.js +141 -0
  158. package/dist/lib/evaluation/scorers/llm/index.d.ts +15 -0
  159. package/dist/lib/evaluation/scorers/llm/index.js +17 -0
  160. package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
  161. package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.js +107 -0
  162. package/dist/lib/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
  163. package/dist/lib/evaluation/scorers/llm/summarizationScorer.js +115 -0
  164. package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
  165. package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.js +107 -0
  166. package/dist/lib/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
  167. package/dist/lib/evaluation/scorers/llm/toxicityScorer.js +122 -0
  168. package/dist/lib/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
  169. package/dist/lib/evaluation/scorers/rule/baseRuleScorer.js +234 -0
  170. package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
  171. package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.js +351 -0
  172. package/dist/lib/evaluation/scorers/rule/formatScorer.d.ts +147 -0
  173. package/dist/lib/evaluation/scorers/rule/formatScorer.js +471 -0
  174. package/dist/lib/evaluation/scorers/rule/index.d.ts +9 -0
  175. package/dist/lib/evaluation/scorers/rule/index.js +11 -0
  176. package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
  177. package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.js +348 -0
  178. package/dist/lib/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
  179. package/dist/lib/evaluation/scorers/rule/lengthScorer.js +352 -0
  180. package/dist/lib/evaluation/scorers/scorerBuilder.d.ts +161 -0
  181. package/dist/lib/evaluation/scorers/scorerBuilder.js +421 -0
  182. package/dist/lib/evaluation/scorers/scorerRegistry.d.ts +62 -0
  183. package/dist/lib/evaluation/scorers/scorerRegistry.js +468 -0
  184. package/dist/lib/index.d.ts +37 -25
  185. package/dist/lib/index.js +65 -26
  186. package/dist/lib/neurolink.d.ts +204 -0
  187. package/dist/lib/neurolink.js +296 -0
  188. package/dist/lib/processors/media/VideoProcessor.d.ts +8 -2
  189. package/dist/lib/processors/media/VideoProcessor.js +90 -41
  190. package/dist/lib/telemetry/telemetryService.d.ts +1 -1
  191. package/dist/lib/telemetry/telemetryService.js +27 -13
  192. package/dist/lib/types/index.d.ts +3 -1
  193. package/dist/lib/types/index.js +3 -2
  194. package/dist/lib/types/scorerTypes.d.ts +423 -0
  195. package/dist/lib/types/scorerTypes.js +6 -0
  196. package/dist/lib/utils/errorHandling.d.ts +20 -0
  197. package/dist/lib/utils/errorHandling.js +60 -0
  198. package/dist/neurolink.d.ts +204 -0
  199. package/dist/neurolink.js +296 -0
  200. package/dist/processors/media/VideoProcessor.d.ts +8 -2
  201. package/dist/processors/media/VideoProcessor.js +90 -41
  202. package/dist/telemetry/telemetryService.d.ts +1 -1
  203. package/dist/telemetry/telemetryService.js +27 -13
  204. package/dist/types/index.d.ts +3 -1
  205. package/dist/types/index.js +3 -2
  206. package/dist/types/scorerTypes.d.ts +423 -0
  207. package/dist/types/scorerTypes.js +5 -0
  208. package/dist/utils/errorHandling.d.ts +20 -0
  209. package/dist/utils/errorHandling.js +60 -0
  210. package/package.json +7 -7
  211. package/dist/processors/media/ffprobe-static.d.ts +0 -4
@@ -14,6 +14,7 @@ import { ragCommand } from "./commands/rag.js";
14
14
  import { ObservabilityCommandFactory } from "./commands/observability.js";
15
15
  import { TelemetryCommandFactory } from "./commands/telemetry.js";
16
16
  import { proxyStartCommand, proxyStatusCommand, proxySetupCommand, proxyGuardCommand, proxyInstallCommand, proxyUninstallCommand, } from "./commands/proxy.js";
17
+ import { EvaluateCommandFactory } from "./commands/evaluate.js";
17
18
  // Enhanced CLI with Professional UX
18
19
  export function initializeCliParser() {
19
20
  return (yargs(hideBin(process.argv))
@@ -196,6 +197,8 @@ export function initializeCliParser() {
196
197
  .command(proxyUninstallCommand)
197
198
  .demandCommand(1, "Please specify a proxy subcommand: start, status, setup, guard, install, or uninstall"),
198
199
  handler: () => { },
199
- })); // Close the main return statement
200
+ })
201
+ // Evaluate Command Group - Using EvaluateCommandFactory
202
+ .command(EvaluateCommandFactory.createEvaluateCommand())); // Close the main return statement
200
203
  }
201
204
  //# sourceMappingURL=parser.js.map
@@ -0,0 +1,163 @@
1
+ /**
2
+ * @file BatchEvaluator - Supports batch evaluation of multiple responses.
3
+ * Enables parallel evaluation with configurable concurrency and error handling.
4
+ */
5
+ import type { LanguageModelV3CallOptions } from "@ai-sdk/provider";
6
+ import type { GenerateResult } from "../types/generateTypes.js";
7
+ import type { EvaluationConfig } from "../types/evaluationTypes.js";
8
+ import type { EvaluationData } from "../types/evaluation.js";
9
+ import type { AutoEvaluationConfig } from "../types/middlewareTypes.js";
10
+ /**
11
+ * Configuration for batch evaluation: the base evaluation settings plus concurrency, retry, and callback options.
12
+ */
13
+ export interface BatchEvaluationConfig extends EvaluationConfig {
14
+ /** Maximum number of items evaluated concurrently in each batch slice (default: 5) */
15
+ concurrency?: number;
16
+ /** Whether to continue on individual failures (default: true); when false, a failed item makes the batch call throw */
17
+ continueOnError?: boolean;
18
+ /** Maximum retries per item for retryable errors (default: 2) */
19
+ maxRetries?: number;
20
+ /** Delay before each retry in milliseconds (default: 1000) */
21
+ retryDelay?: number;
22
+ /** Callback for progress updates, invoked after each item finishes; errors it throws are caught and logged */
23
+ onProgress?: (progress: BatchProgress) => void;
24
+ /** Callback for individual evaluation completion (success or failure); errors it throws are caught and logged */
25
+ onItemComplete?: (result: BatchEvaluationItemResult) => void;
26
+ }
27
+ /**
28
+ * Progress snapshot for a batch evaluation, passed to the `onProgress` callback.
29
+ */
30
+ export interface BatchProgress {
31
+ /** Total items to evaluate */
32
+ total: number;
33
+ /** Items completed (succeeded + failed) */
34
+ completed: number;
35
+ /** Items that succeeded */
36
+ succeeded: number;
37
+ /** Items that failed (after any retries) */
38
+ failed: number;
39
+ /** Items still pending (total - completed) */
40
+ pending: number;
41
+ /** Percentage complete, rounded to the nearest whole number */
42
+ percentComplete: number;
43
+ }
44
+ /**
45
+ * Input item for batch evaluation: one generation request/result pair to score.
46
+ */
47
+ export interface BatchEvaluationItem {
48
+ /** Unique identifier for this item; copied onto the matching item result */
49
+ id: string;
50
+ /** The generation options, forwarded to the evaluator */
51
+ options: LanguageModelV3CallOptions;
52
+ /** The generation result to evaluate */
53
+ result: GenerateResult;
54
+ /** Optional item-specific threshold override; takes precedence over the auto-evaluation and batch config thresholds */
55
+ threshold?: number;
56
+ }
57
+ /**
58
+ * Result for a single item in batch evaluation; also passed to the `onItemComplete` callback.
59
+ */
60
+ export interface BatchEvaluationItemResult {
61
+ /** The item ID (copied from the input item) */
62
+ id: string;
63
+ /** Whether the evaluation succeeded */
64
+ success: boolean;
65
+ /** The evaluation data (if successful) */
66
+ data?: EvaluationData;
67
+ /** Error information (if failed), taken from the last error raised for this item */
68
+ error?: {
69
+ message: string;
70
+ code?: string;
71
+ retryable?: boolean;
72
+ };
73
+ /** Time taken for this evaluation in milliseconds, measured from the first attempt and including retries */
74
+ duration: number;
75
+ /** Number of retry attempts performed (0 when no retry was made) */
76
+ retryCount: number;
77
+ }
78
+ /**
79
+ * Result of a batch evaluation operation: per-item results plus aggregate statistics.
80
+ */
81
+ export interface BatchEvaluationResult {
82
+ /** All item results */
83
+ results: BatchEvaluationItemResult[];
84
+ /** Summary statistics */
85
+ summary: {
86
+ /** Total number of input items */
87
+ total: number;
88
+ /** Number of successful evaluations */
89
+ succeeded: number;
90
+ /** Number of failed evaluations */
91
+ failed: number;
92
+ /** Average `overall` evaluation score of successful items; 0 when none succeeded */
93
+ averageScore: number;
94
+ /** Average per-item evaluation time in milliseconds, over all returned item results; 0 when there are none */
95
+ averageDuration: number;
96
+ /** Total elapsed time for the batch evaluation in milliseconds */
97
+ totalDuration: number;
98
+ /** Passing rate (percentage, 0-100, of successful items meeting threshold); 0 when none succeeded */
99
+ passingRate: number;
100
+ };
101
+ /** Whether all evaluations succeeded (no item failed) */
102
+ allSucceeded: boolean;
103
+ }
104
+ /**
105
+ * BatchEvaluator - Evaluates multiple items in concurrency-limited batches.
106
+ * Supports configurable concurrency, retry logic, and progress tracking.
107
+ *
108
+ * @example
109
+ * ```typescript
110
+ * const batchEvaluator = new BatchEvaluator({
111
+ * concurrency: 3,
112
+ * continueOnError: true,
113
+ * onProgress: (progress) => console.log(`${progress.percentComplete}% complete`)
114
+ * });
115
+ *
116
+ * const items = [
117
+ * { id: '1', options: opts1, result: result1 },
118
+ * { id: '2', options: opts2, result: result2 },
119
+ * ];
120
+ *
121
+ * const batchResult = await batchEvaluator.evaluateBatch(items);
122
+ * console.log(`Passing rate: ${batchResult.summary.passingRate}%`);
123
+ * ```
124
+ */
125
+ export declare class BatchEvaluator {
126
+ private config;
127
+ constructor(config?: BatchEvaluationConfig);
128
+ /**
129
+ * Create a fresh Evaluator instance for each evaluation attempt to avoid leaking state.
130
+ */
131
+ private _createEvaluator;
132
+ /**
133
+ * Evaluates a batch of items in parallel with controlled concurrency; throws an aggregate batch error when `continueOnError` is false and an item fails.
134
+ *
135
+ * @param items - Array of items to evaluate
136
+ * @param autoEvalConfig - Auto-evaluation configuration; its threshold is used when an item has no override
137
+ * @returns Batch evaluation results with summary statistics
138
+ */
139
+ evaluateBatch(items: BatchEvaluationItem[], autoEvalConfig?: AutoEvaluationConfig): Promise<BatchEvaluationResult>;
140
+ /**
141
+ * Evaluates items sequentially (one at a time) by running the batch logic with concurrency forced to 1.
142
+ * Useful for debugging or when order matters. Does not modify this instance's configuration.
143
+ *
144
+ * @param items - Array of items to evaluate
145
+ * @param autoEvalConfig - Auto-evaluation configuration
146
+ * @returns Batch evaluation results
147
+ */
148
+ evaluateSequential(items: BatchEvaluationItem[], autoEvalConfig?: AutoEvaluationConfig): Promise<BatchEvaluationResult>;
149
+ /**
150
+ * Gets a shallow copy of the current configuration.
151
+ */
152
+ getConfig(): BatchEvaluationConfig;
153
+ /**
154
+ * Updates the configuration by merging the given values over the current ones.
155
+ *
156
+ * @param config - New configuration values
157
+ */
158
+ updateConfig(config: Partial<BatchEvaluationConfig>): void;
159
+ /**
160
+ * Helper to delay execution (used between retry attempts).
161
+ */
162
+ private delay;
163
+ }
@@ -0,0 +1,267 @@
1
+ /**
2
+ * @file BatchEvaluator - Supports batch evaluation of multiple responses.
3
+ * Enables parallel evaluation with configurable concurrency and error handling.
4
+ */
5
+ import { Evaluator } from "./index.js";
6
+ import { createBatchEvaluationError, isRetryableEvaluationError, } from "./errors/EvaluationError.js";
7
+ import { logger } from "../utils/logger.js";
8
+ import { NeuroLinkFeatureError } from "../core/infrastructure/index.js";
9
+ /**
10
+ * BatchEvaluator - Performs evaluation on multiple items in parallel.
11
+ * Supports configurable concurrency, retry logic, and progress tracking.
12
+ *
13
+ * @example
14
+ * ```typescript
15
+ * const batchEvaluator = new BatchEvaluator({
16
+ * concurrency: 3,
17
+ * continueOnError: true,
18
+ * onProgress: (progress) => console.log(`${progress.percentComplete}% complete`)
19
+ * });
20
+ *
21
+ * const items = [
22
+ * { id: '1', options: opts1, result: result1 },
23
+ * { id: '2', options: opts2, result: result2 },
24
+ * ];
25
+ *
26
+ * const batchResult = await batchEvaluator.evaluateBatch(items);
27
+ * console.log(`Passing rate: ${batchResult.summary.passingRate}%`);
28
+ * ```
29
+ */
30
export class BatchEvaluator {
    /** Effective configuration: built-in defaults overlaid with caller-supplied values. */
    config;
    /**
     * @param config - Batch options. Omitted fields fall back to the defaults
     *   (concurrency 5, continueOnError true, maxRetries 2, retryDelay 1000 ms).
     */
    constructor(config = {}) {
        this.config = {
            concurrency: 5,
            continueOnError: true,
            maxRetries: 2,
            retryDelay: 1000,
            ...config,
        };
    }
    /**
     * Create a fresh Evaluator instance for each evaluation to avoid leaking state.
     */
    _createEvaluator() {
        return new Evaluator(this.config);
    }
    /**
     * Evaluates a batch of items in parallel with controlled concurrency.
     *
     * Items are processed in slices of `concurrency` items; the next slice starts
     * only after every item of the current slice has settled. Retryable errors are
     * retried up to `maxRetries` times with `retryDelay` ms between attempts.
     *
     * @param items - Array of items to evaluate
     * @param autoEvalConfig - Auto-evaluation configuration for thresholds
     * @returns Batch evaluation results with summary statistics
     * @throws The error built by `createBatchEvaluationError` when `continueOnError`
     *   is false and at least one item of a slice failed. Its `failedItems` carry
     *   the index of each failed item in `items` and the error that caused it.
     */
    async evaluateBatch(items, autoEvalConfig = {}) {
        const startTime = Date.now();
        const results = [];
        // A non-numeric or < 1 concurrency would make the slice loop below spin
        // forever, so anything unusable falls back to the default of 5.
        const requestedConcurrency = this.config.concurrency;
        const concurrency = typeof requestedConcurrency === "number" && requestedConcurrency >= 1
            ? Math.floor(requestedConcurrency)
            : 5;
        // `??` (not `||`) so that an explicit 0 is honoured: `maxRetries: 0`
        // disables retries and `retryDelay: 0` retries immediately.
        const maxRetries = Math.max(0, this.config.maxRetries ?? 2);
        const retryDelay = this.config.retryDelay ?? 1000;
        const defaultThreshold = autoEvalConfig.threshold ?? this.config.threshold ?? 7;
        // Track progress
        let completed = 0;
        let succeeded = 0;
        let failed = 0;
        // Successful items whose score met the threshold they were evaluated with.
        let passed = 0;
        // Original throwables of failed items, keyed by item index, kept so the
        // aggregate error can report the real cause rather than a copied message.
        const rawErrors = new Map();
        // Invoke an optional user callback; a throwing callback must never abort
        // the batch, so its error is logged and swallowed.
        const notify = (callbackName, payload) => {
            if (!this.config[callbackName]) {
                return;
            }
            try {
                this.config[callbackName](payload);
            }
            catch (callbackError) {
                logger.warn(`[BatchEvaluator] ${callbackName} callback threw an error`, {
                    error: callbackError instanceof Error
                        ? callbackError.message
                        : String(callbackError),
                });
            }
        };
        const reportProgress = () => notify("onProgress", {
            total: items.length,
            completed,
            succeeded,
            failed,
            pending: items.length - completed,
            percentComplete: Math.round((completed / items.length) * 100),
        });
        // Evaluate one item, retrying retryable failures. Always resolves with an
        // item result (never rejects); failures are reported via `success: false`.
        const processItem = async (item, index) => {
            const itemStartTime = Date.now();
            const threshold = item.threshold ?? defaultThreshold;
            let retryCount = 0;
            let lastError;
            for (;;) {
                let data;
                try {
                    // Create fresh evaluator per attempt to avoid leaking state
                    data = await this._createEvaluator().evaluate(item.options, item.result, threshold, {
                        ...autoEvalConfig,
                        threshold,
                    });
                }
                catch (error) {
                    lastError = error;
                    const canRetry = error instanceof NeuroLinkFeatureError &&
                        isRetryableEvaluationError(error) &&
                        retryCount < maxRetries;
                    if (!canRetry) {
                        // Not retryable or max retries exceeded
                        break;
                    }
                    retryCount++;
                    logger.debug(`[BatchEvaluator.evaluateBatch] Retrying evaluation for item ${item.id}`, { attempt: retryCount + 1, itemId: item.id });
                    await this.delay(retryDelay);
                    continue;
                }
                const result = {
                    id: item.id,
                    success: true,
                    data,
                    duration: Date.now() - itemStartTime,
                    retryCount,
                };
                succeeded++;
                completed++;
                if (data && data.overall >= threshold) {
                    passed++;
                }
                reportProgress();
                notify("onItemComplete", result);
                return result;
            }
            // Failed after all retries
            const isFeatureError = lastError instanceof NeuroLinkFeatureError;
            const rawMessage = lastError instanceof Error
                ? lastError.message
                : lastError != null
                    ? String(lastError)
                    : "";
            const errorResult = {
                id: item.id,
                success: false,
                error: {
                    message: rawMessage || "Unknown error",
                    code: isFeatureError ? lastError.code : undefined,
                    retryable: isFeatureError ? lastError.retryable : false,
                },
                duration: Date.now() - itemStartTime,
                retryCount,
            };
            failed++;
            completed++;
            rawErrors.set(index, lastError);
            reportProgress();
            notify("onItemComplete", errorResult);
            return errorResult;
        };
        // Process items in batches based on concurrency
        for (let start = 0; start < items.length; start += concurrency) {
            const batch = items.slice(start, start + concurrency);
            const batchResults = await Promise.all(batch.map((item, offset) => processItem(item, start + offset)));
            results.push(...batchResults);
            // If continueOnError is false and any item in this batch failed, throw aggregate
            if (!this.config.continueOnError) {
                const failedItems = [];
                batchResults.forEach((itemResult, offset) => {
                    if (itemResult.success) {
                        return;
                    }
                    const index = start + offset;
                    const cause = rawErrors.get(index);
                    failedItems.push({
                        index,
                        error: cause instanceof Error
                            ? cause
                            : new Error(itemResult.error?.message || "Unknown error"),
                    });
                });
                if (failedItems.length > 0) {
                    throw createBatchEvaluationError(failedItems.length, items.length, failedItems);
                }
            }
        }
        // Calculate summary statistics
        const successfulResults = results.filter((r) => r.success && r.data);
        const scores = successfulResults.map((r) => r.data.overall);
        const summary = {
            total: items.length,
            succeeded,
            failed,
            averageScore: scores.length > 0
                ? scores.reduce((a, b) => a + b, 0) / scores.length
                : 0,
            // Averaged over every item result, successful or not.
            averageDuration: results.length > 0
                ? results.reduce((a, b) => a + b.duration, 0) / results.length
                : 0,
            totalDuration: Date.now() - startTime,
            passingRate: successfulResults.length > 0
                ? (passed / successfulResults.length) * 100
                : 0,
        };
        return {
            results,
            summary,
            allSucceeded: failed === 0,
        };
    }
    /**
     * Evaluates items sequentially (one at a time).
     * Useful for debugging or when order matters.
     *
     * @param items - Array of items to evaluate
     * @param autoEvalConfig - Auto-evaluation configuration
     * @returns Batch evaluation results
     */
    async evaluateSequential(items, autoEvalConfig = {}) {
        // Create a temporary evaluator with sequential config to avoid mutating shared state
        const sequentialEvaluator = new BatchEvaluator({
            ...this.config,
            concurrency: 1,
        });
        return sequentialEvaluator.evaluateBatch(items, autoEvalConfig);
    }
    /**
     * Gets a shallow copy of the current configuration.
     */
    getConfig() {
        return { ...this.config };
    }
    /**
     * Updates the configuration by merging the given values over the current ones.
     *
     * @param config - New configuration values
     */
    updateConfig(config) {
        this.config = { ...this.config, ...config };
        // Fresh evaluators are created per evaluation via _createEvaluator(),
        // so no shared evaluator needs to be re-created here.
    }
    /**
     * Helper to delay execution.
     *
     * @param ms - Milliseconds to wait
     * @returns Promise that resolves after the delay
     */
    delay(ms) {
        return new Promise((resolve) => setTimeout(resolve, ms));
    }
}