@juspay/neurolink 9.36.1 → 9.38.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/auth/errors.d.ts +1 -1
- package/dist/auth/middleware/AuthMiddleware.d.ts +1 -1
- package/dist/auth/providers/BaseAuthProvider.d.ts +1 -1
- package/dist/browser/neurolink.min.js +1105 -556
- package/dist/cli/commands/evaluate.d.ts +48 -0
- package/dist/cli/commands/evaluate.js +955 -0
- package/dist/cli/parser.js +4 -1
- package/dist/evaluation/BatchEvaluator.d.ts +163 -0
- package/dist/evaluation/BatchEvaluator.js +267 -0
- package/dist/evaluation/EvaluationAggregator.d.ts +272 -0
- package/dist/evaluation/EvaluationAggregator.js +377 -0
- package/dist/evaluation/EvaluatorFactory.d.ts +113 -0
- package/dist/evaluation/EvaluatorFactory.js +280 -0
- package/dist/evaluation/EvaluatorRegistry.d.ts +160 -0
- package/dist/evaluation/EvaluatorRegistry.js +184 -0
- package/dist/evaluation/errors/EvaluationError.d.ts +189 -0
- package/dist/evaluation/errors/EvaluationError.js +206 -0
- package/dist/evaluation/errors/index.d.ts +4 -0
- package/dist/evaluation/errors/index.js +4 -0
- package/dist/evaluation/hooks/index.d.ts +6 -0
- package/dist/evaluation/hooks/index.js +6 -0
- package/dist/evaluation/hooks/langfuseAdapter.d.ts +99 -0
- package/dist/evaluation/hooks/langfuseAdapter.js +172 -0
- package/dist/evaluation/hooks/observabilityHooks.d.ts +129 -0
- package/dist/evaluation/hooks/observabilityHooks.js +181 -0
- package/dist/evaluation/index.d.ts +11 -2
- package/dist/evaluation/index.js +15 -0
- package/dist/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
- package/dist/evaluation/pipeline/evaluationPipeline.js +381 -0
- package/dist/evaluation/pipeline/index.d.ts +8 -0
- package/dist/evaluation/pipeline/index.js +8 -0
- package/dist/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
- package/dist/evaluation/pipeline/pipelineBuilder.js +260 -0
- package/dist/evaluation/pipeline/presets.d.ts +66 -0
- package/dist/evaluation/pipeline/presets.js +224 -0
- package/dist/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
- package/dist/evaluation/pipeline/strategies/batchStrategy.js +238 -0
- package/dist/evaluation/pipeline/strategies/index.d.ts +6 -0
- package/dist/evaluation/pipeline/strategies/index.js +6 -0
- package/dist/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
- package/dist/evaluation/pipeline/strategies/samplingStrategy.js +238 -0
- package/dist/evaluation/reporting/index.d.ts +6 -0
- package/dist/evaluation/reporting/index.js +6 -0
- package/dist/evaluation/reporting/metricsCollector.d.ts +147 -0
- package/dist/evaluation/reporting/metricsCollector.js +285 -0
- package/dist/evaluation/reporting/reportGenerator.d.ts +90 -0
- package/dist/evaluation/reporting/reportGenerator.js +374 -0
- package/dist/evaluation/scorers/baseScorer.d.ts +83 -0
- package/dist/evaluation/scorers/baseScorer.js +232 -0
- package/dist/evaluation/scorers/customScorerUtils.d.ts +95 -0
- package/dist/evaluation/scorers/customScorerUtils.js +381 -0
- package/dist/evaluation/scorers/index.d.ts +10 -0
- package/dist/evaluation/scorers/index.js +16 -0
- package/dist/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/answerRelevancyScorer.js +99 -0
- package/dist/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
- package/dist/evaluation/scorers/llm/baseLLMScorer.js +281 -0
- package/dist/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/biasDetectionScorer.js +127 -0
- package/dist/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/contextPrecisionScorer.js +92 -0
- package/dist/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/contextRelevancyScorer.js +107 -0
- package/dist/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/faithfulnessScorer.js +121 -0
- package/dist/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/hallucinationScorer.js +140 -0
- package/dist/evaluation/scorers/llm/index.d.ts +15 -0
- package/dist/evaluation/scorers/llm/index.js +16 -0
- package/dist/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/promptAlignmentScorer.js +106 -0
- package/dist/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/summarizationScorer.js +114 -0
- package/dist/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/toneConsistencyScorer.js +106 -0
- package/dist/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/toxicityScorer.js +121 -0
- package/dist/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
- package/dist/evaluation/scorers/rule/baseRuleScorer.js +233 -0
- package/dist/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
- package/dist/evaluation/scorers/rule/contentSimilarityScorer.js +350 -0
- package/dist/evaluation/scorers/rule/formatScorer.d.ts +147 -0
- package/dist/evaluation/scorers/rule/formatScorer.js +470 -0
- package/dist/evaluation/scorers/rule/index.d.ts +9 -0
- package/dist/evaluation/scorers/rule/index.js +10 -0
- package/dist/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
- package/dist/evaluation/scorers/rule/keywordCoverageScorer.js +347 -0
- package/dist/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
- package/dist/evaluation/scorers/rule/lengthScorer.js +351 -0
- package/dist/evaluation/scorers/scorerBuilder.d.ts +161 -0
- package/dist/evaluation/scorers/scorerBuilder.js +420 -0
- package/dist/evaluation/scorers/scorerRegistry.d.ts +62 -0
- package/dist/evaluation/scorers/scorerRegistry.js +467 -0
- package/dist/index.d.ts +37 -25
- package/dist/index.js +65 -26
- package/dist/lib/auth/providers/BaseAuthProvider.d.ts +1 -1
- package/dist/lib/evaluation/BatchEvaluator.d.ts +163 -0
- package/dist/lib/evaluation/BatchEvaluator.js +268 -0
- package/dist/lib/evaluation/EvaluationAggregator.d.ts +272 -0
- package/dist/lib/evaluation/EvaluationAggregator.js +378 -0
- package/dist/lib/evaluation/EvaluatorFactory.d.ts +113 -0
- package/dist/lib/evaluation/EvaluatorFactory.js +281 -0
- package/dist/lib/evaluation/EvaluatorRegistry.d.ts +160 -0
- package/dist/lib/evaluation/EvaluatorRegistry.js +185 -0
- package/dist/lib/evaluation/errors/EvaluationError.d.ts +189 -0
- package/dist/lib/evaluation/errors/EvaluationError.js +207 -0
- package/dist/lib/evaluation/errors/index.d.ts +4 -0
- package/dist/lib/evaluation/errors/index.js +5 -0
- package/dist/lib/evaluation/hooks/index.d.ts +6 -0
- package/dist/lib/evaluation/hooks/index.js +7 -0
- package/dist/lib/evaluation/hooks/langfuseAdapter.d.ts +99 -0
- package/dist/lib/evaluation/hooks/langfuseAdapter.js +173 -0
- package/dist/lib/evaluation/hooks/observabilityHooks.d.ts +129 -0
- package/dist/lib/evaluation/hooks/observabilityHooks.js +182 -0
- package/dist/lib/evaluation/index.d.ts +11 -2
- package/dist/lib/evaluation/index.js +15 -0
- package/dist/lib/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
- package/dist/lib/evaluation/pipeline/evaluationPipeline.js +382 -0
- package/dist/lib/evaluation/pipeline/index.d.ts +8 -0
- package/dist/lib/evaluation/pipeline/index.js +9 -0
- package/dist/lib/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
- package/dist/lib/evaluation/pipeline/pipelineBuilder.js +261 -0
- package/dist/lib/evaluation/pipeline/presets.d.ts +66 -0
- package/dist/lib/evaluation/pipeline/presets.js +225 -0
- package/dist/lib/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
- package/dist/lib/evaluation/pipeline/strategies/batchStrategy.js +239 -0
- package/dist/lib/evaluation/pipeline/strategies/index.d.ts +6 -0
- package/dist/lib/evaluation/pipeline/strategies/index.js +7 -0
- package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
- package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.js +239 -0
- package/dist/lib/evaluation/reporting/index.d.ts +6 -0
- package/dist/lib/evaluation/reporting/index.js +7 -0
- package/dist/lib/evaluation/reporting/metricsCollector.d.ts +147 -0
- package/dist/lib/evaluation/reporting/metricsCollector.js +286 -0
- package/dist/lib/evaluation/reporting/reportGenerator.d.ts +90 -0
- package/dist/lib/evaluation/reporting/reportGenerator.js +375 -0
- package/dist/lib/evaluation/scorers/baseScorer.d.ts +83 -0
- package/dist/lib/evaluation/scorers/baseScorer.js +233 -0
- package/dist/lib/evaluation/scorers/customScorerUtils.d.ts +95 -0
- package/dist/lib/evaluation/scorers/customScorerUtils.js +382 -0
- package/dist/lib/evaluation/scorers/index.d.ts +10 -0
- package/dist/lib/evaluation/scorers/index.js +17 -0
- package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.js +100 -0
- package/dist/lib/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
- package/dist/lib/evaluation/scorers/llm/baseLLMScorer.js +282 -0
- package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.js +128 -0
- package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.js +93 -0
- package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.js +108 -0
- package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.js +122 -0
- package/dist/lib/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/hallucinationScorer.js +141 -0
- package/dist/lib/evaluation/scorers/llm/index.d.ts +15 -0
- package/dist/lib/evaluation/scorers/llm/index.js +17 -0
- package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.js +107 -0
- package/dist/lib/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/summarizationScorer.js +115 -0
- package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.js +107 -0
- package/dist/lib/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/toxicityScorer.js +122 -0
- package/dist/lib/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
- package/dist/lib/evaluation/scorers/rule/baseRuleScorer.js +234 -0
- package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
- package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.js +351 -0
- package/dist/lib/evaluation/scorers/rule/formatScorer.d.ts +147 -0
- package/dist/lib/evaluation/scorers/rule/formatScorer.js +471 -0
- package/dist/lib/evaluation/scorers/rule/index.d.ts +9 -0
- package/dist/lib/evaluation/scorers/rule/index.js +11 -0
- package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
- package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.js +348 -0
- package/dist/lib/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
- package/dist/lib/evaluation/scorers/rule/lengthScorer.js +352 -0
- package/dist/lib/evaluation/scorers/scorerBuilder.d.ts +161 -0
- package/dist/lib/evaluation/scorers/scorerBuilder.js +421 -0
- package/dist/lib/evaluation/scorers/scorerRegistry.d.ts +62 -0
- package/dist/lib/evaluation/scorers/scorerRegistry.js +468 -0
- package/dist/lib/index.d.ts +37 -25
- package/dist/lib/index.js +65 -26
- package/dist/lib/neurolink.d.ts +204 -0
- package/dist/lib/neurolink.js +296 -0
- package/dist/lib/processors/media/VideoProcessor.d.ts +8 -2
- package/dist/lib/processors/media/VideoProcessor.js +90 -41
- package/dist/lib/telemetry/telemetryService.d.ts +1 -1
- package/dist/lib/telemetry/telemetryService.js +27 -13
- package/dist/lib/types/index.d.ts +3 -1
- package/dist/lib/types/index.js +3 -2
- package/dist/lib/types/scorerTypes.d.ts +423 -0
- package/dist/lib/types/scorerTypes.js +6 -0
- package/dist/lib/utils/errorHandling.d.ts +20 -0
- package/dist/lib/utils/errorHandling.js +60 -0
- package/dist/neurolink.d.ts +204 -0
- package/dist/neurolink.js +296 -0
- package/dist/processors/media/VideoProcessor.d.ts +8 -2
- package/dist/processors/media/VideoProcessor.js +90 -41
- package/dist/telemetry/telemetryService.d.ts +1 -1
- package/dist/telemetry/telemetryService.js +27 -13
- package/dist/types/index.d.ts +3 -1
- package/dist/types/index.js +3 -2
- package/dist/types/scorerTypes.d.ts +423 -0
- package/dist/types/scorerTypes.js +5 -0
- package/dist/utils/errorHandling.d.ts +20 -0
- package/dist/utils/errorHandling.js +60 -0
- package/package.json +7 -7
- package/dist/processors/media/ffprobe-static.d.ts +0 -4
package/dist/cli/parser.js
CHANGED
|
@@ -14,6 +14,7 @@ import { ragCommand } from "./commands/rag.js";
|
|
|
14
14
|
import { ObservabilityCommandFactory } from "./commands/observability.js";
|
|
15
15
|
import { TelemetryCommandFactory } from "./commands/telemetry.js";
|
|
16
16
|
import { proxyStartCommand, proxyStatusCommand, proxySetupCommand, proxyGuardCommand, proxyInstallCommand, proxyUninstallCommand, } from "./commands/proxy.js";
|
|
17
|
+
import { EvaluateCommandFactory } from "./commands/evaluate.js";
|
|
17
18
|
// Enhanced CLI with Professional UX
|
|
18
19
|
export function initializeCliParser() {
|
|
19
20
|
return (yargs(hideBin(process.argv))
|
|
@@ -196,6 +197,8 @@ export function initializeCliParser() {
|
|
|
196
197
|
.command(proxyUninstallCommand)
|
|
197
198
|
.demandCommand(1, "Please specify a proxy subcommand: start, status, setup, guard, install, or uninstall"),
|
|
198
199
|
handler: () => { },
|
|
199
|
-
})
|
|
200
|
+
})
|
|
201
|
+
// Evaluate Command Group - Using EvaluateCommandFactory
|
|
202
|
+
.command(EvaluateCommandFactory.createEvaluateCommand())); // Close the main return statement
|
|
200
203
|
}
|
|
201
204
|
//# sourceMappingURL=parser.js.map
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file BatchEvaluator - Supports batch evaluation of multiple responses.
|
|
3
|
+
* Enables parallel evaluation with configurable concurrency and error handling.
|
|
4
|
+
*/
|
|
5
|
+
import type { LanguageModelV3CallOptions } from "@ai-sdk/provider";
|
|
6
|
+
import type { GenerateResult } from "../types/generateTypes.js";
|
|
7
|
+
import type { EvaluationConfig } from "../types/evaluationTypes.js";
|
|
8
|
+
import type { EvaluationData } from "../types/evaluation.js";
|
|
9
|
+
import type { AutoEvaluationConfig } from "../types/middlewareTypes.js";
|
|
10
|
+
/**
 * Configuration for batch evaluation.
 *
 * Extends the base evaluation config with batch-level controls: concurrency,
 * failure policy, retry policy and notification callbacks. Every field
 * declared here is optional; the defaults quoted below are the ones the
 * BatchEvaluator constructor merges in.
 */
export interface BatchEvaluationConfig extends EvaluationConfig {
    /** Maximum number of concurrent evaluations (default: 5) */
    concurrency?: number;
    /**
     * Whether to continue on individual failures (default: true).
     * When false, evaluateBatch rejects once a group containing a failed item has settled.
     */
    continueOnError?: boolean;
    /** Maximum retries for retryable errors (default: 2). Non-retryable errors are never retried. */
    maxRetries?: number;
    /** Delay between retries in milliseconds (default: 1000) */
    retryDelay?: number;
    /**
     * Callback for progress updates, invoked after each item finishes
     * (successfully or not). Errors thrown by the callback are caught and logged.
     */
    onProgress?: (progress: BatchProgress) => void;
    /**
     * Callback for individual evaluation completion; receives the item's final
     * result (success or failure). Errors thrown by the callback are caught and logged.
     */
    onItemComplete?: (result: BatchEvaluationItemResult) => void;
}
|
|
27
|
+
/**
 * Progress information for batch evaluation.
 *
 * A snapshot of the batch counters, delivered to the `onProgress` callback
 * each time an item finishes.
 */
export interface BatchProgress {
    /** Total items to evaluate */
    total: number;
    /** Items completed (success + failed) */
    completed: number;
    /** Items that succeeded */
    succeeded: number;
    /** Items that failed */
    failed: number;
    /** Items still pending (total - completed) */
    pending: number;
    /** Percentage complete, rounded to the nearest integer */
    percentComplete: number;
}
|
|
44
|
+
/**
 * Input item for batch evaluation.
 *
 * Pairs one generation request with the result it produced, so the pair can
 * be scored by an evaluator.
 */
export interface BatchEvaluationItem {
    /** Unique identifier for this item; echoed back on the matching item result */
    id: string;
    /** The generation options */
    options: LanguageModelV3CallOptions;
    /** The generation result to evaluate */
    result: GenerateResult;
    /** Optional item-specific threshold override; takes precedence over the batch-level threshold */
    threshold?: number;
}
|
|
57
|
+
/**
 * Result for a single item in batch evaluation.
 *
 * Exactly one of `data` (on success) or `error` (on failure) is populated by
 * the BatchEvaluator.
 */
export interface BatchEvaluationItemResult {
    /** The item ID */
    id: string;
    /** Whether the evaluation succeeded */
    success: boolean;
    /** The evaluation data (if successful) */
    data?: EvaluationData;
    /** Error information (if failed) */
    error?: {
        /** Message of the last error raised for this item */
        message: string;
        /** Error code, when the failure was a NeuroLinkFeatureError */
        code?: string;
        /** Retryable flag of the error; false for errors that are not NeuroLinkFeatureError */
        retryable?: boolean;
    };
    /** Time taken for this evaluation in milliseconds, including any retries and retry delays */
    duration: number;
    /** Number of retry attempts (0 when the first attempt was final) */
    retryCount: number;
}
|
|
78
|
+
/**
 * Result of a batch evaluation operation.
 *
 * Bundles the per-item results with aggregate statistics computed once the
 * whole batch has been processed.
 */
export interface BatchEvaluationResult {
    /** All item results */
    results: BatchEvaluationItemResult[];
    /** Summary statistics */
    summary: {
        /** Total items evaluated (length of the input array) */
        total: number;
        /** Number of successful evaluations */
        succeeded: number;
        /** Number of failed evaluations */
        failed: number;
        /** Average overall evaluation score across successful items; 0 when none succeeded */
        averageScore: number;
        /** Average per-item evaluation time in milliseconds; 0 when there are no results */
        averageDuration: number;
        /** Total wall-clock time for the batch evaluation in milliseconds */
        totalDuration: number;
        /**
         * Passing rate: percentage (0-100) of successfully evaluated items whose
         * overall score meets the threshold; 0 when none succeeded.
         */
        passingRate: number;
    };
    /** Whether all evaluations succeeded (no item failed) */
    allSucceeded: boolean;
}
|
|
104
|
+
/**
 * BatchEvaluator - Performs evaluation on multiple items in parallel.
 * Supports configurable concurrency, retry logic, and progress tracking.
 *
 * @example
 * ```typescript
 * const batchEvaluator = new BatchEvaluator({
 *   concurrency: 3,
 *   continueOnError: true,
 *   onProgress: (progress) => console.log(`${progress.percentComplete}% complete`)
 * });
 *
 * const items = [
 *   { id: '1', options: opts1, result: result1 },
 *   { id: '2', options: opts2, result: result2 },
 * ];
 *
 * const batchResult = await batchEvaluator.evaluateBatch(items);
 * console.log(`Passing rate: ${batchResult.summary.passingRate}%`);
 * ```
 */
export declare class BatchEvaluator {
    /** Effective configuration: constructor defaults overlaid with caller-supplied values. */
    private config;
    /**
     * @param config - Optional batch settings; unspecified fields fall back to
     *   concurrency 5, continueOnError true, maxRetries 2, retryDelay 1000.
     */
    constructor(config?: BatchEvaluationConfig);
    /**
     * Create a fresh Evaluator instance for each evaluation to avoid leaking state.
     */
    private _createEvaluator;
    /**
     * Evaluates a batch of items in parallel with controlled concurrency.
     * Items are processed in consecutive groups of at most `concurrency`
     * items; each group settles before the next one starts.
     *
     * @param items - Array of items to evaluate
     * @param autoEvalConfig - Auto-evaluation configuration for thresholds
     * @returns Batch evaluation results with summary statistics
     * @throws When `continueOnError` is false and an item in a group fails
     */
    evaluateBatch(items: BatchEvaluationItem[], autoEvalConfig?: AutoEvaluationConfig): Promise<BatchEvaluationResult>;
    /**
     * Evaluates items sequentially (one at a time).
     * Useful for debugging or when order matters.
     * Runs on a temporary BatchEvaluator with concurrency 1, so this
     * instance's configuration is not modified.
     *
     * @param items - Array of items to evaluate
     * @param autoEvalConfig - Auto-evaluation configuration
     * @returns Batch evaluation results
     */
    evaluateSequential(items: BatchEvaluationItem[], autoEvalConfig?: AutoEvaluationConfig): Promise<BatchEvaluationResult>;
    /**
     * Gets the current configuration (a shallow copy, so edits to the returned
     * object do not affect this evaluator).
     */
    getConfig(): BatchEvaluationConfig;
    /**
     * Updates the configuration by shallow-merging the given values over the
     * current ones.
     *
     * @param config - New configuration values
     */
    updateConfig(config: Partial<BatchEvaluationConfig>): void;
    /**
     * Helper to delay execution (used between retry attempts).
     */
    private delay;
}
|
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file BatchEvaluator - Supports batch evaluation of multiple responses.
|
|
3
|
+
* Enables parallel evaluation with configurable concurrency and error handling.
|
|
4
|
+
*/
|
|
5
|
+
import { Evaluator } from "./index.js";
|
|
6
|
+
import { createBatchEvaluationError, isRetryableEvaluationError, } from "./errors/EvaluationError.js";
|
|
7
|
+
import { logger } from "../utils/logger.js";
|
|
8
|
+
import { NeuroLinkFeatureError } from "../core/infrastructure/index.js";
|
|
9
|
+
/**
 * BatchEvaluator - Performs evaluation on multiple items in parallel.
 * Supports configurable concurrency, retry logic, and progress tracking.
 *
 * @example
 * ```typescript
 * const batchEvaluator = new BatchEvaluator({
 *   concurrency: 3,
 *   continueOnError: true,
 *   onProgress: (progress) => console.log(`${progress.percentComplete}% complete`)
 * });
 *
 * const items = [
 *   { id: '1', options: opts1, result: result1 },
 *   { id: '2', options: opts2, result: result2 },
 * ];
 *
 * const batchResult = await batchEvaluator.evaluateBatch(items);
 * console.log(`Passing rate: ${batchResult.summary.passingRate}%`);
 * ```
 */
export class BatchEvaluator {
    config;
    /**
     * @param config - Batch settings; merged over the defaults
     *   (concurrency 5, continueOnError true, maxRetries 2, retryDelay 1000).
     */
    constructor(config = {}) {
        this.config = {
            concurrency: 5,
            continueOnError: true,
            maxRetries: 2,
            retryDelay: 1000,
            ...config,
        };
    }
    /**
     * Create a fresh Evaluator instance for each evaluation to avoid leaking state.
     */
    _createEvaluator() {
        return new Evaluator(this.config);
    }
    /**
     * Evaluates a batch of items in parallel with controlled concurrency.
     * Items are processed in consecutive groups of at most `concurrency`
     * items; each group settles before the next one starts.
     *
     * @param items - Array of items to evaluate
     * @param autoEvalConfig - Auto-evaluation configuration for thresholds
     * @returns Batch evaluation results with summary statistics
     * @throws The aggregate batch evaluation error when `continueOnError` is
     *   false and at least one item of a group failed
     */
    async evaluateBatch(items, autoEvalConfig = {}) {
        const startTime = Date.now();
        const evalConfig = autoEvalConfig ?? {};
        // Accept a numeric setting only when it is a real number >= min. This keeps
        // legitimate zeros (maxRetries: 0, retryDelay: 0) instead of replacing them
        // with the default, and rejects NaN / negative values that would otherwise
        // skip evaluation or make the group loop below never terminate.
        const numberOr = (value, min, fallback) => typeof value === "number" && value >= min ? value : fallback;
        const concurrency = Math.floor(numberOr(this.config.concurrency, 1, 5));
        const maxRetries = numberOr(this.config.maxRetries, 0, 2);
        const retryDelay = numberOr(this.config.retryDelay, 0, 1000);
        // `??` so that an explicit threshold of 0 is respected.
        const batchThreshold = evalConfig.threshold ?? this.config.threshold ?? 7;
        const results = [];
        // Threshold each successful result was actually evaluated against, so the
        // passing rate honours per-item overrides.
        const appliedThresholds = new Map();
        // Track progress
        let completed = 0;
        let succeeded = 0;
        let failed = 0;
        // User callbacks are best-effort: a throwing callback must not fail the batch.
        const safeInvoke = (name, payload) => {
            try {
                this.config[name](payload);
            }
            catch (callbackError) {
                logger.warn(`[BatchEvaluator] ${name} callback threw an error`, {
                    error: callbackError instanceof Error
                        ? callbackError.message
                        : String(callbackError),
                });
            }
        };
        const reportProgress = () => {
            if (this.config.onProgress) {
                safeInvoke("onProgress", {
                    total: items.length,
                    completed,
                    succeeded,
                    failed,
                    pending: items.length - completed,
                    percentComplete: Math.round((completed / items.length) * 100),
                });
            }
        };
        const notifyItemComplete = (itemResult) => {
            if (this.config.onItemComplete) {
                safeInvoke("onItemComplete", itemResult);
            }
        };
        // Evaluate one item, retrying retryable failures up to maxRetries times.
        const processItem = async (item) => {
            const itemStartTime = Date.now();
            const threshold = item.threshold ?? batchThreshold;
            let retryCount = 0;
            let lastError;
            for (;;) {
                try {
                    // Create fresh evaluator per attempt to avoid leaking state
                    const evaluator = this._createEvaluator();
                    const data = await evaluator.evaluate(item.options, item.result, threshold, {
                        ...evalConfig,
                        threshold,
                    });
                    const result = {
                        id: item.id,
                        success: true,
                        data,
                        duration: Date.now() - itemStartTime,
                        retryCount,
                    };
                    appliedThresholds.set(result, threshold);
                    succeeded++;
                    completed++;
                    reportProgress();
                    notifyItemComplete(result);
                    return result;
                }
                catch (error) {
                    lastError = error;
                    const isRetryable = error instanceof NeuroLinkFeatureError &&
                        isRetryableEvaluationError(error);
                    if (!isRetryable || retryCount >= maxRetries) {
                        // Not retryable or max retries exceeded
                        break;
                    }
                    retryCount++;
                    logger.debug(`[BatchEvaluator.evaluateBatch] Retrying evaluation for item ${item.id}`, { attempt: retryCount + 1, itemId: item.id });
                    await this.delay(retryDelay);
                }
            }
            // Failed after all retries
            const errorResult = {
                id: item.id,
                success: false,
                error: {
                    message: lastError?.message || "Unknown error",
                    code: lastError instanceof NeuroLinkFeatureError
                        ? lastError.code
                        : undefined,
                    retryable: lastError instanceof NeuroLinkFeatureError
                        ? lastError.retryable
                        : false,
                },
                duration: Date.now() - itemStartTime,
                retryCount,
            };
            failed++;
            completed++;
            reportProgress();
            notifyItemComplete(errorResult);
            if (!this.config.continueOnError) {
                // Fail-fast mode: surface the failure as a rejection so the group
                // loop can raise the aggregate error.
                throw lastError;
            }
            return errorResult;
        };
        // Process items in groups based on concurrency
        for (let offset = 0; offset < items.length; offset += concurrency) {
            const group = items.slice(offset, offset + concurrency);
            const settled = await Promise.allSettled(group.map((item) => processItem(item)));
            const rejectedItems = [];
            settled.forEach((outcome, position) => {
                if (outcome.status === "fulfilled") {
                    results.push(outcome.value);
                    return;
                }
                const reason = outcome.reason;
                rejectedItems.push({
                    // Index into the caller's `items` array, not into this group.
                    index: offset + position,
                    error: reason instanceof Error ? reason : new Error(String(reason)),
                });
            });
            if (rejectedItems.length === 0) {
                continue;
            }
            if (!this.config.continueOnError) {
                throw createBatchEvaluationError(rejectedItems.length, items.length, rejectedItems);
            }
            // With continueOnError, processItem returns failures instead of throwing,
            // so a rejection here is unexpected (e.g. a malformed item). Make it
            // visible rather than dropping it silently.
            logger.warn("[BatchEvaluator] Item processing rejected unexpectedly", {
                indices: rejectedItems.map((entry) => entry.index),
                errors: rejectedItems.map((entry) => entry.error.message),
            });
        }
        // Calculate summary statistics
        const scoredResults = results.filter((r) => r.success && r.data);
        const scores = scoredResults.map((r) => r.data.overall);
        const passingCount = scoredResults.filter((r) => r.data.overall >= (appliedThresholds.get(r) ?? batchThreshold)).length;
        const summary = {
            total: items.length,
            succeeded,
            failed,
            averageScore: scores.length > 0
                ? scores.reduce((a, b) => a + b, 0) / scores.length
                : 0,
            averageDuration: results.length > 0
                ? results.reduce((a, b) => a + b.duration, 0) / results.length
                : 0,
            totalDuration: Date.now() - startTime,
            passingRate: scoredResults.length > 0
                ? (passingCount / scoredResults.length) * 100
                : 0,
        };
        return {
            results,
            summary,
            allSucceeded: failed === 0,
        };
    }
    /**
     * Evaluates items sequentially (one at a time).
     * Useful for debugging or when order matters.
     *
     * @param items - Array of items to evaluate
     * @param autoEvalConfig - Auto-evaluation configuration
     * @returns Batch evaluation results
     */
    async evaluateSequential(items, autoEvalConfig = {}) {
        // Create a temporary evaluator with sequential config to avoid mutating shared state
        const sequentialEvaluator = new BatchEvaluator({
            ...this.config,
            concurrency: 1,
        });
        return sequentialEvaluator.evaluateBatch(items, autoEvalConfig);
    }
    /**
     * Gets the current configuration (shallow copy).
     */
    getConfig() {
        return { ...this.config };
    }
    /**
     * Updates the configuration.
     *
     * @param config - New configuration values
     */
    updateConfig(config) {
        this.config = { ...this.config, ...config };
        // Fresh evaluators are created per evaluation via _createEvaluator(),
        // so no shared evaluator needs to be re-created here.
    }
    /**
     * Helper to delay execution.
     */
    delay(ms) {
        return new Promise((resolve) => setTimeout(resolve, ms));
    }
}
|