npm - @juspay/neurolink - Versions diffs - 9.36.1 → 9.37.0 - Mend

@juspay/neurolink 9.36.1 → 9.37.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (202)

package/CHANGELOG.md +6 -0
package/dist/auth/errors.d.ts +1 -1
package/dist/auth/middleware/AuthMiddleware.d.ts +1 -1
package/dist/auth/providers/BaseAuthProvider.d.ts +1 -1
package/dist/browser/neurolink.min.js +921 -423
package/dist/cli/commands/evaluate.d.ts +48 -0
package/dist/cli/commands/evaluate.js +955 -0
package/dist/cli/parser.js +4 -1
package/dist/evaluation/BatchEvaluator.d.ts +163 -0
package/dist/evaluation/BatchEvaluator.js +267 -0
package/dist/evaluation/EvaluationAggregator.d.ts +272 -0
package/dist/evaluation/EvaluationAggregator.js +377 -0
package/dist/evaluation/EvaluatorFactory.d.ts +113 -0
package/dist/evaluation/EvaluatorFactory.js +280 -0
package/dist/evaluation/EvaluatorRegistry.d.ts +160 -0
package/dist/evaluation/EvaluatorRegistry.js +184 -0
package/dist/evaluation/errors/EvaluationError.d.ts +189 -0
package/dist/evaluation/errors/EvaluationError.js +206 -0
package/dist/evaluation/errors/index.d.ts +4 -0
package/dist/evaluation/errors/index.js +4 -0
package/dist/evaluation/hooks/index.d.ts +6 -0
package/dist/evaluation/hooks/index.js +6 -0
package/dist/evaluation/hooks/langfuseAdapter.d.ts +99 -0
package/dist/evaluation/hooks/langfuseAdapter.js +172 -0
package/dist/evaluation/hooks/observabilityHooks.d.ts +129 -0
package/dist/evaluation/hooks/observabilityHooks.js +181 -0
package/dist/evaluation/index.d.ts +11 -2
package/dist/evaluation/index.js +15 -0
package/dist/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
package/dist/evaluation/pipeline/evaluationPipeline.js +381 -0
package/dist/evaluation/pipeline/index.d.ts +8 -0
package/dist/evaluation/pipeline/index.js +8 -0
package/dist/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
package/dist/evaluation/pipeline/pipelineBuilder.js +260 -0
package/dist/evaluation/pipeline/presets.d.ts +66 -0
package/dist/evaluation/pipeline/presets.js +224 -0
package/dist/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
package/dist/evaluation/pipeline/strategies/batchStrategy.js +238 -0
package/dist/evaluation/pipeline/strategies/index.d.ts +6 -0
package/dist/evaluation/pipeline/strategies/index.js +6 -0
package/dist/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
package/dist/evaluation/pipeline/strategies/samplingStrategy.js +238 -0
package/dist/evaluation/reporting/index.d.ts +6 -0
package/dist/evaluation/reporting/index.js +6 -0
package/dist/evaluation/reporting/metricsCollector.d.ts +147 -0
package/dist/evaluation/reporting/metricsCollector.js +285 -0
package/dist/evaluation/reporting/reportGenerator.d.ts +90 -0
package/dist/evaluation/reporting/reportGenerator.js +374 -0
package/dist/evaluation/scorers/baseScorer.d.ts +83 -0
package/dist/evaluation/scorers/baseScorer.js +232 -0
package/dist/evaluation/scorers/customScorerUtils.d.ts +95 -0
package/dist/evaluation/scorers/customScorerUtils.js +381 -0
package/dist/evaluation/scorers/index.d.ts +10 -0
package/dist/evaluation/scorers/index.js +16 -0
package/dist/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
package/dist/evaluation/scorers/llm/answerRelevancyScorer.js +99 -0
package/dist/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
package/dist/evaluation/scorers/llm/baseLLMScorer.js +281 -0
package/dist/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
package/dist/evaluation/scorers/llm/biasDetectionScorer.js +127 -0
package/dist/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
package/dist/evaluation/scorers/llm/contextPrecisionScorer.js +92 -0
package/dist/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
package/dist/evaluation/scorers/llm/contextRelevancyScorer.js +107 -0
package/dist/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
package/dist/evaluation/scorers/llm/faithfulnessScorer.js +121 -0
package/dist/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
package/dist/evaluation/scorers/llm/hallucinationScorer.js +140 -0
package/dist/evaluation/scorers/llm/index.d.ts +15 -0
package/dist/evaluation/scorers/llm/index.js +16 -0
package/dist/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
package/dist/evaluation/scorers/llm/promptAlignmentScorer.js +106 -0
package/dist/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
package/dist/evaluation/scorers/llm/summarizationScorer.js +114 -0
package/dist/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
package/dist/evaluation/scorers/llm/toneConsistencyScorer.js +106 -0
package/dist/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
package/dist/evaluation/scorers/llm/toxicityScorer.js +121 -0
package/dist/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
package/dist/evaluation/scorers/rule/baseRuleScorer.js +233 -0
package/dist/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
package/dist/evaluation/scorers/rule/contentSimilarityScorer.js +350 -0
package/dist/evaluation/scorers/rule/formatScorer.d.ts +147 -0
package/dist/evaluation/scorers/rule/formatScorer.js +470 -0
package/dist/evaluation/scorers/rule/index.d.ts +9 -0
package/dist/evaluation/scorers/rule/index.js +10 -0
package/dist/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
package/dist/evaluation/scorers/rule/keywordCoverageScorer.js +347 -0
package/dist/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
package/dist/evaluation/scorers/rule/lengthScorer.js +351 -0
package/dist/evaluation/scorers/scorerBuilder.d.ts +161 -0
package/dist/evaluation/scorers/scorerBuilder.js +420 -0
package/dist/evaluation/scorers/scorerRegistry.d.ts +62 -0
package/dist/evaluation/scorers/scorerRegistry.js +467 -0
package/dist/index.d.ts +37 -25
package/dist/index.js +65 -26
package/dist/lib/auth/providers/BaseAuthProvider.d.ts +1 -1
package/dist/lib/evaluation/BatchEvaluator.d.ts +163 -0
package/dist/lib/evaluation/BatchEvaluator.js +268 -0
package/dist/lib/evaluation/EvaluationAggregator.d.ts +272 -0
package/dist/lib/evaluation/EvaluationAggregator.js +378 -0
package/dist/lib/evaluation/EvaluatorFactory.d.ts +113 -0
package/dist/lib/evaluation/EvaluatorFactory.js +281 -0
package/dist/lib/evaluation/EvaluatorRegistry.d.ts +160 -0
package/dist/lib/evaluation/EvaluatorRegistry.js +185 -0
package/dist/lib/evaluation/errors/EvaluationError.d.ts +189 -0
package/dist/lib/evaluation/errors/EvaluationError.js +207 -0
package/dist/lib/evaluation/errors/index.d.ts +4 -0
package/dist/lib/evaluation/errors/index.js +5 -0
package/dist/lib/evaluation/hooks/index.d.ts +6 -0
package/dist/lib/evaluation/hooks/index.js +7 -0
package/dist/lib/evaluation/hooks/langfuseAdapter.d.ts +99 -0
package/dist/lib/evaluation/hooks/langfuseAdapter.js +173 -0
package/dist/lib/evaluation/hooks/observabilityHooks.d.ts +129 -0
package/dist/lib/evaluation/hooks/observabilityHooks.js +182 -0
package/dist/lib/evaluation/index.d.ts +11 -2
package/dist/lib/evaluation/index.js +15 -0
package/dist/lib/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
package/dist/lib/evaluation/pipeline/evaluationPipeline.js +382 -0
package/dist/lib/evaluation/pipeline/index.d.ts +8 -0
package/dist/lib/evaluation/pipeline/index.js +9 -0
package/dist/lib/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
package/dist/lib/evaluation/pipeline/pipelineBuilder.js +261 -0
package/dist/lib/evaluation/pipeline/presets.d.ts +66 -0
package/dist/lib/evaluation/pipeline/presets.js +225 -0
package/dist/lib/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
package/dist/lib/evaluation/pipeline/strategies/batchStrategy.js +239 -0
package/dist/lib/evaluation/pipeline/strategies/index.d.ts +6 -0
package/dist/lib/evaluation/pipeline/strategies/index.js +7 -0
package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.js +239 -0
package/dist/lib/evaluation/reporting/index.d.ts +6 -0
package/dist/lib/evaluation/reporting/index.js +7 -0
package/dist/lib/evaluation/reporting/metricsCollector.d.ts +147 -0
package/dist/lib/evaluation/reporting/metricsCollector.js +286 -0
package/dist/lib/evaluation/reporting/reportGenerator.d.ts +90 -0
package/dist/lib/evaluation/reporting/reportGenerator.js +375 -0
package/dist/lib/evaluation/scorers/baseScorer.d.ts +83 -0
package/dist/lib/evaluation/scorers/baseScorer.js +233 -0
package/dist/lib/evaluation/scorers/customScorerUtils.d.ts +95 -0
package/dist/lib/evaluation/scorers/customScorerUtils.js +382 -0
package/dist/lib/evaluation/scorers/index.d.ts +10 -0
package/dist/lib/evaluation/scorers/index.js +17 -0
package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.js +100 -0
package/dist/lib/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
package/dist/lib/evaluation/scorers/llm/baseLLMScorer.js +282 -0
package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.js +128 -0
package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.js +93 -0
package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.js +108 -0
package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.js +122 -0
package/dist/lib/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
package/dist/lib/evaluation/scorers/llm/hallucinationScorer.js +141 -0
package/dist/lib/evaluation/scorers/llm/index.d.ts +15 -0
package/dist/lib/evaluation/scorers/llm/index.js +17 -0
package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.js +107 -0
package/dist/lib/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
package/dist/lib/evaluation/scorers/llm/summarizationScorer.js +115 -0
package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.js +107 -0
package/dist/lib/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
package/dist/lib/evaluation/scorers/llm/toxicityScorer.js +122 -0
package/dist/lib/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
package/dist/lib/evaluation/scorers/rule/baseRuleScorer.js +234 -0
package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.js +351 -0
package/dist/lib/evaluation/scorers/rule/formatScorer.d.ts +147 -0
package/dist/lib/evaluation/scorers/rule/formatScorer.js +471 -0
package/dist/lib/evaluation/scorers/rule/index.d.ts +9 -0
package/dist/lib/evaluation/scorers/rule/index.js +11 -0
package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.js +348 -0
package/dist/lib/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
package/dist/lib/evaluation/scorers/rule/lengthScorer.js +352 -0
package/dist/lib/evaluation/scorers/scorerBuilder.d.ts +161 -0
package/dist/lib/evaluation/scorers/scorerBuilder.js +421 -0
package/dist/lib/evaluation/scorers/scorerRegistry.d.ts +62 -0
package/dist/lib/evaluation/scorers/scorerRegistry.js +468 -0
package/dist/lib/index.d.ts +37 -25
package/dist/lib/index.js +65 -26
package/dist/lib/neurolink.d.ts +204 -0
package/dist/lib/neurolink.js +296 -0
package/dist/lib/types/index.d.ts +3 -1
package/dist/lib/types/index.js +3 -2
package/dist/lib/types/scorerTypes.d.ts +423 -0
package/dist/lib/types/scorerTypes.js +6 -0
package/dist/lib/utils/errorHandling.d.ts +20 -0
package/dist/lib/utils/errorHandling.js +60 -0
package/dist/neurolink.d.ts +204 -0
package/dist/neurolink.js +296 -0
package/dist/types/index.d.ts +3 -1
package/dist/types/index.js +3 -2
package/dist/types/scorerTypes.d.ts +423 -0
package/dist/types/scorerTypes.js +5 -0
package/dist/utils/errorHandling.d.ts +20 -0
package/dist/utils/errorHandling.js +60 -0
package/package.json +1 -1

package/dist/index.js CHANGED Viewed

@@ -37,6 +37,8 @@ export { AIProviderFactory };
 export { GoogleTTSHandler } from "./adapters/tts/googleTTSHandler.js";
 // Config Manager export
 export { NeuroLinkConfigManager as ConfigManager } from "./config/configManager.js";
+// Core Infrastructure exports (Mastra-inspired patterns)
+export { BaseFactory, BaseRegistry, NeuroLinkFeatureError, createErrorFactory, withRetry, TypedEventEmitter, } from "./core/infrastructure/index.js";
 // ============================================================================
 // CLIENT SDK EXPORTS - Type-safe API access for browser and Node.js
 // Note: React hooks are NOT re-exported here. Import from '@juspay/neurolink/client'.
@@ -303,6 +305,66 @@ export async function initializeTelemetry() {
 export async function getTelemetryStatus() {
     return getStatus();
 }
+// ============================================================================
+// EVALUATION SYSTEM - Comprehensive Response Evaluation & Scoring
+// ============================================================================
+/**
+ * Evaluation System Exports
+ *
+ * A comprehensive evaluation framework for assessing AI response quality,
+ * with support for RAGAS-style metrics, custom scorers, and pipeline-based evaluation.
+ *
+ * @example
+ * ```typescript
+ * import {
+ *   Evaluator,
+ *   ScorerRegistry,
+ *   EvaluationPipeline,
+ *   createFaithfulnessScorer,
+ *   createAnswerRelevancyScorer,
+ * } from '@juspay/neurolink';
+ *
+ * // Create a pipeline with multiple scorers
+ * const pipeline = new EvaluationPipeline({
+ *   scorers: [
+ *     createFaithfulnessScorer({ model: 'gpt-4' }),
+ *     createAnswerRelevancyScorer({ model: 'gpt-4' }),
+ *   ],
+ * });
+ *
+ * // Run evaluation
+ * const result = await pipeline.evaluate({
+ *   question: 'What is quantum computing?',
+ *   answer: 'Quantum computing uses quantum mechanics...',
+ *   context: ['Quantum computing is a type of computation...'],
+ * });
+ * ```
+ */
+export {
+// Main Evaluator
+Evaluator,
+// Factory and Registry (Mastra-inspired patterns)
+BatchEvaluator, EvaluationAggregator, EvaluatorFactory, getEvaluatorFactory, EvaluatorRegistry, getEvaluatorRegistry,
+// Error utilities
+EvaluationErrorCodes, evaluationErrors, isRetryableEvaluationError, isEvaluationError, createEvaluationFailedError, createParseError, createStrategyNotFoundError, createProviderError, createMaxRetriesExceededError, createBatchEvaluationError, createConfigurationError, contextToErrorContext,
+// Hooks
+createLangfuseAdapter, createMockLangfuseClient, LangfuseAdapter, startLangfuseAdapter, createConsoleLoggerHook, createMetricsCollectorHook, ObservabilityHooks, observabilityHooks, pipelineToSpanAttributes, scorerToSpanAttributes,
+// Pipeline
+createAndInitializePipeline, createPipeline, EvaluationPipeline, PipelineBuilder, Pipelines, CODE_GENERATION_PIPELINE, COMPREHENSIVE_PIPELINE, CUSTOMER_SUPPORT_PIPELINE, getPreset, getPresetNames, MINIMAL_PIPELINE, PipelinePresets, QUALITY_PIPELINE, RAG_PIPELINE, SAFETY_PIPELINE, SUMMARIZATION_PIPELINE,
+// Strategies
+BatchStrategy, createBatchStrategy, evaluateBatch, streamBatchEvaluation, createSamplingStrategy, DEFAULT_SAMPLING_CONFIG, SamplingStrategies, SamplingStrategy,
+// Reporting
+createMetricsCollector, globalMetricsCollector, MetricsCollector, createReportGenerator, ReportGenerator, Reports,
+// Scorers - Base
+BaseScorer, DEFAULT_SCORE_SCALE as EVAL_DEFAULT_SCORE_SCALE,
+// Scorers - Custom utilities
+composeScorers, createConditionalScorer, createFunctionScorer, createInvertedScorer, createKeywordScorer, createRegexScorer, createScorerMetadata, createSimpleLengthScorer,
+// Scorers - LLM-based
+AnswerRelevancyScorer, createAnswerRelevancyScorer, BaseLLMScorer, DEFAULT_LLM_SCORER_CONFIG, BiasDetectionScorer, createBiasDetectionScorer, ContextPrecisionScorer, createContextPrecisionScorer, ContextRelevancyScorer, createContextRelevancyScorer, createFaithfulnessScorer, FaithfulnessScorer, createHallucinationScorer, HallucinationScorer, createPromptAlignmentScorer, PromptAlignmentScorer, createSummarizationScorer, SummarizationScorer, createToneConsistencyScorer, ToneConsistencyScorer, createToxicityScorer, ToxicityScorer,
+// Scorers - Rule-based
+BaseRuleScorer, DEFAULT_RULE_SCORER_CONFIG, ContentSimilarityScorer, createContentSimilarityScorer, createFormatScorer, FormatScorer, FormatScorerPresets, createKeywordCoverageScorer, KeywordCoverageScorer, createLengthScorer, LengthScorer, LengthScorerPresets,
+// Scorers - Builder & Registry
+ScorerBuilder, Scorers, ScorerRegistry, } from "./evaluation/index.js";
 /**
  * Legacy generateText function for backward compatibility.
  *
@@ -482,33 +544,10 @@ extractMetadata, formatContextWithCitations,
 GraphRAG, getAvailableStrategies, getCircuitBreaker, getDefaultChunkerConfig, getRecommendedStrategy, HTMLChunker, HTMLLoader, InMemoryBM25Index, InMemoryVectorStore, JSONChunker as RAGJSONChunker, JSONLoader, LaTeXChunker, LLMMetadataExtractor, linearCombination, loadDocument, loadDocuments, MarkdownChunker, MarkdownLoader, MDocument, PDFLoader,
 // RAG Integration for generate/stream
 prepareRAGTool, processDocument, RAGCircuitBreaker, RAGCircuitBreakerManager, RAGPipeline, RAGRetryHandler, RecursiveChunker, ragCircuitBreakerManager, reciprocalRankFusion, rerank, SemanticChunker, SentenceChunker, simpleRerank, summarizeContext, TextLoader, TokenChunker, WebLoader, } from "./rag/index.js";
-// ============================================================================
-// EVALUATION / SCORING - RAGAS-style Response Quality Evaluation
-// ============================================================================
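+// Evaluator is now exported from the unified evaluation block above (via the
+// ./evaluation/index.js barrel); ContextBuilder is not covered by that barrel.
+// NOTE(review): PromptBuilder, RAGASEvaluator, RetryManager and mapToEvaluationData are absent from that export list - confirm the removal is intended.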
 export { ContextBuilder } from "./evaluation/contextBuilder.js";
-/**
- * Evaluation system for AI response quality assessment.
- *
- * Uses RAGAS-style model-based evaluation with a "judge" LLM to score
- * responses on relevance, accuracy, completeness, and overall quality.
- * Supports retry logic with progressive prompt improvement.
- *
- * @example
- * ```typescript
- * import { Evaluator, RAGASEvaluator, ContextBuilder, RetryManager } from '@juspay/neurolink';
- *
- * const evaluator = new Evaluator({
- *   evaluationModel: 'gemini-1.5-flash',
- *   provider: 'vertex',
- *   threshold: 7,
- * });
- * ```
- */
-export { Evaluator } from "./evaluation/index.js";
-export { PromptBuilder } from "./evaluation/prompts.js";
-export { RAGASEvaluator } from "./evaluation/ragasEvaluator.js";
-export { RetryManager } from "./evaluation/retryManager.js";
-export { mapToEvaluationData } from "./evaluation/scoring.js";
 // ============================================================================
 // AUTHENTICATION PROVIDERS - Multi-provider Auth Integration
 // ============================================================================

package/dist/lib/auth/providers/BaseAuthProvider.d.ts CHANGED Viewed

@@ -48,7 +48,7 @@ export declare const AuthProviderError: {
         retryable?: boolean;
         details?: Record<string, unknown>;
         cause?: Error;
-    } | undefined) => import("../../core/infrastructure/baseError.js").NeuroLinkFeatureError;
+    } | undefined) => import("../../index.js").NeuroLinkFeatureError;
 };
 /**
  * Default in-memory session storage

package/dist/lib/evaluation/BatchEvaluator.d.ts ADDED Viewed

@@ -0,0 +1,163 @@
+/**
+ * @file BatchEvaluator - Supports batch evaluation of multiple responses.
+ * Enables parallel evaluation with configurable concurrency and error handling.
+ */
+import type { LanguageModelV3CallOptions } from "@ai-sdk/provider";
+import type { GenerateResult } from "../types/generateTypes.js";
+import type { EvaluationConfig } from "../types/evaluationTypes.js";
+import type { EvaluationData } from "../types/evaluation.js";
+import type { AutoEvaluationConfig } from "../types/middlewareTypes.js";
+/**
+ * Configuration for batch evaluation (extends the base EvaluationConfig).
+ */
+export interface BatchEvaluationConfig extends EvaluationConfig {
+    /** Chunk size: how many items evaluateBatch runs in parallel at once (default: 5; 0 falls back to 5) */
+    concurrency?: number;
+    /** Whether a failed item is returned as an unsuccessful result instead of being thrown (default: true) */
+    continueOnError?: boolean;
+    /** Maximum retries for retryable errors (default: 2; evaluateBatch also treats 0 as 2) */
+    maxRetries?: number;
+    /** Delay between retries in milliseconds (default: 1000; evaluateBatch also treats 0 as 1000) */
+    retryDelay?: number;
+    /** Called by evaluateBatch after each item finishes, with running totals; errors it throws are logged and ignored */
+    onProgress?: (progress: BatchProgress) => void;
+    /** Called by evaluateBatch with each item's result, success or failure; errors it throws are logged and ignored */
+    onItemComplete?: (result: BatchEvaluationItemResult) => void;
+}
+/**
+ * Progress snapshot passed to the onProgress callback during batch evaluation.
+ */
+export interface BatchProgress {
+    /** Total items to evaluate */
+    total: number;
+    /** Items completed (success + failed) */
+    completed: number;
+    /** Items that succeeded */
+    succeeded: number;
+    /** Items that failed */
+    failed: number;
+    /** Items not yet completed (total - completed), including any currently in flight */
+    pending: number;
+    /** Percentage complete, rounded to the nearest whole number (0-100) */
+    percentComplete: number;
+}
+/**
+ * Input item for batch evaluation.
+ */
+export interface BatchEvaluationItem {
+    /** Identifier for this item; copied onto the matching BatchEvaluationItemResult */
+    id: string;
+    /** The generation options, passed to Evaluator.evaluate as its first argument */
+    options: LanguageModelV3CallOptions;
+    /** The generation result to evaluate */
+    result: GenerateResult;
+    /** Optional per-item threshold; evaluateBatch prefers it over autoEvalConfig.threshold, config.threshold, then 7 (0 is skipped) */
+    threshold?: number;
+}
+/**
+ * Result for a single item in batch evaluation.
+ */
+export interface BatchEvaluationItemResult {
+    /** The ID of the input item this result belongs to */
+    id: string;
+    /** Whether the evaluation succeeded */
+    success: boolean;
+    /** The evaluation data (present only when success is true) */
+    data?: EvaluationData;
+    /** Error information (if failed); evaluateBatch sets code only for NeuroLinkFeatureError and retryable to false for any other error */
+    error?: {
+        message: string;
+        code?: string;
+        retryable?: boolean;
+    };
+    /** Wall-clock time for this item in milliseconds, measured across all attempts and retry delays */
+    duration: number;
+    /** Number of retries performed before the final outcome (0 when the first attempt decided it) */
+    retryCount: number;
+}
+/**
+ * Result of a batch evaluation operation.
+ */
+export interface BatchEvaluationResult {
+    /** All item results */
+    results: BatchEvaluationItemResult[];
+    /** Summary statistics */
+    summary: {
+        /** Total items evaluated */
+        total: number;
+        /** Number of successful evaluations */
+        succeeded: number;
+        /** Number of failed evaluations */
+        failed: number;
+        /** Average evaluation score (for successful items) */
+        averageScore: number;
+        /** Average evaluation time in milliseconds */
+        averageDuration: number;
+        /** Total time for batch evaluation - presumably milliseconds, like the per-item durations; TODO confirm */
+        totalDuration: number;
+        /** Passing rate (percentage of items meeting threshold) - NOTE(review): confirm whether failed items count in the denominator */
+        passingRate: number;
+    };
+    /** Whether all evaluations succeeded */
+    allSucceeded: boolean;
+}
+/**
+ * BatchEvaluator - Performs evaluation on multiple items, one chunk at a time.
+ * Supports configurable chunk size (concurrency), retry logic, and progress tracking.
+ *
+ * @example
+ * ```typescript
+ * const batchEvaluator = new BatchEvaluator({
+ *   concurrency: 3,
+ *   continueOnError: true,
+ *   onProgress: (progress) => console.log(`${progress.percentComplete}% complete`)
+ * });
+ *
+ * const items = [
+ *   { id: '1', options: opts1, result: result1 },
+ *   { id: '2', options: opts2, result: result2 },
+ * ];
+ *
+ * const batchResult = await batchEvaluator.evaluateBatch(items);
+ * console.log(`Passing rate: ${batchResult.summary.passingRate}%`);
+ * ```
+ */
+export declare class BatchEvaluator {
+    private config;
+    constructor(config?: BatchEvaluationConfig);
+    /**
+     * Builds a new Evaluator from the current config; evaluateBatch calls it once per attempt.
+     */
+    private _createEvaluator;
+    /**
+     * Evaluates items in chunks of `concurrency`; a chunk runs in parallel and settles before the next starts.
+     *
+     * @param items - Array of items to evaluate
+     * @param autoEvalConfig - Auto-evaluation configuration, forwarded to each evaluation with the resolved threshold
+     * @returns Batch evaluation results with summary statistics
+     */
+    evaluateBatch(items: BatchEvaluationItem[], autoEvalConfig?: AutoEvaluationConfig): Promise<BatchEvaluationResult>;
+    /**
+     * Evaluates items sequentially (one at a time).
+     * Useful for debugging or when order matters.
+     *
+     * @param items - Array of items to evaluate
+     * @param autoEvalConfig - Auto-evaluation configuration
+     * @returns Batch evaluation results
+     */
+    evaluateSequential(items: BatchEvaluationItem[], autoEvalConfig?: AutoEvaluationConfig): Promise<BatchEvaluationResult>;
+    /**
+     * Gets the current configuration.
+     */
+    getConfig(): BatchEvaluationConfig;
+    /**
+     * Updates the configuration.
+     *
+     * @param config - New configuration values
+     */
+    updateConfig(config: Partial<BatchEvaluationConfig>): void;
+    /**
+     * Helper to delay execution; evaluateBatch awaits it between retry attempts.
+     */
+    private delay;
+}

package/dist/lib/evaluation/BatchEvaluator.js ADDED Viewed

@@ -0,0 +1,268 @@
+/**
+ * @file BatchEvaluator - Supports batch evaluation of multiple responses.
+ * Enables parallel evaluation with configurable concurrency and error handling.
+ */
+import { Evaluator } from "./index.js";
+import { createBatchEvaluationError, isRetryableEvaluationError, } from "./errors/EvaluationError.js";
+import { logger } from "../utils/logger.js";
+import { NeuroLinkFeatureError } from "../core/infrastructure/index.js";
+/**
+ * BatchEvaluator - Performs evaluation on multiple items in parallel.
+ * Supports configurable concurrency, retry logic, and progress tracking.
+ *
+ * @example
+ * ```typescript
+ * const batchEvaluator = new BatchEvaluator({
+ *   concurrency: 3,
+ *   continueOnError: true,
+ *   onProgress: (progress) => console.log(`${progress.percentComplete}% complete`)
+ * });
+ *
+ * const items = [
+ *   { id: '1', options: opts1, result: result1 },
+ *   { id: '2', options: opts2, result: result2 },
+ * ];
+ *
+ * const batchResult = await batchEvaluator.evaluateBatch(items);
+ * console.log(`Passing rate: ${batchResult.summary.passingRate}%`);
+ * ```
+ */
+export class BatchEvaluator {
+    config;
+    constructor(config = {}) {
+        this.config = {
+            concurrency: 5,
+            continueOnError: true,
+            maxRetries: 2,
+            retryDelay: 1000,
+            ...config,
+        };
+    }
+    /**
+     * Create a fresh Evaluator instance for each evaluation to avoid leaking state.
+     */
+    _createEvaluator() {
+        return new Evaluator(this.config);
+    }
+    /**
+     * Evaluates a batch of items in parallel with controlled concurrency.
+     *
+     * @param items - Array of items to evaluate
+     * @param autoEvalConfig - Auto-evaluation configuration for thresholds
+     * @returns Batch evaluation results with summary statistics
+     */
+    async evaluateBatch(items, autoEvalConfig = {}) {
+        const startTime = Date.now();
+        const results = [];
+        const concurrency = this.config.concurrency || 5;
+        // Track progress
+        let completed = 0;
+        let succeeded = 0;
+        let failed = 0;
+        const reportProgress = () => {
+            if (this.config.onProgress) {
+                try {
+                    this.config.onProgress({
+                        total: items.length,
+                        completed,
+                        succeeded,
+                        failed,
+                        pending: items.length - completed,
+                        percentComplete: Math.round((completed / items.length) * 100),
+                    });
+                }
+                catch (callbackError) {
+                    logger.warn("[BatchEvaluator] onProgress callback threw an error", {
+                        error: callbackError instanceof Error
+                            ? callbackError.message
+                            : String(callbackError),
+                    });
+                }
+            }
+        };
+        // Process items with concurrency limit
+        const processItem = async (item) => {
+            const itemStartTime = Date.now();
+            let retryCount = 0;
+            let lastError;
+            while (retryCount <= (this.config.maxRetries || 2)) {
+                try {
+                    const threshold = item.threshold ||
+                        autoEvalConfig.threshold ||
+                        this.config.threshold ||
+                        7;
+                    // Create fresh evaluator per attempt to avoid leaking state
+                    const evaluator = this._createEvaluator();
+                    const data = await evaluator.evaluate(item.options, item.result, threshold, {
+                        ...autoEvalConfig,
+                        threshold,
+                    });
+                    const result = {
+                        id: item.id,
+                        success: true,
+                        data,
+                        duration: Date.now() - itemStartTime,
+                        retryCount,
+                    };
+                    succeeded++;
+                    completed++;
+                    reportProgress();
+                    if (this.config.onItemComplete) {
+                        try {
+                            this.config.onItemComplete(result);
+                        }
+                        catch (callbackError) {
+                            logger.warn("[BatchEvaluator] onItemComplete callback threw an error", {
+                                error: callbackError instanceof Error
+                                    ? callbackError.message
+                                    : String(callbackError),
+                            });
+                        }
+                    }
+                    return result;
+                }
+                catch (error) {
+                    lastError = error;
+                    // Check if error is retryable
+                    const isRetryable = error instanceof NeuroLinkFeatureError &&
+                        isRetryableEvaluationError(error);
+                    if (isRetryable && retryCount < (this.config.maxRetries || 2)) {
+                        retryCount++;
+                        logger.debug(`[BatchEvaluator.evaluateBatch] Retrying evaluation for item ${item.id}`, { attempt: retryCount + 1, itemId: item.id });
+                        await this.delay(this.config.retryDelay || 1000);
+                        continue;
+                    }
+                    // Not retryable or max retries exceeded
+                    break;
+                }
+            }
+            // Failed after all retries
+            const errorResult = {
+                id: item.id,
+                success: false,
+                error: {
+                    message: lastError?.message || "Unknown error",
+                    code: lastError instanceof NeuroLinkFeatureError
+                        ? lastError.code
+                        : undefined,
+                    retryable: lastError instanceof NeuroLinkFeatureError
+                        ? lastError.retryable
+                        : false,
+                },
+                duration: Date.now() - itemStartTime,
+                retryCount,
+            };
+            failed++;
+            completed++;
+            reportProgress();
+            if (this.config.onItemComplete) {
+                try {
+                    this.config.onItemComplete(errorResult);
+                }
+                catch (callbackError) {
+                    logger.warn("[BatchEvaluator] onItemComplete callback threw an error", {
+                        error: callbackError instanceof Error
+                            ? callbackError.message
+                            : String(callbackError),
+                    });
+                }
+            }
+            if (!this.config.continueOnError) {
+                throw lastError;
+            }
+            return errorResult;
+        };
+        // Process items in batches based on concurrency
+        for (let i = 0; i < items.length; i += concurrency) {
+            const batch = items.slice(i, i + concurrency);
+            const settled = await Promise.allSettled(batch.map(processItem));
+            const batchResults = [];
+            for (const outcome of settled) {
+                if (outcome.status === "fulfilled") {
+                    batchResults.push(outcome.value);
+                }
+                // Rejected outcomes are already handled inside processItem
+                // (errors are caught and returned as error results when continueOnError is true,
+                //  or re-thrown which causes the settled entry to be 'rejected')
+            }
+            results.push(...batchResults);
+            // If continueOnError is false and any item in this batch was rejected, throw aggregate
+            if (!this.config.continueOnError) {
+                const rejections = settled.filter((s) => s.status === "rejected");
+                if (rejections.length > 0) {
+                    const failedItems = results
+                        .filter((r) => !r.success)
+                        .map((r, idx) => ({
+                        index: idx,
+                        error: new Error(r.error?.message || "Unknown error"),
+                    }));
+                    throw createBatchEvaluationError(rejections.length, items.length, failedItems);
+                }
+            }
+        }
+        // Calculate summary statistics
+        const successfulResults = results.filter((r) => r.success && r.data);
+        const scores = successfulResults.map((r) => r.data.overall);
+        const passingScores = successfulResults.filter((r) => r.data.overall >=
+            (autoEvalConfig.threshold || this.config.threshold || 7));
+        const summary = {
+            total: items.length,
+            succeeded,
+            failed,
+            averageScore: scores.length > 0
+                ? scores.reduce((a, b) => a + b, 0) / scores.length
+                : 0,
+            averageDuration: results.length > 0
+                ? results.reduce((a, b) => a + b.duration, 0) / results.length
+                : 0,
+            totalDuration: Date.now() - startTime,
+            passingRate: successfulResults.length > 0
+                ? (passingScores.length / successfulResults.length) * 100
+                : 0,
+        };
+        return {
+            results,
+            summary,
+            allSucceeded: failed === 0,
+        };
+    }
+    /**
+     * Evaluates items sequentially (one at a time).
+     * Useful for debugging or when order matters.
+     *
+     * @param items - Array of items to evaluate
+     * @param autoEvalConfig - Auto-evaluation configuration
+     * @returns Batch evaluation results
+     */
+    async evaluateSequential(items, autoEvalConfig = {}) {
+        // Create a temporary evaluator with sequential config to avoid mutating shared state
+        const sequentialEvaluator = new BatchEvaluator({
+            ...this.config,
+            concurrency: 1,
+        });
+        return sequentialEvaluator.evaluateBatch(items, autoEvalConfig);
+    }
+    /**
+     * Gets the current configuration.
+     */
+    getConfig() {
+        return { ...this.config };
+    }
+    /**
+     * Updates the configuration.
+     *
+     * @param config - New configuration values
+     */
+    updateConfig(config) {
+        this.config = { ...this.config, ...config };
+        // Fresh evaluators are created per evaluation via _createEvaluator(),
+        // so no shared evaluator needs to be re-created here.
+    }
+    /**
+     * Helper to delay execution.
+     */
+    delay(ms) {
+        return new Promise((resolve) => setTimeout(resolve, ms));
+    }
+}
+//# sourceMappingURL=BatchEvaluator.js.map