@juspay/neurolink 9.36.1 → 9.37.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202) hide show
  1. package/CHANGELOG.md +6 -0
  2. package/dist/auth/errors.d.ts +1 -1
  3. package/dist/auth/middleware/AuthMiddleware.d.ts +1 -1
  4. package/dist/auth/providers/BaseAuthProvider.d.ts +1 -1
  5. package/dist/browser/neurolink.min.js +921 -423
  6. package/dist/cli/commands/evaluate.d.ts +48 -0
  7. package/dist/cli/commands/evaluate.js +955 -0
  8. package/dist/cli/parser.js +4 -1
  9. package/dist/evaluation/BatchEvaluator.d.ts +163 -0
  10. package/dist/evaluation/BatchEvaluator.js +267 -0
  11. package/dist/evaluation/EvaluationAggregator.d.ts +272 -0
  12. package/dist/evaluation/EvaluationAggregator.js +377 -0
  13. package/dist/evaluation/EvaluatorFactory.d.ts +113 -0
  14. package/dist/evaluation/EvaluatorFactory.js +280 -0
  15. package/dist/evaluation/EvaluatorRegistry.d.ts +160 -0
  16. package/dist/evaluation/EvaluatorRegistry.js +184 -0
  17. package/dist/evaluation/errors/EvaluationError.d.ts +189 -0
  18. package/dist/evaluation/errors/EvaluationError.js +206 -0
  19. package/dist/evaluation/errors/index.d.ts +4 -0
  20. package/dist/evaluation/errors/index.js +4 -0
  21. package/dist/evaluation/hooks/index.d.ts +6 -0
  22. package/dist/evaluation/hooks/index.js +6 -0
  23. package/dist/evaluation/hooks/langfuseAdapter.d.ts +99 -0
  24. package/dist/evaluation/hooks/langfuseAdapter.js +172 -0
  25. package/dist/evaluation/hooks/observabilityHooks.d.ts +129 -0
  26. package/dist/evaluation/hooks/observabilityHooks.js +181 -0
  27. package/dist/evaluation/index.d.ts +11 -2
  28. package/dist/evaluation/index.js +15 -0
  29. package/dist/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
  30. package/dist/evaluation/pipeline/evaluationPipeline.js +381 -0
  31. package/dist/evaluation/pipeline/index.d.ts +8 -0
  32. package/dist/evaluation/pipeline/index.js +8 -0
  33. package/dist/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
  34. package/dist/evaluation/pipeline/pipelineBuilder.js +260 -0
  35. package/dist/evaluation/pipeline/presets.d.ts +66 -0
  36. package/dist/evaluation/pipeline/presets.js +224 -0
  37. package/dist/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
  38. package/dist/evaluation/pipeline/strategies/batchStrategy.js +238 -0
  39. package/dist/evaluation/pipeline/strategies/index.d.ts +6 -0
  40. package/dist/evaluation/pipeline/strategies/index.js +6 -0
  41. package/dist/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
  42. package/dist/evaluation/pipeline/strategies/samplingStrategy.js +238 -0
  43. package/dist/evaluation/reporting/index.d.ts +6 -0
  44. package/dist/evaluation/reporting/index.js +6 -0
  45. package/dist/evaluation/reporting/metricsCollector.d.ts +147 -0
  46. package/dist/evaluation/reporting/metricsCollector.js +285 -0
  47. package/dist/evaluation/reporting/reportGenerator.d.ts +90 -0
  48. package/dist/evaluation/reporting/reportGenerator.js +374 -0
  49. package/dist/evaluation/scorers/baseScorer.d.ts +83 -0
  50. package/dist/evaluation/scorers/baseScorer.js +232 -0
  51. package/dist/evaluation/scorers/customScorerUtils.d.ts +95 -0
  52. package/dist/evaluation/scorers/customScorerUtils.js +381 -0
  53. package/dist/evaluation/scorers/index.d.ts +10 -0
  54. package/dist/evaluation/scorers/index.js +16 -0
  55. package/dist/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
  56. package/dist/evaluation/scorers/llm/answerRelevancyScorer.js +99 -0
  57. package/dist/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
  58. package/dist/evaluation/scorers/llm/baseLLMScorer.js +281 -0
  59. package/dist/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
  60. package/dist/evaluation/scorers/llm/biasDetectionScorer.js +127 -0
  61. package/dist/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
  62. package/dist/evaluation/scorers/llm/contextPrecisionScorer.js +92 -0
  63. package/dist/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
  64. package/dist/evaluation/scorers/llm/contextRelevancyScorer.js +107 -0
  65. package/dist/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
  66. package/dist/evaluation/scorers/llm/faithfulnessScorer.js +121 -0
  67. package/dist/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
  68. package/dist/evaluation/scorers/llm/hallucinationScorer.js +140 -0
  69. package/dist/evaluation/scorers/llm/index.d.ts +15 -0
  70. package/dist/evaluation/scorers/llm/index.js +16 -0
  71. package/dist/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
  72. package/dist/evaluation/scorers/llm/promptAlignmentScorer.js +106 -0
  73. package/dist/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
  74. package/dist/evaluation/scorers/llm/summarizationScorer.js +114 -0
  75. package/dist/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
  76. package/dist/evaluation/scorers/llm/toneConsistencyScorer.js +106 -0
  77. package/dist/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
  78. package/dist/evaluation/scorers/llm/toxicityScorer.js +121 -0
  79. package/dist/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
  80. package/dist/evaluation/scorers/rule/baseRuleScorer.js +233 -0
  81. package/dist/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
  82. package/dist/evaluation/scorers/rule/contentSimilarityScorer.js +350 -0
  83. package/dist/evaluation/scorers/rule/formatScorer.d.ts +147 -0
  84. package/dist/evaluation/scorers/rule/formatScorer.js +470 -0
  85. package/dist/evaluation/scorers/rule/index.d.ts +9 -0
  86. package/dist/evaluation/scorers/rule/index.js +10 -0
  87. package/dist/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
  88. package/dist/evaluation/scorers/rule/keywordCoverageScorer.js +347 -0
  89. package/dist/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
  90. package/dist/evaluation/scorers/rule/lengthScorer.js +351 -0
  91. package/dist/evaluation/scorers/scorerBuilder.d.ts +161 -0
  92. package/dist/evaluation/scorers/scorerBuilder.js +420 -0
  93. package/dist/evaluation/scorers/scorerRegistry.d.ts +62 -0
  94. package/dist/evaluation/scorers/scorerRegistry.js +467 -0
  95. package/dist/index.d.ts +37 -25
  96. package/dist/index.js +65 -26
  97. package/dist/lib/auth/providers/BaseAuthProvider.d.ts +1 -1
  98. package/dist/lib/evaluation/BatchEvaluator.d.ts +163 -0
  99. package/dist/lib/evaluation/BatchEvaluator.js +268 -0
  100. package/dist/lib/evaluation/EvaluationAggregator.d.ts +272 -0
  101. package/dist/lib/evaluation/EvaluationAggregator.js +378 -0
  102. package/dist/lib/evaluation/EvaluatorFactory.d.ts +113 -0
  103. package/dist/lib/evaluation/EvaluatorFactory.js +281 -0
  104. package/dist/lib/evaluation/EvaluatorRegistry.d.ts +160 -0
  105. package/dist/lib/evaluation/EvaluatorRegistry.js +185 -0
  106. package/dist/lib/evaluation/errors/EvaluationError.d.ts +189 -0
  107. package/dist/lib/evaluation/errors/EvaluationError.js +207 -0
  108. package/dist/lib/evaluation/errors/index.d.ts +4 -0
  109. package/dist/lib/evaluation/errors/index.js +5 -0
  110. package/dist/lib/evaluation/hooks/index.d.ts +6 -0
  111. package/dist/lib/evaluation/hooks/index.js +7 -0
  112. package/dist/lib/evaluation/hooks/langfuseAdapter.d.ts +99 -0
  113. package/dist/lib/evaluation/hooks/langfuseAdapter.js +173 -0
  114. package/dist/lib/evaluation/hooks/observabilityHooks.d.ts +129 -0
  115. package/dist/lib/evaluation/hooks/observabilityHooks.js +182 -0
  116. package/dist/lib/evaluation/index.d.ts +11 -2
  117. package/dist/lib/evaluation/index.js +15 -0
  118. package/dist/lib/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
  119. package/dist/lib/evaluation/pipeline/evaluationPipeline.js +382 -0
  120. package/dist/lib/evaluation/pipeline/index.d.ts +8 -0
  121. package/dist/lib/evaluation/pipeline/index.js +9 -0
  122. package/dist/lib/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
  123. package/dist/lib/evaluation/pipeline/pipelineBuilder.js +261 -0
  124. package/dist/lib/evaluation/pipeline/presets.d.ts +66 -0
  125. package/dist/lib/evaluation/pipeline/presets.js +225 -0
  126. package/dist/lib/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
  127. package/dist/lib/evaluation/pipeline/strategies/batchStrategy.js +239 -0
  128. package/dist/lib/evaluation/pipeline/strategies/index.d.ts +6 -0
  129. package/dist/lib/evaluation/pipeline/strategies/index.js +7 -0
  130. package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
  131. package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.js +239 -0
  132. package/dist/lib/evaluation/reporting/index.d.ts +6 -0
  133. package/dist/lib/evaluation/reporting/index.js +7 -0
  134. package/dist/lib/evaluation/reporting/metricsCollector.d.ts +147 -0
  135. package/dist/lib/evaluation/reporting/metricsCollector.js +286 -0
  136. package/dist/lib/evaluation/reporting/reportGenerator.d.ts +90 -0
  137. package/dist/lib/evaluation/reporting/reportGenerator.js +375 -0
  138. package/dist/lib/evaluation/scorers/baseScorer.d.ts +83 -0
  139. package/dist/lib/evaluation/scorers/baseScorer.js +233 -0
  140. package/dist/lib/evaluation/scorers/customScorerUtils.d.ts +95 -0
  141. package/dist/lib/evaluation/scorers/customScorerUtils.js +382 -0
  142. package/dist/lib/evaluation/scorers/index.d.ts +10 -0
  143. package/dist/lib/evaluation/scorers/index.js +17 -0
  144. package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
  145. package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.js +100 -0
  146. package/dist/lib/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
  147. package/dist/lib/evaluation/scorers/llm/baseLLMScorer.js +282 -0
  148. package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
  149. package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.js +128 -0
  150. package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
  151. package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.js +93 -0
  152. package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
  153. package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.js +108 -0
  154. package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
  155. package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.js +122 -0
  156. package/dist/lib/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
  157. package/dist/lib/evaluation/scorers/llm/hallucinationScorer.js +141 -0
  158. package/dist/lib/evaluation/scorers/llm/index.d.ts +15 -0
  159. package/dist/lib/evaluation/scorers/llm/index.js +17 -0
  160. package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
  161. package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.js +107 -0
  162. package/dist/lib/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
  163. package/dist/lib/evaluation/scorers/llm/summarizationScorer.js +115 -0
  164. package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
  165. package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.js +107 -0
  166. package/dist/lib/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
  167. package/dist/lib/evaluation/scorers/llm/toxicityScorer.js +122 -0
  168. package/dist/lib/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
  169. package/dist/lib/evaluation/scorers/rule/baseRuleScorer.js +234 -0
  170. package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
  171. package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.js +351 -0
  172. package/dist/lib/evaluation/scorers/rule/formatScorer.d.ts +147 -0
  173. package/dist/lib/evaluation/scorers/rule/formatScorer.js +471 -0
  174. package/dist/lib/evaluation/scorers/rule/index.d.ts +9 -0
  175. package/dist/lib/evaluation/scorers/rule/index.js +11 -0
  176. package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
  177. package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.js +348 -0
  178. package/dist/lib/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
  179. package/dist/lib/evaluation/scorers/rule/lengthScorer.js +352 -0
  180. package/dist/lib/evaluation/scorers/scorerBuilder.d.ts +161 -0
  181. package/dist/lib/evaluation/scorers/scorerBuilder.js +421 -0
  182. package/dist/lib/evaluation/scorers/scorerRegistry.d.ts +62 -0
  183. package/dist/lib/evaluation/scorers/scorerRegistry.js +468 -0
  184. package/dist/lib/index.d.ts +37 -25
  185. package/dist/lib/index.js +65 -26
  186. package/dist/lib/neurolink.d.ts +204 -0
  187. package/dist/lib/neurolink.js +296 -0
  188. package/dist/lib/types/index.d.ts +3 -1
  189. package/dist/lib/types/index.js +3 -2
  190. package/dist/lib/types/scorerTypes.d.ts +423 -0
  191. package/dist/lib/types/scorerTypes.js +6 -0
  192. package/dist/lib/utils/errorHandling.d.ts +20 -0
  193. package/dist/lib/utils/errorHandling.js +60 -0
  194. package/dist/neurolink.d.ts +204 -0
  195. package/dist/neurolink.js +296 -0
  196. package/dist/types/index.d.ts +3 -1
  197. package/dist/types/index.js +3 -2
  198. package/dist/types/scorerTypes.d.ts +423 -0
  199. package/dist/types/scorerTypes.js +5 -0
  200. package/dist/utils/errorHandling.d.ts +20 -0
  201. package/dist/utils/errorHandling.js +60 -0
  202. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -37,6 +37,8 @@ export { AIProviderFactory };
37
37
  export { GoogleTTSHandler } from "./adapters/tts/googleTTSHandler.js";
38
38
  // Config Manager export
39
39
  export { NeuroLinkConfigManager as ConfigManager } from "./config/configManager.js";
40
+ // Core Infrastructure exports (Mastra-inspired patterns)
41
+ export { BaseFactory, BaseRegistry, NeuroLinkFeatureError, createErrorFactory, withRetry, TypedEventEmitter, } from "./core/infrastructure/index.js";
40
42
  // ============================================================================
41
43
  // CLIENT SDK EXPORTS - Type-safe API access for browser and Node.js
42
44
  // Note: React hooks are NOT re-exported here. Import from '@juspay/neurolink/client'.
@@ -303,6 +305,66 @@ export async function initializeTelemetry() {
303
305
  export async function getTelemetryStatus() {
304
306
  return getStatus();
305
307
  }
308
+ // ============================================================================
309
+ // EVALUATION SYSTEM - Comprehensive Response Evaluation & Scoring
310
+ // ============================================================================
311
+ /**
312
+ * Evaluation System Exports
313
+ *
314
+ * A comprehensive evaluation framework for assessing AI response quality,
315
+ * with support for RAGAS-style metrics, custom scorers, and pipeline-based evaluation.
316
+ *
317
+ * @example
318
+ * ```typescript
319
+ * import {
320
+ * Evaluator,
321
+ * ScorerRegistry,
322
+ * EvaluationPipeline,
323
+ * createFaithfulnessScorer,
324
+ * createAnswerRelevancyScorer,
325
+ * } from '@juspay/neurolink';
326
+ *
327
+ * // Create a pipeline with multiple scorers
328
+ * const pipeline = new EvaluationPipeline({
329
+ * scorers: [
330
+ * createFaithfulnessScorer({ model: 'gpt-4' }),
331
+ * createAnswerRelevancyScorer({ model: 'gpt-4' }),
332
+ * ],
333
+ * });
334
+ *
335
+ * // Run evaluation
336
+ * const result = await pipeline.evaluate({
337
+ * question: 'What is quantum computing?',
338
+ * answer: 'Quantum computing uses quantum mechanics...',
339
+ * context: ['Quantum computing is a type of computation...'],
340
+ * });
341
+ * ```
342
+ */
343
+ export {
344
+ // Main Evaluator
345
+ Evaluator,
346
+ // Factory and Registry (Mastra-inspired patterns)
347
+ BatchEvaluator, EvaluationAggregator, EvaluatorFactory, getEvaluatorFactory, EvaluatorRegistry, getEvaluatorRegistry,
348
+ // Error utilities
349
+ EvaluationErrorCodes, evaluationErrors, isRetryableEvaluationError, isEvaluationError, createEvaluationFailedError, createParseError, createStrategyNotFoundError, createProviderError, createMaxRetriesExceededError, createBatchEvaluationError, createConfigurationError, contextToErrorContext,
350
+ // Hooks
351
+ createLangfuseAdapter, createMockLangfuseClient, LangfuseAdapter, startLangfuseAdapter, createConsoleLoggerHook, createMetricsCollectorHook, ObservabilityHooks, observabilityHooks, pipelineToSpanAttributes, scorerToSpanAttributes,
352
+ // Pipeline
353
+ createAndInitializePipeline, createPipeline, EvaluationPipeline, PipelineBuilder, Pipelines, CODE_GENERATION_PIPELINE, COMPREHENSIVE_PIPELINE, CUSTOMER_SUPPORT_PIPELINE, getPreset, getPresetNames, MINIMAL_PIPELINE, PipelinePresets, QUALITY_PIPELINE, RAG_PIPELINE, SAFETY_PIPELINE, SUMMARIZATION_PIPELINE,
354
+ // Strategies
355
+ BatchStrategy, createBatchStrategy, evaluateBatch, streamBatchEvaluation, createSamplingStrategy, DEFAULT_SAMPLING_CONFIG, SamplingStrategies, SamplingStrategy,
356
+ // Reporting
357
+ createMetricsCollector, globalMetricsCollector, MetricsCollector, createReportGenerator, ReportGenerator, Reports,
358
+ // Scorers - Base
359
+ BaseScorer, DEFAULT_SCORE_SCALE as EVAL_DEFAULT_SCORE_SCALE,
360
+ // Scorers - Custom utilities
361
+ composeScorers, createConditionalScorer, createFunctionScorer, createInvertedScorer, createKeywordScorer, createRegexScorer, createScorerMetadata, createSimpleLengthScorer,
362
+ // Scorers - LLM-based
363
+ AnswerRelevancyScorer, createAnswerRelevancyScorer, BaseLLMScorer, DEFAULT_LLM_SCORER_CONFIG, BiasDetectionScorer, createBiasDetectionScorer, ContextPrecisionScorer, createContextPrecisionScorer, ContextRelevancyScorer, createContextRelevancyScorer, createFaithfulnessScorer, FaithfulnessScorer, createHallucinationScorer, HallucinationScorer, createPromptAlignmentScorer, PromptAlignmentScorer, createSummarizationScorer, SummarizationScorer, createToneConsistencyScorer, ToneConsistencyScorer, createToxicityScorer, ToxicityScorer,
364
+ // Scorers - Rule-based
365
+ BaseRuleScorer, DEFAULT_RULE_SCORER_CONFIG, ContentSimilarityScorer, createContentSimilarityScorer, createFormatScorer, FormatScorer, FormatScorerPresets, createKeywordCoverageScorer, KeywordCoverageScorer, createLengthScorer, LengthScorer, LengthScorerPresets,
366
+ // Scorers - Builder & Registry
367
+ ScorerBuilder, Scorers, ScorerRegistry, } from "./evaluation/index.js";
306
368
  /**
307
369
  * Legacy generateText function for backward compatibility.
308
370
  *
@@ -482,33 +544,10 @@ extractMetadata, formatContextWithCitations,
482
544
  GraphRAG, getAvailableStrategies, getCircuitBreaker, getDefaultChunkerConfig, getRecommendedStrategy, HTMLChunker, HTMLLoader, InMemoryBM25Index, InMemoryVectorStore, JSONChunker as RAGJSONChunker, JSONLoader, LaTeXChunker, LLMMetadataExtractor, linearCombination, loadDocument, loadDocuments, MarkdownChunker, MarkdownLoader, MDocument, PDFLoader,
483
545
  // RAG Integration for generate/stream
484
546
  prepareRAGTool, processDocument, RAGCircuitBreaker, RAGCircuitBreakerManager, RAGPipeline, RAGRetryHandler, RecursiveChunker, ragCircuitBreakerManager, reciprocalRankFusion, rerank, SemanticChunker, SentenceChunker, simpleRerank, summarizeContext, TextLoader, TokenChunker, WebLoader, } from "./rag/index.js";
485
- // ============================================================================
486
- // EVALUATION / SCORING - RAGAS-style Response Quality Evaluation
487
- // ============================================================================
547
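+ // The Evaluator export now lives in the unified evaluation block above
548
+ // (via ./evaluation/index.js barrel). NOTE(review): that block does not list PromptBuilder, RAGASEvaluator, RetryManager or mapToEvaluationData, whose direct exports are removed below - confirm dropping them from the package root is intended.
549
+ // ContextBuilder is not covered by the barrel export, so it keeps its direct export.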
488
550
  export { ContextBuilder } from "./evaluation/contextBuilder.js";
489
- /**
490
- * Evaluation system for AI response quality assessment.
491
- *
492
- * Uses RAGAS-style model-based evaluation with a "judge" LLM to score
493
- * responses on relevance, accuracy, completeness, and overall quality.
494
- * Supports retry logic with progressive prompt improvement.
495
- *
496
- * @example
497
- * ```typescript
498
- * import { Evaluator, RAGASEvaluator, ContextBuilder, RetryManager } from '@juspay/neurolink';
499
- *
500
- * const evaluator = new Evaluator({
501
- * evaluationModel: 'gemini-1.5-flash',
502
- * provider: 'vertex',
503
- * threshold: 7,
504
- * });
505
- * ```
506
- */
507
- export { Evaluator } from "./evaluation/index.js";
508
- export { PromptBuilder } from "./evaluation/prompts.js";
509
- export { RAGASEvaluator } from "./evaluation/ragasEvaluator.js";
510
- export { RetryManager } from "./evaluation/retryManager.js";
511
- export { mapToEvaluationData } from "./evaluation/scoring.js";
512
551
  // ============================================================================
513
552
  // AUTHENTICATION PROVIDERS - Multi-provider Auth Integration
514
553
  // ============================================================================
@@ -48,7 +48,7 @@ export declare const AuthProviderError: {
48
48
  retryable?: boolean;
49
49
  details?: Record<string, unknown>;
50
50
  cause?: Error;
51
- } | undefined) => import("../../core/infrastructure/baseError.js").NeuroLinkFeatureError;
51
+ } | undefined) => import("../../index.js").NeuroLinkFeatureError;
52
52
  };
53
53
  /**
54
54
  * Default in-memory session storage
@@ -0,0 +1,163 @@
1
+ /**
2
+ * @file BatchEvaluator - Supports batch evaluation of multiple responses.
3
+ * Enables parallel evaluation with configurable concurrency and error handling.
4
+ */
5
+ import type { LanguageModelV3CallOptions } from "@ai-sdk/provider";
6
+ import type { GenerateResult } from "../types/generateTypes.js";
7
+ import type { EvaluationConfig } from "../types/evaluationTypes.js";
8
+ import type { EvaluationData } from "../types/evaluation.js";
9
+ import type { AutoEvaluationConfig } from "../types/middlewareTypes.js";
10
+ /**
11
+ * Configuration for batch evaluation: the base EvaluationConfig plus batching, retry and callback options.
12
+ */
13
+ export interface BatchEvaluationConfig extends EvaluationConfig {
14
+ /** Maximum number of items evaluated concurrently per chunk (default: 5). evaluateBatch reads it as `concurrency || 5`, so 0 also yields 5. */
15
+ concurrency?: number;
16
+ /** Whether to continue on individual failures (default: true). When false, evaluateBatch throws once a chunk contains a failed item. */
17
+ continueOnError?: boolean;
18
+ /** Maximum retries for retryable errors (default: 2). evaluateBatch reads it as `maxRetries || 2`, so 0 does not disable retries. */
19
+ maxRetries?: number;
20
+ /** Delay between retries in milliseconds (default: 1000). evaluateBatch reads it as `retryDelay || 1000`, so 0 falls back to 1000. */
21
+ retryDelay?: number;
22
+ /** Callback for progress updates; evaluateBatch invokes it each time an item finishes, and an exception it throws is logged and ignored. */
23
+ onProgress?: (progress: BatchProgress) => void;
24
+ /** Callback for individual evaluation completion (success or failure); an exception it throws is logged and ignored by evaluateBatch. */
25
+ onItemComplete?: (result: BatchEvaluationItemResult) => void;
26
+ }
27
+ /**
28
+ * Progress snapshot passed to the onProgress callback during batch evaluation.
29
+ */
30
+ export interface BatchProgress {
31
+ /** Total items to evaluate */
32
+ total: number;
33
+ /** Items completed (success + failed) */
34
+ completed: number;
35
+ /** Items that succeeded */
36
+ succeeded: number;
37
+ /** Items that failed */
38
+ failed: number;
39
+ /** Items still pending (total - completed) */
40
+ pending: number;
41
+ /** Percentage complete, rounded to the nearest integer (completed / total * 100) */
42
+ percentComplete: number;
43
+ }
44
+ /**
45
+ * Input item for batch evaluation: one generation call and its result, identified by id.
46
+ */
47
+ export interface BatchEvaluationItem {
48
+ /** Unique identifier for this item; copied onto the matching BatchEvaluationItemResult */
49
+ id: string;
50
+ /** The generation options, passed as the first argument to Evaluator.evaluate */
51
+ options: LanguageModelV3CallOptions;
52
+ /** The generation result to evaluate */
53
+ result: GenerateResult;
54
+ /** Optional item-specific threshold override. evaluateBatch resolves it as item.threshold || autoEvalConfig.threshold || config.threshold || 7, so 0 is treated as unset. */
55
+ threshold?: number;
56
+ }
57
+ /**
58
+ * Result for a single item in batch evaluation.
59
+ */
60
+ export interface BatchEvaluationItemResult {
61
+ /** The item ID (same as the input item's id) */
62
+ id: string;
63
+ /** Whether the evaluation succeeded */
64
+ success: boolean;
65
+ /** The evaluation data (if successful) */
66
+ data?: EvaluationData;
67
+ /** Error information (if failed). In evaluateBatch, `code` is set only when the error is a NeuroLinkFeatureError, and `retryable` is false for any other error. */
68
+ error?: {
69
+ message: string;
70
+ code?: string;
71
+ retryable?: boolean;
72
+ };
73
+ /** Time from the item's first attempt to its final outcome, in milliseconds (retry delays included) */
74
+ duration: number;
75
+ /** Number of retries performed before the final outcome (0 when the first attempt was final) */
76
+ retryCount: number;
77
+ }
78
+ /**
79
+ * Result of a batch evaluation operation.
80
+ */
81
+ export interface BatchEvaluationResult {
82
+ /** All item results */
83
+ results: BatchEvaluationItemResult[];
84
+ /** Summary statistics */
85
+ summary: {
86
+ /** Total items evaluated */
87
+ total: number;
88
+ /** Number of successful evaluations */
89
+ succeeded: number;
90
+ /** Number of failed evaluations */
91
+ failed: number;
92
+ /** Average evaluation score (for successful items) */
93
+ averageScore: number;
94
+ /** Average evaluation time in milliseconds */
95
+ averageDuration: number;
96
+ /** Total time for batch evaluation */
97
+ totalDuration: number;
98
+ /** Passing rate (percentage of items meeting threshold) */
99
+ passingRate: number;
100
+ };
101
+ /** Whether all evaluations succeeded */
102
+ allSucceeded: boolean;
103
+ }
104
+ /**
105
+ * BatchEvaluator - Performs evaluation on multiple items in parallel.
106
+ * Supports configurable concurrency, retry logic, and progress tracking.
107
+ *
108
+ * @example
109
+ * ```typescript
110
+ * const batchEvaluator = new BatchEvaluator({
111
+ * concurrency: 3,
112
+ * continueOnError: true,
113
+ * onProgress: (progress) => console.log(`${progress.percentComplete}% complete`)
114
+ * });
115
+ *
116
+ * const items = [
117
+ * { id: '1', options: opts1, result: result1 },
118
+ * { id: '2', options: opts2, result: result2 },
119
+ * ];
120
+ *
121
+ * const batchResult = await batchEvaluator.evaluateBatch(items);
122
+ * console.log(`Passing rate: ${batchResult.summary.passingRate}%`);
123
+ * ```
124
+ */
125
+ export declare class BatchEvaluator {
126
+ private config;
127
+ constructor(config?: BatchEvaluationConfig);
128
+ /**
129
+ * Create a fresh Evaluator instance for each evaluation attempt to avoid leaking state.
130
+ */
131
+ private _createEvaluator;
132
+ /**
133
+ * Evaluates items in consecutive chunks of `concurrency`; each chunk runs in parallel and is awaited before the next one starts.
134
+ *
135
+ * @param items - Array of items to evaluate
136
+ * @param autoEvalConfig - Auto-evaluation configuration for thresholds
137
+ * @returns Batch evaluation results with summary statistics; the promise rejects when continueOnError is false and an item in a chunk fails
138
+ */
139
+ evaluateBatch(items: BatchEvaluationItem[], autoEvalConfig?: AutoEvaluationConfig): Promise<BatchEvaluationResult>;
140
+ /**
141
+ * Evaluates items sequentially (one at a time).
142
+ * Useful for debugging or when order matters.
143
+ *
144
+ * @param items - Array of items to evaluate
145
+ * @param autoEvalConfig - Auto-evaluation configuration
146
+ * @returns Batch evaluation results
147
+ */
148
+ evaluateSequential(items: BatchEvaluationItem[], autoEvalConfig?: AutoEvaluationConfig): Promise<BatchEvaluationResult>;
149
+ /**
150
+ * Gets the current configuration.
151
+ */
152
+ getConfig(): BatchEvaluationConfig;
153
+ /**
154
+ * Updates the configuration.
155
+ *
156
+ * @param config - New configuration values
157
+ */
158
+ updateConfig(config: Partial<BatchEvaluationConfig>): void;
159
+ /**
160
+ * Helper to delay execution.
161
+ */
162
+ private delay;
163
+ }
@@ -0,0 +1,268 @@
1
+ /**
2
+ * @file BatchEvaluator - Supports batch evaluation of multiple responses.
3
+ * Enables parallel evaluation with configurable concurrency and error handling.
4
+ */
5
+ import { Evaluator } from "./index.js";
6
+ import { createBatchEvaluationError, isRetryableEvaluationError, } from "./errors/EvaluationError.js";
7
+ import { logger } from "../utils/logger.js";
8
+ import { NeuroLinkFeatureError } from "../core/infrastructure/index.js";
9
+ /**
10
+ * BatchEvaluator - Performs evaluation on multiple items in parallel.
11
+ * Supports configurable concurrency, retry logic, and progress tracking.
12
+ *
13
+ * @example
14
+ * ```typescript
15
+ * const batchEvaluator = new BatchEvaluator({
16
+ * concurrency: 3,
17
+ * continueOnError: true,
18
+ * onProgress: (progress) => console.log(`${progress.percentComplete}% complete`)
19
+ * });
20
+ *
21
+ * const items = [
22
+ * { id: '1', options: opts1, result: result1 },
23
+ * { id: '2', options: opts2, result: result2 },
24
+ * ];
25
+ *
26
+ * const batchResult = await batchEvaluator.evaluateBatch(items);
27
+ * console.log(`Passing rate: ${batchResult.summary.passingRate}%`);
28
+ * ```
29
+ */
30
+ export class BatchEvaluator {
31
+ config;
32
+ constructor(config = {}) { // config is optional; an omitted argument means "all defaults"
33
+ this.config = { // batch defaults first, then caller overrides
34
+ concurrency: 5,
35
+ continueOnError: true,
36
+ maxRetries: 2,
37
+ retryDelay: 1000,
38
+ ...config, // spread last so caller-supplied keys win over the defaults above
39
+ };
40
+ }
41
+ /**
42
+ * Build a new Evaluator from the current batch config; evaluateBatch calls this once per attempt so no evaluator instance is shared between items or retries.
43
+ */
44
+ _createEvaluator() {
45
+ return new Evaluator(this.config);
46
+ }
47
+ /**
48
+ * Evaluates a batch of items in parallel with controlled concurrency.
49
+ *
50
+ * @param items - Array of items to evaluate
51
+ * @param autoEvalConfig - Auto-evaluation configuration for thresholds
52
+ * @returns Batch evaluation results with summary statistics
53
+ */
54
+ async evaluateBatch(items, autoEvalConfig = {}) {
55
+ const startTime = Date.now();
56
+ const results = [];
57
+ const concurrency = this.config.concurrency || 5;
58
+ // Track progress
59
+ let completed = 0;
60
+ let succeeded = 0;
61
+ let failed = 0;
62
+ const reportProgress = () => {
63
+ if (this.config.onProgress) {
64
+ try {
65
+ this.config.onProgress({
66
+ total: items.length,
67
+ completed,
68
+ succeeded,
69
+ failed,
70
+ pending: items.length - completed,
71
+ percentComplete: Math.round((completed / items.length) * 100),
72
+ });
73
+ }
74
+ catch (callbackError) {
75
+ logger.warn("[BatchEvaluator] onProgress callback threw an error", {
76
+ error: callbackError instanceof Error
77
+ ? callbackError.message
78
+ : String(callbackError),
79
+ });
80
+ }
81
+ }
82
+ };
83
+ // Process items with concurrency limit
84
/**
 * Evaluates one batch item, retrying failures that are flagged as retryable.
 *
 * On success the shared `succeeded`/`completed` counters are bumped, progress is
 * reported and `onItemComplete` (if configured) is notified. On final failure the
 * `failed`/`completed` counters are bumped and the same notifications happen with
 * an error record.
 *
 * @param item - Batch item; `id`, `options`, `result` and the optional `threshold` are read
 * @returns The success record, or the error record when `continueOnError` is truthy
 * @throws The last evaluation error when `continueOnError` is falsy
 */
const processItem = async (item) => {
    const itemStartTime = Date.now();
    // Use `??` rather than `||` so an explicit 0 is honoured:
    // maxRetries: 0 disables retries, retryDelay: 0 retries immediately.
    // Both are read once per item.
    const maxRetries = this.config.maxRetries ?? 2;
    const retryDelay = this.config.retryDelay ?? 1000;
    // Invokes the optional per-item callback; a throwing callback is logged
    // and never allowed to affect the evaluation outcome.
    const notifyItemComplete = (itemResult) => {
        if (!this.config.onItemComplete) {
            return;
        }
        try {
            this.config.onItemComplete(itemResult);
        }
        catch (callbackError) {
            logger.warn("[BatchEvaluator] onItemComplete callback threw an error", {
                error: callbackError instanceof Error
                    ? callbackError.message
                    : String(callbackError),
            });
        }
    };
    let retryCount = 0;
    let lastError;
    while (retryCount <= maxRetries) {
        try {
            // Threshold precedence: item > per-call config > evaluator config > 7.
            // `||` is kept on purpose so this matches the summary calculation
            // later in evaluateBatch, which resolves the threshold the same way.
            const threshold = item.threshold ||
                autoEvalConfig.threshold ||
                this.config.threshold ||
                7;
            // Create fresh evaluator per attempt to avoid leaking state
            const evaluator = this._createEvaluator();
            const data = await evaluator.evaluate(item.options, item.result, threshold, {
                ...autoEvalConfig,
                threshold,
            });
            const result = {
                id: item.id,
                success: true,
                data,
                duration: Date.now() - itemStartTime,
                retryCount,
            };
            succeeded++;
            completed++;
            reportProgress();
            notifyItemComplete(result);
            return result;
        }
        catch (error) {
            lastError = error;
            // Only feature errors explicitly marked retryable are retried
            const isRetryable = error instanceof NeuroLinkFeatureError &&
                isRetryableEvaluationError(error);
            if (isRetryable && retryCount < maxRetries) {
                retryCount++;
                logger.debug(`[BatchEvaluator.evaluateBatch] Retrying evaluation for item ${item.id}`, { attempt: retryCount + 1, itemId: item.id });
                await this.delay(retryDelay);
                continue;
            }
            // Not retryable or max retries exceeded
            break;
        }
    }
    // Failed after all retries
    const isFeatureError = lastError instanceof NeuroLinkFeatureError;
    const errorResult = {
        id: item.id,
        success: false,
        error: {
            message: lastError?.message || "Unknown error",
            code: isFeatureError ? lastError.code : undefined,
            retryable: isFeatureError ? lastError.retryable : false,
        },
        duration: Date.now() - itemStartTime,
        retryCount,
    };
    failed++;
    completed++;
    reportProgress();
    notifyItemComplete(errorResult);
    if (!this.config.continueOnError) {
        throw lastError;
    }
    return errorResult;
};
175
+ // Process items in batches based on concurrency
176
+ for (let i = 0; i < items.length; i += concurrency) {
177
+ const batch = items.slice(i, i + concurrency);
178
+ const settled = await Promise.allSettled(batch.map(processItem));
179
+ const batchResults = [];
180
+ for (const outcome of settled) {
181
+ if (outcome.status === "fulfilled") {
182
+ batchResults.push(outcome.value);
183
+ }
184
+ // Rejected outcomes are already handled inside processItem
185
+ // (errors are caught and returned as error results when continueOnError is true,
186
+ // or re-thrown which causes the settled entry to be 'rejected')
187
+ }
188
+ results.push(...batchResults);
189
+ // If continueOnError is false and any item in this batch was rejected, throw aggregate
190
+ if (!this.config.continueOnError) {
191
+ const rejections = settled.filter((s) => s.status === "rejected");
192
+ if (rejections.length > 0) {
193
+ const failedItems = results
194
+ .filter((r) => !r.success)
195
+ .map((r, idx) => ({
196
+ index: idx,
197
+ error: new Error(r.error?.message || "Unknown error"),
198
+ }));
199
+ throw createBatchEvaluationError(rejections.length, items.length, failedItems);
200
+ }
201
+ }
202
+ }
203
+ // Calculate summary statistics
204
+ const successfulResults = results.filter((r) => r.success && r.data);
205
+ const scores = successfulResults.map((r) => r.data.overall);
206
+ const passingScores = successfulResults.filter((r) => r.data.overall >=
207
+ (autoEvalConfig.threshold || this.config.threshold || 7));
208
+ const summary = {
209
+ total: items.length,
210
+ succeeded,
211
+ failed,
212
+ averageScore: scores.length > 0
213
+ ? scores.reduce((a, b) => a + b, 0) / scores.length
214
+ : 0,
215
+ averageDuration: results.length > 0
216
+ ? results.reduce((a, b) => a + b.duration, 0) / results.length
217
+ : 0,
218
+ totalDuration: Date.now() - startTime,
219
+ passingRate: successfulResults.length > 0
220
+ ? (passingScores.length / successfulResults.length) * 100
221
+ : 0,
222
+ };
223
+ return {
224
+ results,
225
+ summary,
226
+ allSucceeded: failed === 0,
227
+ };
228
+ }
229
+ /**
230
+ * Evaluates items sequentially (one at a time).
231
+ * Useful for debugging or when order matters.
232
+ *
233
+ * @param items - Array of items to evaluate
234
+ * @param autoEvalConfig - Auto-evaluation configuration
235
+ * @returns Batch evaluation results
236
+ */
237
+ async evaluateSequential(items, autoEvalConfig = {}) {
238
+ // Create a temporary evaluator with sequential config to avoid mutating shared state
239
+ const sequentialEvaluator = new BatchEvaluator({
240
+ ...this.config,
241
+ concurrency: 1,
242
+ });
243
+ return sequentialEvaluator.evaluateBatch(items, autoEvalConfig);
244
+ }
245
+ /**
246
+ * Gets the current configuration.
247
+ */
248
+ getConfig() {
249
+ return { ...this.config };
250
+ }
251
+ /**
252
+ * Updates the configuration.
253
+ *
254
+ * @param config - New configuration values
255
+ */
256
+ updateConfig(config) {
257
+ this.config = { ...this.config, ...config };
258
+ // Fresh evaluators are created per evaluation via _createEvaluator(),
259
+ // so no shared evaluator needs to be re-created here.
260
+ }
261
+ /**
262
+ * Helper to delay execution.
263
+ */
264
+ delay(ms) {
265
+ return new Promise((resolve) => setTimeout(resolve, ms));
266
+ }
267
+ }
268
+ //# sourceMappingURL=BatchEvaluator.js.map