@juspay/neurolink 9.36.0 → 9.37.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (203)
  1. package/CHANGELOG.md +12 -0
  2. package/dist/auth/errors.d.ts +1 -1
  3. package/dist/auth/middleware/AuthMiddleware.d.ts +1 -1
  4. package/dist/auth/providers/BaseAuthProvider.d.ts +1 -1
  5. package/dist/browser/neurolink.min.js +921 -423
  6. package/dist/cli/commands/evaluate.d.ts +48 -0
  7. package/dist/cli/commands/evaluate.js +955 -0
  8. package/dist/cli/commands/proxy.js +6 -6
  9. package/dist/cli/parser.js +4 -1
  10. package/dist/evaluation/BatchEvaluator.d.ts +163 -0
  11. package/dist/evaluation/BatchEvaluator.js +267 -0
  12. package/dist/evaluation/EvaluationAggregator.d.ts +272 -0
  13. package/dist/evaluation/EvaluationAggregator.js +377 -0
  14. package/dist/evaluation/EvaluatorFactory.d.ts +113 -0
  15. package/dist/evaluation/EvaluatorFactory.js +280 -0
  16. package/dist/evaluation/EvaluatorRegistry.d.ts +160 -0
  17. package/dist/evaluation/EvaluatorRegistry.js +184 -0
  18. package/dist/evaluation/errors/EvaluationError.d.ts +189 -0
  19. package/dist/evaluation/errors/EvaluationError.js +206 -0
  20. package/dist/evaluation/errors/index.d.ts +4 -0
  21. package/dist/evaluation/errors/index.js +4 -0
  22. package/dist/evaluation/hooks/index.d.ts +6 -0
  23. package/dist/evaluation/hooks/index.js +6 -0
  24. package/dist/evaluation/hooks/langfuseAdapter.d.ts +99 -0
  25. package/dist/evaluation/hooks/langfuseAdapter.js +172 -0
  26. package/dist/evaluation/hooks/observabilityHooks.d.ts +129 -0
  27. package/dist/evaluation/hooks/observabilityHooks.js +181 -0
  28. package/dist/evaluation/index.d.ts +11 -2
  29. package/dist/evaluation/index.js +15 -0
  30. package/dist/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
  31. package/dist/evaluation/pipeline/evaluationPipeline.js +381 -0
  32. package/dist/evaluation/pipeline/index.d.ts +8 -0
  33. package/dist/evaluation/pipeline/index.js +8 -0
  34. package/dist/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
  35. package/dist/evaluation/pipeline/pipelineBuilder.js +260 -0
  36. package/dist/evaluation/pipeline/presets.d.ts +66 -0
  37. package/dist/evaluation/pipeline/presets.js +224 -0
  38. package/dist/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
  39. package/dist/evaluation/pipeline/strategies/batchStrategy.js +238 -0
  40. package/dist/evaluation/pipeline/strategies/index.d.ts +6 -0
  41. package/dist/evaluation/pipeline/strategies/index.js +6 -0
  42. package/dist/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
  43. package/dist/evaluation/pipeline/strategies/samplingStrategy.js +238 -0
  44. package/dist/evaluation/reporting/index.d.ts +6 -0
  45. package/dist/evaluation/reporting/index.js +6 -0
  46. package/dist/evaluation/reporting/metricsCollector.d.ts +147 -0
  47. package/dist/evaluation/reporting/metricsCollector.js +285 -0
  48. package/dist/evaluation/reporting/reportGenerator.d.ts +90 -0
  49. package/dist/evaluation/reporting/reportGenerator.js +374 -0
  50. package/dist/evaluation/scorers/baseScorer.d.ts +83 -0
  51. package/dist/evaluation/scorers/baseScorer.js +232 -0
  52. package/dist/evaluation/scorers/customScorerUtils.d.ts +95 -0
  53. package/dist/evaluation/scorers/customScorerUtils.js +381 -0
  54. package/dist/evaluation/scorers/index.d.ts +10 -0
  55. package/dist/evaluation/scorers/index.js +16 -0
  56. package/dist/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
  57. package/dist/evaluation/scorers/llm/answerRelevancyScorer.js +99 -0
  58. package/dist/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
  59. package/dist/evaluation/scorers/llm/baseLLMScorer.js +281 -0
  60. package/dist/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
  61. package/dist/evaluation/scorers/llm/biasDetectionScorer.js +127 -0
  62. package/dist/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
  63. package/dist/evaluation/scorers/llm/contextPrecisionScorer.js +92 -0
  64. package/dist/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
  65. package/dist/evaluation/scorers/llm/contextRelevancyScorer.js +107 -0
  66. package/dist/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
  67. package/dist/evaluation/scorers/llm/faithfulnessScorer.js +121 -0
  68. package/dist/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
  69. package/dist/evaluation/scorers/llm/hallucinationScorer.js +140 -0
  70. package/dist/evaluation/scorers/llm/index.d.ts +15 -0
  71. package/dist/evaluation/scorers/llm/index.js +16 -0
  72. package/dist/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
  73. package/dist/evaluation/scorers/llm/promptAlignmentScorer.js +106 -0
  74. package/dist/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
  75. package/dist/evaluation/scorers/llm/summarizationScorer.js +114 -0
  76. package/dist/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
  77. package/dist/evaluation/scorers/llm/toneConsistencyScorer.js +106 -0
  78. package/dist/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
  79. package/dist/evaluation/scorers/llm/toxicityScorer.js +121 -0
  80. package/dist/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
  81. package/dist/evaluation/scorers/rule/baseRuleScorer.js +233 -0
  82. package/dist/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
  83. package/dist/evaluation/scorers/rule/contentSimilarityScorer.js +350 -0
  84. package/dist/evaluation/scorers/rule/formatScorer.d.ts +147 -0
  85. package/dist/evaluation/scorers/rule/formatScorer.js +470 -0
  86. package/dist/evaluation/scorers/rule/index.d.ts +9 -0
  87. package/dist/evaluation/scorers/rule/index.js +10 -0
  88. package/dist/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
  89. package/dist/evaluation/scorers/rule/keywordCoverageScorer.js +347 -0
  90. package/dist/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
  91. package/dist/evaluation/scorers/rule/lengthScorer.js +351 -0
  92. package/dist/evaluation/scorers/scorerBuilder.d.ts +161 -0
  93. package/dist/evaluation/scorers/scorerBuilder.js +420 -0
  94. package/dist/evaluation/scorers/scorerRegistry.d.ts +62 -0
  95. package/dist/evaluation/scorers/scorerRegistry.js +467 -0
  96. package/dist/index.d.ts +37 -25
  97. package/dist/index.js +65 -26
  98. package/dist/lib/auth/providers/BaseAuthProvider.d.ts +1 -1
  99. package/dist/lib/evaluation/BatchEvaluator.d.ts +163 -0
  100. package/dist/lib/evaluation/BatchEvaluator.js +268 -0
  101. package/dist/lib/evaluation/EvaluationAggregator.d.ts +272 -0
  102. package/dist/lib/evaluation/EvaluationAggregator.js +378 -0
  103. package/dist/lib/evaluation/EvaluatorFactory.d.ts +113 -0
  104. package/dist/lib/evaluation/EvaluatorFactory.js +281 -0
  105. package/dist/lib/evaluation/EvaluatorRegistry.d.ts +160 -0
  106. package/dist/lib/evaluation/EvaluatorRegistry.js +185 -0
  107. package/dist/lib/evaluation/errors/EvaluationError.d.ts +189 -0
  108. package/dist/lib/evaluation/errors/EvaluationError.js +207 -0
  109. package/dist/lib/evaluation/errors/index.d.ts +4 -0
  110. package/dist/lib/evaluation/errors/index.js +5 -0
  111. package/dist/lib/evaluation/hooks/index.d.ts +6 -0
  112. package/dist/lib/evaluation/hooks/index.js +7 -0
  113. package/dist/lib/evaluation/hooks/langfuseAdapter.d.ts +99 -0
  114. package/dist/lib/evaluation/hooks/langfuseAdapter.js +173 -0
  115. package/dist/lib/evaluation/hooks/observabilityHooks.d.ts +129 -0
  116. package/dist/lib/evaluation/hooks/observabilityHooks.js +182 -0
  117. package/dist/lib/evaluation/index.d.ts +11 -2
  118. package/dist/lib/evaluation/index.js +15 -0
  119. package/dist/lib/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
  120. package/dist/lib/evaluation/pipeline/evaluationPipeline.js +382 -0
  121. package/dist/lib/evaluation/pipeline/index.d.ts +8 -0
  122. package/dist/lib/evaluation/pipeline/index.js +9 -0
  123. package/dist/lib/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
  124. package/dist/lib/evaluation/pipeline/pipelineBuilder.js +261 -0
  125. package/dist/lib/evaluation/pipeline/presets.d.ts +66 -0
  126. package/dist/lib/evaluation/pipeline/presets.js +225 -0
  127. package/dist/lib/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
  128. package/dist/lib/evaluation/pipeline/strategies/batchStrategy.js +239 -0
  129. package/dist/lib/evaluation/pipeline/strategies/index.d.ts +6 -0
  130. package/dist/lib/evaluation/pipeline/strategies/index.js +7 -0
  131. package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
  132. package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.js +239 -0
  133. package/dist/lib/evaluation/reporting/index.d.ts +6 -0
  134. package/dist/lib/evaluation/reporting/index.js +7 -0
  135. package/dist/lib/evaluation/reporting/metricsCollector.d.ts +147 -0
  136. package/dist/lib/evaluation/reporting/metricsCollector.js +286 -0
  137. package/dist/lib/evaluation/reporting/reportGenerator.d.ts +90 -0
  138. package/dist/lib/evaluation/reporting/reportGenerator.js +375 -0
  139. package/dist/lib/evaluation/scorers/baseScorer.d.ts +83 -0
  140. package/dist/lib/evaluation/scorers/baseScorer.js +233 -0
  141. package/dist/lib/evaluation/scorers/customScorerUtils.d.ts +95 -0
  142. package/dist/lib/evaluation/scorers/customScorerUtils.js +382 -0
  143. package/dist/lib/evaluation/scorers/index.d.ts +10 -0
  144. package/dist/lib/evaluation/scorers/index.js +17 -0
  145. package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
  146. package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.js +100 -0
  147. package/dist/lib/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
  148. package/dist/lib/evaluation/scorers/llm/baseLLMScorer.js +282 -0
  149. package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
  150. package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.js +128 -0
  151. package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
  152. package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.js +93 -0
  153. package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
  154. package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.js +108 -0
  155. package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
  156. package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.js +122 -0
  157. package/dist/lib/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
  158. package/dist/lib/evaluation/scorers/llm/hallucinationScorer.js +141 -0
  159. package/dist/lib/evaluation/scorers/llm/index.d.ts +15 -0
  160. package/dist/lib/evaluation/scorers/llm/index.js +17 -0
  161. package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
  162. package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.js +107 -0
  163. package/dist/lib/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
  164. package/dist/lib/evaluation/scorers/llm/summarizationScorer.js +115 -0
  165. package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
  166. package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.js +107 -0
  167. package/dist/lib/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
  168. package/dist/lib/evaluation/scorers/llm/toxicityScorer.js +122 -0
  169. package/dist/lib/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
  170. package/dist/lib/evaluation/scorers/rule/baseRuleScorer.js +234 -0
  171. package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
  172. package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.js +351 -0
  173. package/dist/lib/evaluation/scorers/rule/formatScorer.d.ts +147 -0
  174. package/dist/lib/evaluation/scorers/rule/formatScorer.js +471 -0
  175. package/dist/lib/evaluation/scorers/rule/index.d.ts +9 -0
  176. package/dist/lib/evaluation/scorers/rule/index.js +11 -0
  177. package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
  178. package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.js +348 -0
  179. package/dist/lib/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
  180. package/dist/lib/evaluation/scorers/rule/lengthScorer.js +352 -0
  181. package/dist/lib/evaluation/scorers/scorerBuilder.d.ts +161 -0
  182. package/dist/lib/evaluation/scorers/scorerBuilder.js +421 -0
  183. package/dist/lib/evaluation/scorers/scorerRegistry.d.ts +62 -0
  184. package/dist/lib/evaluation/scorers/scorerRegistry.js +468 -0
  185. package/dist/lib/index.d.ts +37 -25
  186. package/dist/lib/index.js +65 -26
  187. package/dist/lib/neurolink.d.ts +204 -0
  188. package/dist/lib/neurolink.js +296 -0
  189. package/dist/lib/types/index.d.ts +3 -1
  190. package/dist/lib/types/index.js +3 -2
  191. package/dist/lib/types/scorerTypes.d.ts +423 -0
  192. package/dist/lib/types/scorerTypes.js +6 -0
  193. package/dist/lib/utils/errorHandling.d.ts +20 -0
  194. package/dist/lib/utils/errorHandling.js +60 -0
  195. package/dist/neurolink.d.ts +204 -0
  196. package/dist/neurolink.js +296 -0
  197. package/dist/types/index.d.ts +3 -1
  198. package/dist/types/index.js +3 -2
  199. package/dist/types/scorerTypes.d.ts +423 -0
  200. package/dist/types/scorerTypes.js +5 -0
  201. package/dist/utils/errorHandling.d.ts +20 -0
  202. package/dist/utils/errorHandling.js +60 -0
  203. package/package.json +1 -1
@@ -1646,6 +1646,210 @@ export declare class NeuroLink {
1646
1646
  * Unregister all external MCP tools from the main registry
1647
1647
  */
1648
1648
  private unregisterAllExternalMCPToolsFromRegistry;
1649
+ /**
1650
+ * Create an evaluation pipeline with the specified configuration or preset.
1651
+ * Pipelines orchestrate multiple scorers to evaluate AI responses comprehensively.
1652
+ *
1653
+ * @param configOrPreset - Pipeline configuration object or preset name
1654
+ * @returns Initialized evaluation pipeline
1655
+ *
1656
+ * @example Using a preset
1657
+ * ```typescript
1658
+ * const neurolink = new NeuroLink();
1659
+ * const pipeline = await neurolink.createEvaluationPipeline('rag');
1660
+ * const result = await pipeline.execute({
1661
+ * query: 'What is the capital of France?',
1662
+ * response: 'Paris is the capital of France.',
1663
+ * context: ['France is a country in Europe. Paris is its capital.']
1664
+ * });
1665
+ * console.log(result.overallScore, result.passed);
1666
+ * ```
1667
+ *
1668
+ * @example Using custom configuration
1669
+ * ```typescript
1670
+ * const pipeline = await neurolink.createEvaluationPipeline({
1671
+ * name: 'custom-quality',
1672
+ * scorers: [
1673
+ * { id: 'toxicity', config: { threshold: 0.9 } },
1674
+ * { id: 'hallucination', config: { weight: 1.5 } },
1675
+ * { id: 'answer-relevancy' }
1676
+ * ],
1677
+ * aggregation: { method: 'weighted' },
1678
+ * passThreshold: 0.8
1679
+ * });
1680
+ * ```
1681
+ */
1682
+ createEvaluationPipeline(configOrPreset: import("./types/scorerTypes.js").PipelineConfig | "safety" | "rag" | "quality" | "comprehensive" | "minimal" | "summarization" | "customerSupport" | "codeGeneration"): Promise<import("./evaluation/pipeline/evaluationPipeline.js").EvaluationPipeline>;
1683
+ /**
1684
+ * Evaluate an AI response using the specified pipeline or scorers.
1685
+ * This is a convenience method that creates a pipeline and executes it in one call.
1686
+ *
1687
+ * @param input - Scorer input containing query, response, and optional context
1688
+ * @param options - Evaluation options including pipeline preset or custom scorers
1689
+ * @returns Evaluation pipeline result with scores and pass/fail status
1690
+ *
1691
+ * @example Using a preset
1692
+ * ```typescript
1693
+ * const neurolink = new NeuroLink();
1694
+ * const result = await neurolink.evaluate(
1695
+ * {
1696
+ * query: 'Explain quantum computing',
1697
+ * response: 'Quantum computing uses qubits...'
1698
+ * },
1699
+ * { pipeline: 'quality' }
1700
+ * );
1701
+ * console.log(`Score: ${result.overallScore}, Passed: ${result.passed}`);
1702
+ * ```
1703
+ *
1704
+ * @example Using specific scorers
1705
+ * ```typescript
1706
+ * const result = await neurolink.evaluate(
1707
+ * {
1708
+ * query: 'What causes rain?',
1709
+ * response: 'Rain is caused by water vapor...',
1710
+ * context: ['The water cycle involves evaporation...']
1711
+ * },
1712
+ * { scorers: ['hallucination', 'faithfulness', 'answer-relevancy'] }
1713
+ * );
1714
+ * ```
1715
+ *
1716
+ * @example Full RAG evaluation
1717
+ * ```typescript
1718
+ * const result = await neurolink.evaluate(
1719
+ * {
1720
+ * query: 'Who wrote Hamlet?',
1721
+ * response: 'Shakespeare wrote Hamlet in 1600.',
1722
+ * context: ['William Shakespeare wrote Hamlet around 1600-1601.'],
1723
+ * groundTruth: 'William Shakespeare'
1724
+ * },
1725
+ * { pipeline: 'rag' }
1726
+ * );
1727
+ * ```
1728
+ */
1729
+ evaluate(input: import("./types/scorerTypes.js").ScorerInput, options?: {
1730
+ /** Pipeline preset to use */
1731
+ pipeline?: "safety" | "rag" | "quality" | "comprehensive" | "minimal" | "summarization" | "customerSupport" | "codeGeneration";
1732
+ /** Specific scorers to use (alternative to pipeline) */
1733
+ scorers?: string[];
1734
+ /** Pass threshold override (0-1) */
1735
+ passThreshold?: number;
1736
+ /** Execution mode */
1737
+ executionMode?: "parallel" | "sequential";
1738
+ /** Correlation ID for tracing */
1739
+ correlationId?: string;
1740
+ /** Overall evaluation timeout in milliseconds */
1741
+ timeoutMs?: number;
1742
+ }): Promise<import("./evaluation/pipeline/evaluationPipeline.js").PipelineResult>;
1743
+ /**
1744
+ * Score a response using a single scorer.
1745
+ * Useful for quick, targeted evaluations without the overhead of a full pipeline.
1746
+ *
1747
+ * @param scorerId - The ID of the scorer to use (e.g., 'toxicity', 'hallucination')
1748
+ * @param input - Scorer input containing query, response, and optional context
1749
+ * @param config - Optional scorer configuration overrides
1750
+ * @returns Score result with value, reasoning, and pass/fail status
1751
+ *
1752
+ * @example Basic scoring
1753
+ * ```typescript
1754
+ * const neurolink = new NeuroLink();
1755
+ * const result = await neurolink.score('toxicity', {
1756
+ * query: '',
1757
+ * response: 'This is a helpful response about cooking recipes.'
1758
+ * });
1759
+ * console.log(`Toxicity Score: ${result.score}/10, Passed: ${result.passed}`);
1760
+ * ```
1761
+ *
1762
+ * @example Hallucination detection
1763
+ * ```typescript
1764
+ * const result = await neurolink.score('hallucination', {
1765
+ * query: 'What year was the Eiffel Tower built?',
1766
+ * response: 'The Eiffel Tower was built in 1889.',
1767
+ * context: ['The Eiffel Tower was constructed from 1887-1889.']
1768
+ * });
1769
+ * console.log(`Score: ${result.score}, Reasoning: ${result.reasoning}`);
1770
+ * ```
1771
+ *
1772
+ * @example With custom threshold
1773
+ * ```typescript
1774
+ * const result = await neurolink.score(
1775
+ * 'faithfulness',
1776
+ * {
1777
+ * query: 'Summarize the article',
1778
+ * response: 'The article discusses...',
1779
+ * context: ['Article content here...']
1780
+ * },
1781
+ * { threshold: 0.85, weight: 1.5 }
1782
+ * );
1783
+ * ```
1784
+ */
1785
+ score(scorerId: string, input: import("./types/scorerTypes.js").ScorerInput, config?: import("./types/scorerTypes.js").ScorerConfig): Promise<import("./types/scorerTypes.js").ScoreResult>;
1786
+ /**
1787
+ * Get a list of all available scorers and their metadata.
1788
+ * Useful for discovering what evaluation capabilities are available.
1789
+ *
1790
+ * @param options - Filter options
1791
+ * @returns Array of scorer metadata
1792
+ *
1793
+ * @example List all scorers
1794
+ * ```typescript
1795
+ * const neurolink = new NeuroLink();
1796
+ * const scorers = await neurolink.getAvailableScorers();
1797
+ * for (const scorer of scorers) {
1798
+ * console.log(`${scorer.id}: ${scorer.description} (${scorer.type})`);
1799
+ * }
1800
+ * ```
1801
+ *
1802
+ * @example Filter by category
1803
+ * ```typescript
1804
+ * const safetyScorers = await neurolink.getAvailableScorers({
1805
+ * category: 'safety'
1806
+ * });
1807
+ * console.log('Safety scorers:', safetyScorers.map(s => s.id));
1808
+ * ```
1809
+ *
1810
+ * @example Filter by type
1811
+ * ```typescript
1812
+ * const ruleBasedScorers = await neurolink.getAvailableScorers({
1813
+ * type: 'rule'
1814
+ * });
1815
+ * ```
1816
+ */
1817
+ getAvailableScorers(options?: {
1818
+ /** Filter by category */
1819
+ category?: import("./types/scorerTypes.js").ScorerCategory;
1820
+ /** Filter by type */
1821
+ type?: import("./types/scorerTypes.js").ScorerType;
1822
+ }): Promise<import("./types/scorerTypes.js").ScorerMetadata[]>;
1823
+ /**
1824
+ * Get a list of available evaluation pipeline presets.
1825
+ * Presets are pre-configured pipelines for common evaluation scenarios.
1826
+ *
1827
+ * @returns Array of preset names
1828
+ *
1829
+ * @example
1830
+ * ```typescript
1831
+ * const neurolink = new NeuroLink();
1832
+ * const presets = await neurolink.getEvaluationPresets();
1833
+ * console.log('Available presets:', presets);
1834
+ * // Output: ['safety', 'rag', 'quality', 'comprehensive', 'minimal', ...]
1835
+ * ```
1836
+ */
1837
+ getEvaluationPresets(): Promise<string[]>;
1838
+ /**
1839
+ * Get details of a specific evaluation preset.
1840
+ *
1841
+ * @param presetName - Name of the preset
1842
+ * @returns Pipeline configuration for the preset
1843
+ *
1844
+ * @example
1845
+ * ```typescript
1846
+ * const neurolink = new NeuroLink();
1847
+ * const ragPreset = await neurolink.getEvaluationPreset('rag');
1848
+ * console.log('RAG preset scorers:', ragPreset.scorers.map(s => s.id));
1849
+ * console.log('Pass threshold:', ragPreset.passThreshold);
1850
+ * ```
1851
+ */
1852
+ getEvaluationPreset(presetName: "safety" | "rag" | "quality" | "comprehensive" | "minimal" | "summarization" | "customerSupport" | "codeGeneration"): Promise<import("./types/scorerTypes.js").PipelineConfig>;
1649
1853
  /**
1650
1854
  * Dispose of all resources and cleanup connections
1651
1855
  * Call this method when done using the NeuroLink instance to prevent resource leaks
package/dist/neurolink.js CHANGED
@@ -7927,6 +7927,302 @@ Current user's request: ${currentInput}`;
7927
7927
  mcpLogger.error("[NeuroLink] Failed to unregister all external MCP tools from registry:", error);
7928
7928
  }
7929
7929
  }
7930
+ // ========================================
7931
+ // Evaluation & Scoring API
7932
+ // ========================================
7933
+ /**
7934
+ * Create an evaluation pipeline with the specified configuration or preset.
7935
+ * Pipelines orchestrate multiple scorers to evaluate AI responses comprehensively.
7936
+ *
7937
+ * @param configOrPreset - Pipeline configuration object or preset name
7938
+ * @returns Initialized evaluation pipeline
7939
+ *
7940
+ * @example Using a preset
7941
+ * ```typescript
7942
+ * const neurolink = new NeuroLink();
7943
+ * const pipeline = await neurolink.createEvaluationPipeline('rag');
7944
+ * const result = await pipeline.execute({
7945
+ * query: 'What is the capital of France?',
7946
+ * response: 'Paris is the capital of France.',
7947
+ * context: ['France is a country in Europe. Paris is its capital.']
7948
+ * });
7949
+ * console.log(result.overallScore, result.passed);
7950
+ * ```
7951
+ *
7952
+ * @example Using custom configuration
7953
+ * ```typescript
7954
+ * const pipeline = await neurolink.createEvaluationPipeline({
7955
+ * name: 'custom-quality',
7956
+ * scorers: [
7957
+ * { id: 'toxicity', config: { threshold: 0.9 } },
7958
+ * { id: 'hallucination', config: { weight: 1.5 } },
7959
+ * { id: 'answer-relevancy' }
7960
+ * ],
7961
+ * aggregation: { method: 'weighted' },
7962
+ * passThreshold: 0.8
7963
+ * });
7964
+ * ```
7965
+ */
7966
+ async createEvaluationPipeline(configOrPreset) { // builds and initializes a pipeline from a preset name or a config object
7967
+ const { EvaluationPipeline, getPreset } = await withTimeout(import("./evaluation/pipeline/index.js"), 10000, ErrorFactory.evaluationTimeout("evaluation module load", 10000)); // pipeline module is imported on demand, wrapped in withTimeout
7968
+ let config;
7969
+ if (typeof configOrPreset === "string") {
7970
+ // A string argument is treated as a preset name and resolved through getPreset
7971
+ config = getPreset(configOrPreset);
7972
+ }
7973
+ else {
7974
+ // Any non-string argument is used as the pipeline configuration as-is (not copied here)
7975
+ config = configOrPreset;
7976
+ }
7977
+ const pipeline = new EvaluationPipeline(config);
7978
+ // Note: withTimeout races the promise but does not abort in-flight LLM calls.
7979
+ // Full AbortController propagation into pipeline/scorer internals is planned.
7980
+ await withTimeout(pipeline.initialize(), 30000, ErrorFactory.evaluationTimeout("pipeline initialization", 30000));
7981
+ logger.debug(`[NeuroLink] Created evaluation pipeline: ${config.name ?? "custom"}`); // configs without a name are logged as "custom"
7982
+ return pipeline;
7983
+ }
7984
+ /**
7985
+ * Evaluate an AI response using the specified pipeline or scorers.
7986
+ * This is a convenience method that creates a pipeline and executes it in one call.
7987
+ *
7988
+ * @param input - Scorer input containing query, response, and optional context
7989
+ * @param options - Evaluation options including pipeline preset or custom scorers
7990
+ * @returns Evaluation pipeline result with scores and pass/fail status
7991
+ *
7992
+ * @example Using a preset
7993
+ * ```typescript
7994
+ * const neurolink = new NeuroLink();
7995
+ * const result = await neurolink.evaluate(
7996
+ * {
7997
+ * query: 'Explain quantum computing',
7998
+ * response: 'Quantum computing uses qubits...'
7999
+ * },
8000
+ * { pipeline: 'quality' }
8001
+ * );
8002
+ * console.log(`Score: ${result.overallScore}, Passed: ${result.passed}`);
8003
+ * ```
8004
+ *
8005
+ * @example Using specific scorers
8006
+ * ```typescript
8007
+ * const result = await neurolink.evaluate(
8008
+ * {
8009
+ * query: 'What causes rain?',
8010
+ * response: 'Rain is caused by water vapor...',
8011
+ * context: ['The water cycle involves evaporation...']
8012
+ * },
8013
+ * { scorers: ['hallucination', 'faithfulness', 'answer-relevancy'] }
8014
+ * );
8015
+ * ```
8016
+ *
8017
+ * @example Full RAG evaluation
8018
+ * ```typescript
8019
+ * const result = await neurolink.evaluate(
8020
+ * {
8021
+ * query: 'Who wrote Hamlet?',
8022
+ * response: 'Shakespeare wrote Hamlet in 1600.',
8023
+ * context: ['William Shakespeare wrote Hamlet around 1600-1601.'],
8024
+ * groundTruth: 'William Shakespeare'
8025
+ * },
8026
+ * { pipeline: 'rag' }
8027
+ * );
8028
+ * ```
8029
+ */
8030
+ async evaluate(input, options) {
8031
+ const { EvaluationPipeline, getPreset } = await withTimeout(import("./evaluation/pipeline/index.js"), 10000, ErrorFactory.evaluationTimeout("evaluation module load", 10000));
8032
+ let config;
8033
+ // Fail fast on conflicting or empty evaluator selection
8034
+ if (options?.pipeline && options?.scorers) {
8035
+ throw new Error("Cannot specify both 'pipeline' and 'scorers' options. Use one or the other.");
8036
+ }
8037
+ if (options?.scorers && options.scorers.length === 0) {
8038
+ throw new Error("The 'scorers' array must not be empty. Provide at least one scorer ID or omit the option to use the default 'quality' preset.");
8039
+ }
8040
+ if (options?.pipeline) {
8041
+ // Use preset
8042
+ config = { ...getPreset(options.pipeline) };
8043
+ }
8044
+ else if (options?.scorers && options.scorers.length > 0) {
8045
+ // Use custom scorers
8046
+ config = {
8047
+ name: "SDK Evaluation",
8048
+ description: "Evaluation from NeuroLink SDK",
8049
+ scorers: options.scorers.map((id) => ({ id })),
8050
+ executionMode: options.executionMode ?? "parallel",
8051
+ passThreshold: options.passThreshold ?? 0.7,
8052
+ };
8053
+ }
8054
+ else {
8055
+ // Default to quality preset
8056
+ config = getPreset("quality");
8057
+ }
8058
+ // Apply overrides
8059
+ if (options?.passThreshold !== undefined) {
8060
+ config.passThreshold = options.passThreshold;
8061
+ }
8062
+ if (options?.executionMode !== undefined) {
8063
+ config.executionMode = options.executionMode;
8064
+ }
8065
+ const pipeline = new EvaluationPipeline(config);
8066
+ await withTimeout(pipeline.initialize(), 30000, ErrorFactory.evaluationTimeout("pipeline initialization", 30000));
8067
+ const executionTimeoutMs = options?.timeoutMs ?? 60000;
8068
+ const result = await withTimeout(pipeline.execute(input, {
8069
+ correlationId: options?.correlationId,
8070
+ }), executionTimeoutMs, ErrorFactory.evaluationTimeout("pipeline execution", executionTimeoutMs));
8071
+ logger.debug(`[NeuroLink] Evaluation completed`, {
8072
+ pipeline: config.name,
8073
+ overallScore: result.overallScore,
8074
+ passed: result.passed,
8075
+ scorerCount: result.scores.length,
8076
+ });
8077
+ return result;
8078
+ }
8079
+ /**
8080
+ * Score a response using a single scorer.
8081
+ * Useful for quick, targeted evaluations without the overhead of a full pipeline.
8082
+ *
8083
+ * @param scorerId - The ID of the scorer to use (e.g., 'toxicity', 'hallucination')
8084
+ * @param input - Scorer input containing query, response, and optional context
8085
+ * @param config - Optional scorer configuration overrides
8086
+ * @returns Score result with value, reasoning, and pass/fail status
8087
+ *
8088
+ * @example Basic scoring
8089
+ * ```typescript
8090
+ * const neurolink = new NeuroLink();
8091
+ * const result = await neurolink.score('toxicity', {
8092
+ * query: '',
8093
+ * response: 'This is a helpful response about cooking recipes.'
8094
+ * });
8095
+ * console.log(`Toxicity Score: ${result.score}/10, Passed: ${result.passed}`);
8096
+ * ```
8097
+ *
8098
+ * @example Hallucination detection
8099
+ * ```typescript
8100
+ * const result = await neurolink.score('hallucination', {
8101
+ * query: 'What year was the Eiffel Tower built?',
8102
+ * response: 'The Eiffel Tower was built in 1889.',
8103
+ * context: ['The Eiffel Tower was constructed from 1887-1889.']
8104
+ * });
8105
+ * console.log(`Score: ${result.score}, Reasoning: ${result.reasoning}`);
8106
+ * ```
8107
+ *
8108
+ * @example With custom threshold
8109
+ * ```typescript
8110
+ * const result = await neurolink.score(
8111
+ * 'faithfulness',
8112
+ * {
8113
+ * query: 'Summarize the article',
8114
+ * response: 'The article discusses...',
8115
+ * context: ['Article content here...']
8116
+ * },
8117
+ * { threshold: 0.85, weight: 1.5 }
8118
+ * );
8119
+ * ```
8120
+ */
8121
+ async score(scorerId, input, config) { // runs a single registered scorer against `input` and returns its result
8122
+ const { ScorerRegistry } = await withTimeout(import("./evaluation/scorers/index.js"), 10000, ErrorFactory.evaluationTimeout("scorer module load", 10000)); // scorer module is imported on demand
8123
+ // Ensure built-in scorers are registered before the lookup below
8124
+ await withTimeout(ScorerRegistry.registerBuiltInScorers(), 30000, ErrorFactory.evaluationTimeout("scorer bootstrap", 30000));
8125
+ // Look up the scorer by ID, passing the optional config overrides through to the registry
8126
+ const scorer = await withTimeout(ScorerRegistry.getScorer(scorerId, config), 30000, ErrorFactory.evaluationTimeout(`scorer load: ${scorerId}`, 30000));
8127
+ if (!scorer) {
8128
+ throw ErrorFactory.scorerNotFound(scorerId);
8129
+ }
8130
+ // Validate input with the scorer's own validator; a failed validation throws with the reported errors
8131
+ const validation = scorer.validateInput(input);
8132
+ if (!validation.valid) {
8133
+ throw ErrorFactory.evaluationValidationFailed(scorerId, validation.errors);
8134
+ }
8135
+ // Execute scoring under a fixed 60000 timeout (this method takes no timeout option)
8136
+ const result = await withTimeout(scorer.score(input), 60000, ErrorFactory.evaluationTimeout("scorer execution", 60000));
8137
+ logger.debug(`[NeuroLink] Scoring completed`, {
8138
+ scorerId,
8139
+ score: result.score,
8140
+ passed: result.passed,
8141
+ computeTime: result.computeTime,
8142
+ });
8143
+ return result;
8144
+ }
8145
+ /**
8146
+ * Get a list of all available scorers and their metadata.
8147
+ * Useful for discovering what evaluation capabilities are available.
8148
+ *
8149
+ * @param options - Filter options
8150
+ * @returns Array of scorer metadata
8151
+ *
8152
+ * @example List all scorers
8153
+ * ```typescript
8154
+ * const neurolink = new NeuroLink();
8155
+ * const scorers = await neurolink.getAvailableScorers();
8156
+ * for (const scorer of scorers) {
8157
+ * console.log(`${scorer.id}: ${scorer.description} (${scorer.type})`);
8158
+ * }
8159
+ * ```
8160
+ *
8161
+ * @example Filter by category
8162
+ * ```typescript
8163
+ * const safetyScorers = await neurolink.getAvailableScorers({
8164
+ * category: 'safety'
8165
+ * });
8166
+ * console.log('Safety scorers:', safetyScorers.map(s => s.id));
8167
+ * ```
8168
+ *
8169
+ * @example Filter by type
8170
+ * ```typescript
8171
+ * const ruleBasedScorers = await neurolink.getAvailableScorers({
8172
+ * type: 'rule'
8173
+ * });
8174
+ * ```
8175
+ */
8176
+ async getAvailableScorers(options) { // lists registered scorers, optionally narrowed by category and/or type
8177
+ const { ScorerRegistry } = await withTimeout(import("./evaluation/scorers/index.js"), 10000, ErrorFactory.evaluationTimeout("scorer module load", 10000)); // scorer module is imported on demand
8178
+ // Ensure built-in scorers are registered before listing
8179
+ await withTimeout(ScorerRegistry.registerBuiltInScorers(), 30000, ErrorFactory.evaluationTimeout("scorer bootstrap", 30000));
8180
+ let scorers = ScorerRegistry.list();
8181
+ // Apply filters: each is skipped when its option is missing or falsy; both may apply together
8182
+ if (options?.category) {
8183
+ scorers = scorers.filter((s) => s.category === options.category);
8184
+ }
8185
+ if (options?.type) {
8186
+ scorers = scorers.filter((s) => s.type === options.type);
8187
+ }
8188
+ return scorers;
8189
+ }
8190
+ /**
8191
+ * Get a list of available evaluation pipeline presets.
8192
+ * Presets are pre-configured pipelines for common evaluation scenarios.
8193
+ *
8194
+ * @returns Array of preset names
8195
+ *
8196
+ * @example
8197
+ * ```typescript
8198
+ * const neurolink = new NeuroLink();
8199
+ * const presets = await neurolink.getEvaluationPresets();
8200
+ * console.log('Available presets:', presets);
8201
+ * // Output: ['safety', 'rag', 'quality', 'comprehensive', 'minimal', ...]
8202
+ * ```
8203
+ */
8204
+ async getEvaluationPresets() { // resolves to whatever getPresetNames() from the pipeline module returns
8205
+ const { getPresetNames } = await withTimeout(import("./evaluation/pipeline/index.js"), 10000, ErrorFactory.evaluationTimeout("evaluation module load", 10000)); // pipeline module is imported on demand
8206
+ return getPresetNames();
8207
+ }
8208
+ /**
8209
+ * Get details of a specific evaluation preset.
8210
+ *
8211
+ * @param presetName - Name of the preset
8212
+ * @returns Pipeline configuration for the preset
8213
+ *
8214
+ * @example
8215
+ * ```typescript
8216
+ * const neurolink = new NeuroLink();
8217
+ * const ragPreset = await neurolink.getEvaluationPreset('rag');
8218
+ * console.log('RAG preset scorers:', ragPreset.scorers.map(s => s.id));
8219
+ * console.log('Pass threshold:', ragPreset.passThreshold);
8220
+ * ```
8221
+ */
8222
+ async getEvaluationPreset(presetName) { // resolves to the config getPreset() returns for `presetName`
8223
+ const { getPreset } = await withTimeout(import("./evaluation/pipeline/index.js"), 10000, ErrorFactory.evaluationTimeout("evaluation module load", 10000)); // pipeline module is imported on demand
8224
+ return getPreset(presetName); // NOTE(review): returned as-is, not copied — confirm whether callers may safely mutate it
8225
+ }
7930
8226
  /**
7931
8227
  * Dispose of all resources and cleanup connections
7932
8228
  * Call this method when done using the NeuroLink instance to prevent resource leaks
@@ -27,13 +27,15 @@ TextGenerationOptions, TextGenerationResult, UnifiedGenerationOptions, } from ".
27
27
  export * from "./hitlTypes.js";
28
28
  export * from "./middlewareTypes.js";
29
29
  export * from "./modelTypes.js";
30
+ export * from "./scorerTypes.js";
30
31
  export * from "./sdkTypes.js";
31
32
  export * from "./serviceTypes.js";
32
33
  export type { EnhancedStreamProvider, ProgressCallback, StreamingMetadata, StreamingOptions, StreamingProgressData, StreamOptions, StreamResult, ToolCall as StreamToolCall, // Renamed to avoid conflict with tools.js ToolCall
33
34
  ToolCallResults, ToolCalls, ToolResult as StreamToolResult, } from "./streamTypes.js";
34
35
  export * from "./ttsTypes.js";
35
36
  export * from "./utilities.js";
36
- export * from "./workflowTypes.js";
37
+ export type { AggregatedUsage, ConditioningConfig, ConditionOptions, ConditionResult, EnsembleExecutionResult, EnsembleResponse, ExecuteEnsembleOptions, ExecuteLayerOptions, ExecuteModelOptions, ExecutionConfig, ExecutionStrategy, JudgeConfig, JudgeOutputFormat, JudgeScores, LayerExecutionResult, ListOptions, ModelGroup, MultiJudgeScores, ParsedJudgeResponse, RegisterOptions, RegisterResult, RegistryEntry, RegistryStats, ScoreOptions, ScoreResult as WorkflowScoreResult, SummaryStats, ToneAdjustment, ValidationIssues, WorkflowAnalytics, WorkflowComparison, WorkflowConfig, WorkflowErrorDetails, WorkflowEvaluationData, WorkflowExecutionMetrics, WorkflowGenerateOptions, WorkflowInput, WorkflowMetadata, WorkflowModelConfig, WorkflowResult, WorkflowType, WorkflowValidationError, WorkflowValidationResult, WorkflowValidationWarning, } from "./workflowTypes.js";
38
+ export { WorkflowError } from "./workflowTypes.js";
37
39
  export * from "./contextTypes.js";
38
40
  export * from "./fileReferenceTypes.js";
39
41
  export * from "./ragTypes.js";
@@ -28,6 +28,8 @@ export * from "./hitlTypes.js";
28
28
  export * from "./middlewareTypes.js";
29
29
  // Model types - NEW
30
30
  export * from "./modelTypes.js";
31
+ // Scorer types for evaluation system
32
+ export * from "./scorerTypes.js";
31
33
  // SDK Types - Core types for external developers
32
34
  // Note: sdkTypes.ts uses selective re-exports internally, so we use wildcard here
33
35
  // The conflicts were from generateTypes and analytics which are now handled above
@@ -38,8 +40,7 @@ export * from "./serviceTypes.js";
38
40
  export * from "./ttsTypes.js";
39
41
  // Utilities Types - Utility module types (selective export to avoid conflicts)
40
42
  export * from "./utilities.js";
41
- // Workflow types
42
- export * from "./workflowTypes.js";
43
+ export { WorkflowError } from "./workflowTypes.js";
43
44
  // Context compaction types
44
45
  export * from "./contextTypes.js";
45
46
  // File reference types