npm - @juspay/neurolink - Versions diffs - 9.36.0 → 9.37.0 - Mend

@juspay/neurolink 9.36.0 → 9.37.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (203) hide show

package/CHANGELOG.md +12 -0
package/dist/auth/errors.d.ts +1 -1
package/dist/auth/middleware/AuthMiddleware.d.ts +1 -1
package/dist/auth/providers/BaseAuthProvider.d.ts +1 -1
package/dist/browser/neurolink.min.js +921 -423
package/dist/cli/commands/evaluate.d.ts +48 -0
package/dist/cli/commands/evaluate.js +955 -0
package/dist/cli/commands/proxy.js +6 -6
package/dist/cli/parser.js +4 -1
package/dist/evaluation/BatchEvaluator.d.ts +163 -0
package/dist/evaluation/BatchEvaluator.js +267 -0
package/dist/evaluation/EvaluationAggregator.d.ts +272 -0
package/dist/evaluation/EvaluationAggregator.js +377 -0
package/dist/evaluation/EvaluatorFactory.d.ts +113 -0
package/dist/evaluation/EvaluatorFactory.js +280 -0
package/dist/evaluation/EvaluatorRegistry.d.ts +160 -0
package/dist/evaluation/EvaluatorRegistry.js +184 -0
package/dist/evaluation/errors/EvaluationError.d.ts +189 -0
package/dist/evaluation/errors/EvaluationError.js +206 -0
package/dist/evaluation/errors/index.d.ts +4 -0
package/dist/evaluation/errors/index.js +4 -0
package/dist/evaluation/hooks/index.d.ts +6 -0
package/dist/evaluation/hooks/index.js +6 -0
package/dist/evaluation/hooks/langfuseAdapter.d.ts +99 -0
package/dist/evaluation/hooks/langfuseAdapter.js +172 -0
package/dist/evaluation/hooks/observabilityHooks.d.ts +129 -0
package/dist/evaluation/hooks/observabilityHooks.js +181 -0
package/dist/evaluation/index.d.ts +11 -2
package/dist/evaluation/index.js +15 -0
package/dist/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
package/dist/evaluation/pipeline/evaluationPipeline.js +381 -0
package/dist/evaluation/pipeline/index.d.ts +8 -0
package/dist/evaluation/pipeline/index.js +8 -0
package/dist/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
package/dist/evaluation/pipeline/pipelineBuilder.js +260 -0
package/dist/evaluation/pipeline/presets.d.ts +66 -0
package/dist/evaluation/pipeline/presets.js +224 -0
package/dist/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
package/dist/evaluation/pipeline/strategies/batchStrategy.js +238 -0
package/dist/evaluation/pipeline/strategies/index.d.ts +6 -0
package/dist/evaluation/pipeline/strategies/index.js +6 -0
package/dist/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
package/dist/evaluation/pipeline/strategies/samplingStrategy.js +238 -0
package/dist/evaluation/reporting/index.d.ts +6 -0
package/dist/evaluation/reporting/index.js +6 -0
package/dist/evaluation/reporting/metricsCollector.d.ts +147 -0
package/dist/evaluation/reporting/metricsCollector.js +285 -0
package/dist/evaluation/reporting/reportGenerator.d.ts +90 -0
package/dist/evaluation/reporting/reportGenerator.js +374 -0
package/dist/evaluation/scorers/baseScorer.d.ts +83 -0
package/dist/evaluation/scorers/baseScorer.js +232 -0
package/dist/evaluation/scorers/customScorerUtils.d.ts +95 -0
package/dist/evaluation/scorers/customScorerUtils.js +381 -0
package/dist/evaluation/scorers/index.d.ts +10 -0
package/dist/evaluation/scorers/index.js +16 -0
package/dist/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
package/dist/evaluation/scorers/llm/answerRelevancyScorer.js +99 -0
package/dist/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
package/dist/evaluation/scorers/llm/baseLLMScorer.js +281 -0
package/dist/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
package/dist/evaluation/scorers/llm/biasDetectionScorer.js +127 -0
package/dist/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
package/dist/evaluation/scorers/llm/contextPrecisionScorer.js +92 -0
package/dist/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
package/dist/evaluation/scorers/llm/contextRelevancyScorer.js +107 -0
package/dist/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
package/dist/evaluation/scorers/llm/faithfulnessScorer.js +121 -0
package/dist/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
package/dist/evaluation/scorers/llm/hallucinationScorer.js +140 -0
package/dist/evaluation/scorers/llm/index.d.ts +15 -0
package/dist/evaluation/scorers/llm/index.js +16 -0
package/dist/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
package/dist/evaluation/scorers/llm/promptAlignmentScorer.js +106 -0
package/dist/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
package/dist/evaluation/scorers/llm/summarizationScorer.js +114 -0
package/dist/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
package/dist/evaluation/scorers/llm/toneConsistencyScorer.js +106 -0
package/dist/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
package/dist/evaluation/scorers/llm/toxicityScorer.js +121 -0
package/dist/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
package/dist/evaluation/scorers/rule/baseRuleScorer.js +233 -0
package/dist/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
package/dist/evaluation/scorers/rule/contentSimilarityScorer.js +350 -0
package/dist/evaluation/scorers/rule/formatScorer.d.ts +147 -0
package/dist/evaluation/scorers/rule/formatScorer.js +470 -0
package/dist/evaluation/scorers/rule/index.d.ts +9 -0
package/dist/evaluation/scorers/rule/index.js +10 -0
package/dist/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
package/dist/evaluation/scorers/rule/keywordCoverageScorer.js +347 -0
package/dist/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
package/dist/evaluation/scorers/rule/lengthScorer.js +351 -0
package/dist/evaluation/scorers/scorerBuilder.d.ts +161 -0
package/dist/evaluation/scorers/scorerBuilder.js +420 -0
package/dist/evaluation/scorers/scorerRegistry.d.ts +62 -0
package/dist/evaluation/scorers/scorerRegistry.js +467 -0
package/dist/index.d.ts +37 -25
package/dist/index.js +65 -26
package/dist/lib/auth/providers/BaseAuthProvider.d.ts +1 -1
package/dist/lib/evaluation/BatchEvaluator.d.ts +163 -0
package/dist/lib/evaluation/BatchEvaluator.js +268 -0
package/dist/lib/evaluation/EvaluationAggregator.d.ts +272 -0
package/dist/lib/evaluation/EvaluationAggregator.js +378 -0
package/dist/lib/evaluation/EvaluatorFactory.d.ts +113 -0
package/dist/lib/evaluation/EvaluatorFactory.js +281 -0
package/dist/lib/evaluation/EvaluatorRegistry.d.ts +160 -0
package/dist/lib/evaluation/EvaluatorRegistry.js +185 -0
package/dist/lib/evaluation/errors/EvaluationError.d.ts +189 -0
package/dist/lib/evaluation/errors/EvaluationError.js +207 -0
package/dist/lib/evaluation/errors/index.d.ts +4 -0
package/dist/lib/evaluation/errors/index.js +5 -0
package/dist/lib/evaluation/hooks/index.d.ts +6 -0
package/dist/lib/evaluation/hooks/index.js +7 -0
package/dist/lib/evaluation/hooks/langfuseAdapter.d.ts +99 -0
package/dist/lib/evaluation/hooks/langfuseAdapter.js +173 -0
package/dist/lib/evaluation/hooks/observabilityHooks.d.ts +129 -0
package/dist/lib/evaluation/hooks/observabilityHooks.js +182 -0
package/dist/lib/evaluation/index.d.ts +11 -2
package/dist/lib/evaluation/index.js +15 -0
package/dist/lib/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
package/dist/lib/evaluation/pipeline/evaluationPipeline.js +382 -0
package/dist/lib/evaluation/pipeline/index.d.ts +8 -0
package/dist/lib/evaluation/pipeline/index.js +9 -0
package/dist/lib/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
package/dist/lib/evaluation/pipeline/pipelineBuilder.js +261 -0
package/dist/lib/evaluation/pipeline/presets.d.ts +66 -0
package/dist/lib/evaluation/pipeline/presets.js +225 -0
package/dist/lib/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
package/dist/lib/evaluation/pipeline/strategies/batchStrategy.js +239 -0
package/dist/lib/evaluation/pipeline/strategies/index.d.ts +6 -0
package/dist/lib/evaluation/pipeline/strategies/index.js +7 -0
package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.js +239 -0
package/dist/lib/evaluation/reporting/index.d.ts +6 -0
package/dist/lib/evaluation/reporting/index.js +7 -0
package/dist/lib/evaluation/reporting/metricsCollector.d.ts +147 -0
package/dist/lib/evaluation/reporting/metricsCollector.js +286 -0
package/dist/lib/evaluation/reporting/reportGenerator.d.ts +90 -0
package/dist/lib/evaluation/reporting/reportGenerator.js +375 -0
package/dist/lib/evaluation/scorers/baseScorer.d.ts +83 -0
package/dist/lib/evaluation/scorers/baseScorer.js +233 -0
package/dist/lib/evaluation/scorers/customScorerUtils.d.ts +95 -0
package/dist/lib/evaluation/scorers/customScorerUtils.js +382 -0
package/dist/lib/evaluation/scorers/index.d.ts +10 -0
package/dist/lib/evaluation/scorers/index.js +17 -0
package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.js +100 -0
package/dist/lib/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
package/dist/lib/evaluation/scorers/llm/baseLLMScorer.js +282 -0
package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.js +128 -0
package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.js +93 -0
package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.js +108 -0
package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.js +122 -0
package/dist/lib/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
package/dist/lib/evaluation/scorers/llm/hallucinationScorer.js +141 -0
package/dist/lib/evaluation/scorers/llm/index.d.ts +15 -0
package/dist/lib/evaluation/scorers/llm/index.js +17 -0
package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.js +107 -0
package/dist/lib/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
package/dist/lib/evaluation/scorers/llm/summarizationScorer.js +115 -0
package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.js +107 -0
package/dist/lib/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
package/dist/lib/evaluation/scorers/llm/toxicityScorer.js +122 -0
package/dist/lib/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
package/dist/lib/evaluation/scorers/rule/baseRuleScorer.js +234 -0
package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.js +351 -0
package/dist/lib/evaluation/scorers/rule/formatScorer.d.ts +147 -0
package/dist/lib/evaluation/scorers/rule/formatScorer.js +471 -0
package/dist/lib/evaluation/scorers/rule/index.d.ts +9 -0
package/dist/lib/evaluation/scorers/rule/index.js +11 -0
package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.js +348 -0
package/dist/lib/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
package/dist/lib/evaluation/scorers/rule/lengthScorer.js +352 -0
package/dist/lib/evaluation/scorers/scorerBuilder.d.ts +161 -0
package/dist/lib/evaluation/scorers/scorerBuilder.js +421 -0
package/dist/lib/evaluation/scorers/scorerRegistry.d.ts +62 -0
package/dist/lib/evaluation/scorers/scorerRegistry.js +468 -0
package/dist/lib/index.d.ts +37 -25
package/dist/lib/index.js +65 -26
package/dist/lib/neurolink.d.ts +204 -0
package/dist/lib/neurolink.js +296 -0
package/dist/lib/types/index.d.ts +3 -1
package/dist/lib/types/index.js +3 -2
package/dist/lib/types/scorerTypes.d.ts +423 -0
package/dist/lib/types/scorerTypes.js +6 -0
package/dist/lib/utils/errorHandling.d.ts +20 -0
package/dist/lib/utils/errorHandling.js +60 -0
package/dist/neurolink.d.ts +204 -0
package/dist/neurolink.js +296 -0
package/dist/types/index.d.ts +3 -1
package/dist/types/index.js +3 -2
package/dist/types/scorerTypes.d.ts +423 -0
package/dist/types/scorerTypes.js +5 -0
package/dist/utils/errorHandling.d.ts +20 -0
package/dist/utils/errorHandling.js +60 -0
package/package.json +1 -1

package/dist/neurolink.d.ts CHANGED Viewed

@@ -1646,6 +1646,210 @@ export declare class NeuroLink {
      * Unregister all external MCP tools from the main registry
      */
     private unregisterAllExternalMCPToolsFromRegistry;
+    /**
+     * Create an evaluation pipeline with the specified configuration or preset.
+     * Pipelines orchestrate multiple scorers to evaluate AI responses comprehensively.
+     *
+     * @param configOrPreset - Pipeline configuration object or preset name
+     * @returns Initialized evaluation pipeline
+     *
+     * @example Using a preset
+     * ```typescript
+     * const neurolink = new NeuroLink();
+     * const pipeline = await neurolink.createEvaluationPipeline('rag');
+     * const result = await pipeline.execute({
+     *   query: 'What is the capital of France?',
+     *   response: 'Paris is the capital of France.',
+     *   context: ['France is a country in Europe. Paris is its capital.']
+     * });
+     * console.log(result.overallScore, result.passed);
+     * ```
+     *
+     * @example Using custom configuration
+     * ```typescript
+     * const pipeline = await neurolink.createEvaluationPipeline({
+     *   name: 'custom-quality',
+     *   scorers: [
+     *     { id: 'toxicity', config: { threshold: 0.9 } },
+     *     { id: 'hallucination', config: { weight: 1.5 } },
+     *     { id: 'answer-relevancy' }
+     *   ],
+     *   aggregation: { method: 'weighted' },
+     *   passThreshold: 0.8
+     * });
+     * ```
+     */
+    createEvaluationPipeline(configOrPreset: import("./types/scorerTypes.js").PipelineConfig | "safety" | "rag" | "quality" | "comprehensive" | "minimal" | "summarization" | "customerSupport" | "codeGeneration"): Promise<import("./evaluation/pipeline/evaluationPipeline.js").EvaluationPipeline>;
+    /**
+     * Evaluate an AI response using the specified pipeline or scorers.
+     * This is a convenience method that creates a pipeline and executes it in one call.
+     *
+     * @param input - Scorer input containing query, response, and optional context
+     * @param options - Evaluation options including pipeline preset or custom scorers
+     * @returns Evaluation pipeline result with scores and pass/fail status
+     *
+     * @example Using a preset
+     * ```typescript
+     * const neurolink = new NeuroLink();
+     * const result = await neurolink.evaluate(
+     *   {
+     *     query: 'Explain quantum computing',
+     *     response: 'Quantum computing uses qubits...'
+     *   },
+     *   { pipeline: 'quality' }
+     * );
+     * console.log(`Score: ${result.overallScore}, Passed: ${result.passed}`);
+     * ```
+     *
+     * @example Using specific scorers
+     * ```typescript
+     * const result = await neurolink.evaluate(
+     *   {
+     *     query: 'What causes rain?',
+     *     response: 'Rain is caused by water vapor...',
+     *     context: ['The water cycle involves evaporation...']
+     *   },
+     *   { scorers: ['hallucination', 'faithfulness', 'answer-relevancy'] }
+     * );
+     * ```
+     *
+     * @example Full RAG evaluation
+     * ```typescript
+     * const result = await neurolink.evaluate(
+     *   {
+     *     query: 'Who wrote Hamlet?',
+     *     response: 'Shakespeare wrote Hamlet in 1600.',
+     *     context: ['William Shakespeare wrote Hamlet around 1600-1601.'],
+     *     groundTruth: 'William Shakespeare'
+     *   },
+     *   { pipeline: 'rag' }
+     * );
+     * ```
+     */
+    evaluate(input: import("./types/scorerTypes.js").ScorerInput, options?: {
+        /** Pipeline preset to use */
+        pipeline?: "safety" | "rag" | "quality" | "comprehensive" | "minimal" | "summarization" | "customerSupport" | "codeGeneration";
+        /** Specific scorers to use (alternative to pipeline) */
+        scorers?: string[];
+        /** Pass threshold override (0-1) */
+        passThreshold?: number;
+        /** Execution mode */
+        executionMode?: "parallel" | "sequential";
+        /** Correlation ID for tracing */
+        correlationId?: string;
+        /** Overall evaluation timeout in milliseconds */
+        timeoutMs?: number;
+    }): Promise<import("./evaluation/pipeline/evaluationPipeline.js").PipelineResult>;
+    /**
+     * Score a response using a single scorer.
+     * Useful for quick, targeted evaluations without the overhead of a full pipeline.
+     *
+     * @param scorerId - The ID of the scorer to use (e.g., 'toxicity', 'hallucination')
+     * @param input - Scorer input containing query, response, and optional context
+     * @param config - Optional scorer configuration overrides
+     * @returns Score result with value, reasoning, and pass/fail status
+     *
+     * @example Basic scoring
+     * ```typescript
+     * const neurolink = new NeuroLink();
+     * const result = await neurolink.score('toxicity', {
+     *   query: '',
+     *   response: 'This is a helpful response about cooking recipes.'
+     * });
+     * console.log(`Toxicity Score: ${result.score}/10, Passed: ${result.passed}`);
+     * ```
+     *
+     * @example Hallucination detection
+     * ```typescript
+     * const result = await neurolink.score('hallucination', {
+     *   query: 'What year was the Eiffel Tower built?',
+     *   response: 'The Eiffel Tower was built in 1889.',
+     *   context: ['The Eiffel Tower was constructed from 1887-1889.']
+     * });
+     * console.log(`Score: ${result.score}, Reasoning: ${result.reasoning}`);
+     * ```
+     *
+     * @example With custom threshold
+     * ```typescript
+     * const result = await neurolink.score(
+     *   'faithfulness',
+     *   {
+     *     query: 'Summarize the article',
+     *     response: 'The article discusses...',
+     *     context: ['Article content here...']
+     *   },
+     *   { threshold: 0.85, weight: 1.5 }
+     * );
+     * ```
+     */
+    score(scorerId: string, input: import("./types/scorerTypes.js").ScorerInput, config?: import("./types/scorerTypes.js").ScorerConfig): Promise<import("./types/scorerTypes.js").ScoreResult>;
+    /**
+     * Get a list of all available scorers and their metadata.
+     * Useful for discovering what evaluation capabilities are available.
+     *
+     * @param options - Filter options
+     * @returns Array of scorer metadata
+     *
+     * @example List all scorers
+     * ```typescript
+     * const neurolink = new NeuroLink();
+     * const scorers = await neurolink.getAvailableScorers();
+     * for (const scorer of scorers) {
+     *   console.log(`${scorer.id}: ${scorer.description} (${scorer.type})`);
+     * }
+     * ```
+     *
+     * @example Filter by category
+     * ```typescript
+     * const safetyScorers = await neurolink.getAvailableScorers({
+     *   category: 'safety'
+     * });
+     * console.log('Safety scorers:', safetyScorers.map(s => s.id));
+     * ```
+     *
+     * @example Filter by type
+     * ```typescript
+     * const ruleBasedScorers = await neurolink.getAvailableScorers({
+     *   type: 'rule'
+     * });
+     * ```
+     */
+    getAvailableScorers(options?: {
+        /** Filter by category */
+        category?: import("./types/scorerTypes.js").ScorerCategory;
+        /** Filter by type */
+        type?: import("./types/scorerTypes.js").ScorerType;
+    }): Promise<import("./types/scorerTypes.js").ScorerMetadata[]>;
+    /**
+     * Get a list of available evaluation pipeline presets.
+     * Presets are pre-configured pipelines for common evaluation scenarios.
+     *
+     * @returns Array of preset names
+     *
+     * @example
+     * ```typescript
+     * const neurolink = new NeuroLink();
+     * const presets = await neurolink.getEvaluationPresets();
+     * console.log('Available presets:', presets);
+     * // Output: ['safety', 'rag', 'quality', 'comprehensive', 'minimal', ...]
+     * ```
+     */
+    getEvaluationPresets(): Promise<string[]>;
+    /**
+     * Get details of a specific evaluation preset.
+     *
+     * @param presetName - Name of the preset
+     * @returns Pipeline configuration for the preset
+     *
+     * @example
+     * ```typescript
+     * const neurolink = new NeuroLink();
+     * const ragPreset = await neurolink.getEvaluationPreset('rag');
+     * console.log('RAG preset scorers:', ragPreset.scorers.map(s => s.id));
+     * console.log('Pass threshold:', ragPreset.passThreshold);
+     * ```
+     */
+    getEvaluationPreset(presetName: "safety" | "rag" | "quality" | "comprehensive" | "minimal" | "summarization" | "customerSupport" | "codeGeneration"): Promise<import("./types/scorerTypes.js").PipelineConfig>;
     /**
      * Dispose of all resources and cleanup connections
      * Call this method when done using the NeuroLink instance to prevent resource leaks

package/dist/neurolink.js CHANGED Viewed

@@ -7927,6 +7927,302 @@ Current user's request: ${currentInput}`;
             mcpLogger.error("[NeuroLink] Failed to unregister all external MCP tools from registry:", error);
         }
     }
+    // ========================================
+    // Evaluation & Scoring API
+    // ========================================
+    /**
+     * Create an evaluation pipeline with the specified configuration or preset.
+     * Pipelines orchestrate multiple scorers to evaluate AI responses comprehensively.
+     *
+     * @param configOrPreset - Pipeline configuration object or preset name
+     * @returns Initialized evaluation pipeline
+     *
+     * @example Using a preset
+     * ```typescript
+     * const neurolink = new NeuroLink();
+     * const pipeline = await neurolink.createEvaluationPipeline('rag');
+     * const result = await pipeline.execute({
+     *   query: 'What is the capital of France?',
+     *   response: 'Paris is the capital of France.',
+     *   context: ['France is a country in Europe. Paris is its capital.']
+     * });
+     * console.log(result.overallScore, result.passed);
+     * ```
+     *
+     * @example Using custom configuration
+     * ```typescript
+     * const pipeline = await neurolink.createEvaluationPipeline({
+     *   name: 'custom-quality',
+     *   scorers: [
+     *     { id: 'toxicity', config: { threshold: 0.9 } },
+     *     { id: 'hallucination', config: { weight: 1.5 } },
+     *     { id: 'answer-relevancy' }
+     *   ],
+     *   aggregation: { method: 'weighted' },
+     *   passThreshold: 0.8
+     * });
+     * ```
+     */
+    async createEvaluationPipeline(configOrPreset) {
+        const { EvaluationPipeline, getPreset } = await withTimeout(import("./evaluation/pipeline/index.js"), 10000, ErrorFactory.evaluationTimeout("evaluation module load", 10000));
+        let config;
+        if (typeof configOrPreset === "string") {
+            // It's a preset name
+            config = getPreset(configOrPreset);
+        }
+        else {
+            // It's a custom configuration
+            config = configOrPreset;
+        }
+        const pipeline = new EvaluationPipeline(config);
+        // Note: withTimeout races the promise but does not abort in-flight LLM calls.
+        // Full AbortController propagation into pipeline/scorer internals is planned.
+        await withTimeout(pipeline.initialize(), 30000, ErrorFactory.evaluationTimeout("pipeline initialization", 30000));
+        logger.debug(`[NeuroLink] Created evaluation pipeline: ${config.name ?? "custom"}`);
+        return pipeline;
+    }
+    /**
+     * Evaluate an AI response using the specified pipeline or scorers.
+     * This is a convenience method that creates a pipeline and executes it in one call.
+     *
+     * @param input - Scorer input containing query, response, and optional context
+     * @param options - Evaluation options including pipeline preset or custom scorers
+     * @returns Evaluation pipeline result with scores and pass/fail status
+     *
+     * @example Using a preset
+     * ```typescript
+     * const neurolink = new NeuroLink();
+     * const result = await neurolink.evaluate(
+     *   {
+     *     query: 'Explain quantum computing',
+     *     response: 'Quantum computing uses qubits...'
+     *   },
+     *   { pipeline: 'quality' }
+     * );
+     * console.log(`Score: ${result.overallScore}, Passed: ${result.passed}`);
+     * ```
+     *
+     * @example Using specific scorers
+     * ```typescript
+     * const result = await neurolink.evaluate(
+     *   {
+     *     query: 'What causes rain?',
+     *     response: 'Rain is caused by water vapor...',
+     *     context: ['The water cycle involves evaporation...']
+     *   },
+     *   { scorers: ['hallucination', 'faithfulness', 'answer-relevancy'] }
+     * );
+     * ```
+     *
+     * @example Full RAG evaluation
+     * ```typescript
+     * const result = await neurolink.evaluate(
+     *   {
+     *     query: 'Who wrote Hamlet?',
+     *     response: 'Shakespeare wrote Hamlet in 1600.',
+     *     context: ['William Shakespeare wrote Hamlet around 1600-1601.'],
+     *     groundTruth: 'William Shakespeare'
+     *   },
+     *   { pipeline: 'rag' }
+     * );
+     * ```
+     */
+    async evaluate(input, options) {
+        const { EvaluationPipeline, getPreset } = await withTimeout(import("./evaluation/pipeline/index.js"), 10000, ErrorFactory.evaluationTimeout("evaluation module load", 10000));
+        let config;
+        // Fail fast on conflicting or empty evaluator selection
+        if (options?.pipeline && options?.scorers) {
+            throw new Error("Cannot specify both 'pipeline' and 'scorers' options. Use one or the other.");
+        }
+        if (options?.scorers && options.scorers.length === 0) {
+            throw new Error("The 'scorers' array must not be empty. Provide at least one scorer ID or omit the option to use the default 'quality' preset.");
+        }
+        if (options?.pipeline) {
+            // Use preset
+            config = { ...getPreset(options.pipeline) };
+        }
+        else if (options?.scorers && options.scorers.length > 0) {
+            // Use custom scorers
+            config = {
+                name: "SDK Evaluation",
+                description: "Evaluation from NeuroLink SDK",
+                scorers: options.scorers.map((id) => ({ id })),
+                executionMode: options.executionMode ?? "parallel",
+                passThreshold: options.passThreshold ?? 0.7,
+            };
+        }
+        else {
+            // Default to quality preset
+            config = getPreset("quality");
+        }
+        // Apply overrides
+        if (options?.passThreshold !== undefined) {
+            config.passThreshold = options.passThreshold;
+        }
+        if (options?.executionMode !== undefined) {
+            config.executionMode = options.executionMode;
+        }
+        const pipeline = new EvaluationPipeline(config);
+        await withTimeout(pipeline.initialize(), 30000, ErrorFactory.evaluationTimeout("pipeline initialization", 30000));
+        const executionTimeoutMs = options?.timeoutMs ?? 60000;
+        const result = await withTimeout(pipeline.execute(input, {
+            correlationId: options?.correlationId,
+        }), executionTimeoutMs, ErrorFactory.evaluationTimeout("pipeline execution", executionTimeoutMs));
+        logger.debug(`[NeuroLink] Evaluation completed`, {
+            pipeline: config.name,
+            overallScore: result.overallScore,
+            passed: result.passed,
+            scorerCount: result.scores.length,
+        });
+        return result;
+    }
+    /**
+     * Score a response using a single scorer.
+     * Useful for quick, targeted evaluations without the overhead of a full pipeline.
+     *
+     * @param scorerId - The ID of the scorer to use (e.g., 'toxicity', 'hallucination')
+     * @param input - Scorer input containing query, response, and optional context
+     * @param config - Optional scorer configuration overrides
+     * @returns Score result with value, reasoning, and pass/fail status
+     *
+     * @example Basic scoring
+     * ```typescript
+     * const neurolink = new NeuroLink();
+     * const result = await neurolink.score('toxicity', {
+     *   query: '',
+     *   response: 'This is a helpful response about cooking recipes.'
+     * });
+     * console.log(`Toxicity Score: ${result.score}/10, Passed: ${result.passed}`);
+     * ```
+     *
+     * @example Hallucination detection
+     * ```typescript
+     * const result = await neurolink.score('hallucination', {
+     *   query: 'What year was the Eiffel Tower built?',
+     *   response: 'The Eiffel Tower was built in 1889.',
+     *   context: ['The Eiffel Tower was constructed from 1887-1889.']
+     * });
+     * console.log(`Score: ${result.score}, Reasoning: ${result.reasoning}`);
+     * ```
+     *
+     * @example With custom threshold
+     * ```typescript
+     * const result = await neurolink.score(
+     *   'faithfulness',
+     *   {
+     *     query: 'Summarize the article',
+     *     response: 'The article discusses...',
+     *     context: ['Article content here...']
+     *   },
+     *   { threshold: 0.85, weight: 1.5 }
+     * );
+     * ```
+     */
+    async score(scorerId, input, config) {
+        const { ScorerRegistry } = await withTimeout(import("./evaluation/scorers/index.js"), 10000, ErrorFactory.evaluationTimeout("scorer module load", 10000));
+        // Ensure built-in scorers are registered
+        await withTimeout(ScorerRegistry.registerBuiltInScorers(), 30000, ErrorFactory.evaluationTimeout("scorer bootstrap", 30000));
+        // Get the scorer
+        const scorer = await withTimeout(ScorerRegistry.getScorer(scorerId, config), 30000, ErrorFactory.evaluationTimeout(`scorer load: ${scorerId}`, 30000));
+        if (!scorer) {
+            throw ErrorFactory.scorerNotFound(scorerId);
+        }
+        // Validate input
+        const validation = scorer.validateInput(input);
+        if (!validation.valid) {
+            throw ErrorFactory.evaluationValidationFailed(scorerId, validation.errors);
+        }
+        // Execute scoring
+        const result = await withTimeout(scorer.score(input), 60000, ErrorFactory.evaluationTimeout("scorer execution", 60000));
+        logger.debug(`[NeuroLink] Scoring completed`, {
+            scorerId,
+            score: result.score,
+            passed: result.passed,
+            computeTime: result.computeTime,
+        });
+        return result;
+    }
+    /**
+     * Get a list of all available scorers and their metadata.
+     * Useful for discovering what evaluation capabilities are available.
+     *
+     * @param options - Filter options
+     * @returns Array of scorer metadata
+     *
+     * @example List all scorers
+     * ```typescript
+     * const neurolink = new NeuroLink();
+     * const scorers = await neurolink.getAvailableScorers();
+     * for (const scorer of scorers) {
+     *   console.log(`${scorer.id}: ${scorer.description} (${scorer.type})`);
+     * }
+     * ```
+     *
+     * @example Filter by category
+     * ```typescript
+     * const safetyScorers = await neurolink.getAvailableScorers({
+     *   category: 'safety'
+     * });
+     * console.log('Safety scorers:', safetyScorers.map(s => s.id));
+     * ```
+     *
+     * @example Filter by type
+     * ```typescript
+     * const ruleBasedScorers = await neurolink.getAvailableScorers({
+     *   type: 'rule'
+     * });
+     * ```
+     */
+    async getAvailableScorers(options) {
+        const { ScorerRegistry } = await withTimeout(import("./evaluation/scorers/index.js"), 10000, ErrorFactory.evaluationTimeout("scorer module load", 10000));
+        // Ensure built-in scorers are registered
+        await withTimeout(ScorerRegistry.registerBuiltInScorers(), 30000, ErrorFactory.evaluationTimeout("scorer bootstrap", 30000));
+        let scorers = ScorerRegistry.list();
+        // Apply filters
+        if (options?.category) {
+            scorers = scorers.filter((s) => s.category === options.category);
+        }
+        if (options?.type) {
+            scorers = scorers.filter((s) => s.type === options.type);
+        }
+        return scorers;
+    }
+    /**
+     * Get a list of available evaluation pipeline presets.
+     * Presets are pre-configured pipelines for common evaluation scenarios.
+     *
+     * @returns Array of preset names
+     *
+     * @example
+     * ```typescript
+     * const neurolink = new NeuroLink();
+     * const presets = await neurolink.getEvaluationPresets();
+     * console.log('Available presets:', presets);
+     * // Output: ['safety', 'rag', 'quality', 'comprehensive', 'minimal', ...]
+     * ```
+     */
+    async getEvaluationPresets() {
+        const { getPresetNames } = await withTimeout(import("./evaluation/pipeline/index.js"), 10000, ErrorFactory.evaluationTimeout("evaluation module load", 10000));
+        return getPresetNames();
+    }
+    /**
+     * Get details of a specific evaluation preset.
+     *
+     * @param presetName - Name of the preset
+     * @returns Pipeline configuration for the preset
+     *
+     * @example
+     * ```typescript
+     * const neurolink = new NeuroLink();
+     * const ragPreset = await neurolink.getEvaluationPreset('rag');
+     * console.log('RAG preset scorers:', ragPreset.scorers.map(s => s.id));
+     * console.log('Pass threshold:', ragPreset.passThreshold);
+     * ```
+     */
+    async getEvaluationPreset(presetName) {
+        const { getPreset } = await withTimeout(import("./evaluation/pipeline/index.js"), 10000, ErrorFactory.evaluationTimeout("evaluation module load", 10000));
+        return getPreset(presetName);
+    }
     /**
      * Dispose of all resources and cleanup connections
      * Call this method when done using the NeuroLink instance to prevent resource leaks

package/dist/types/index.d.ts CHANGED Viewed

@@ -27,13 +27,15 @@ TextGenerationOptions, TextGenerationResult, UnifiedGenerationOptions, } from ".
 export * from "./hitlTypes.js";
 export * from "./middlewareTypes.js";
 export * from "./modelTypes.js";
+export * from "./scorerTypes.js";
 export * from "./sdkTypes.js";
 export * from "./serviceTypes.js";
 export type { EnhancedStreamProvider, ProgressCallback, StreamingMetadata, StreamingOptions, StreamingProgressData, StreamOptions, StreamResult, ToolCall as StreamToolCall, // Renamed to avoid conflict with tools.js ToolCall
 ToolCallResults, ToolCalls, ToolResult as StreamToolResult, } from "./streamTypes.js";
 export * from "./ttsTypes.js";
 export * from "./utilities.js";
-export * from "./workflowTypes.js";
+export type { AggregatedUsage, ConditioningConfig, ConditionOptions, ConditionResult, EnsembleExecutionResult, EnsembleResponse, ExecuteEnsembleOptions, ExecuteLayerOptions, ExecuteModelOptions, ExecutionConfig, ExecutionStrategy, JudgeConfig, JudgeOutputFormat, JudgeScores, LayerExecutionResult, ListOptions, ModelGroup, MultiJudgeScores, ParsedJudgeResponse, RegisterOptions, RegisterResult, RegistryEntry, RegistryStats, ScoreOptions, ScoreResult as WorkflowScoreResult, SummaryStats, ToneAdjustment, ValidationIssues, WorkflowAnalytics, WorkflowComparison, WorkflowConfig, WorkflowErrorDetails, WorkflowEvaluationData, WorkflowExecutionMetrics, WorkflowGenerateOptions, WorkflowInput, WorkflowMetadata, WorkflowModelConfig, WorkflowResult, WorkflowType, WorkflowValidationError, WorkflowValidationResult, WorkflowValidationWarning, } from "./workflowTypes.js";
+export { WorkflowError } from "./workflowTypes.js";
 export * from "./contextTypes.js";
 export * from "./fileReferenceTypes.js";
 export * from "./ragTypes.js";

package/dist/types/index.js CHANGED Viewed

@@ -28,6 +28,8 @@ export * from "./hitlTypes.js";
 export * from "./middlewareTypes.js";
 // Model types - NEW
 export * from "./modelTypes.js";
+// Scorer types for evaluation system
+export * from "./scorerTypes.js";
 // SDK Types - Core types for external developers
 // Note: sdkTypes.ts uses selective re-exports internally, so we use wildcard here
 // The conflicts were from generateTypes and analytics which are now handled above
@@ -38,8 +40,7 @@ export * from "./serviceTypes.js";
 export * from "./ttsTypes.js";
 // Utilities Types - Utility module types (selective export to avoid conflicts)
 export * from "./utilities.js";
-// Workflow types
-export * from "./workflowTypes.js";
+export { WorkflowError } from "./workflowTypes.js";
 // Context compaction types
 export * from "./contextTypes.js";
 // File reference types