@juspay/neurolink 9.36.0 → 9.37.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/auth/errors.d.ts +1 -1
- package/dist/auth/middleware/AuthMiddleware.d.ts +1 -1
- package/dist/auth/providers/BaseAuthProvider.d.ts +1 -1
- package/dist/browser/neurolink.min.js +921 -423
- package/dist/cli/commands/evaluate.d.ts +48 -0
- package/dist/cli/commands/evaluate.js +955 -0
- package/dist/cli/commands/proxy.js +6 -6
- package/dist/cli/parser.js +4 -1
- package/dist/evaluation/BatchEvaluator.d.ts +163 -0
- package/dist/evaluation/BatchEvaluator.js +267 -0
- package/dist/evaluation/EvaluationAggregator.d.ts +272 -0
- package/dist/evaluation/EvaluationAggregator.js +377 -0
- package/dist/evaluation/EvaluatorFactory.d.ts +113 -0
- package/dist/evaluation/EvaluatorFactory.js +280 -0
- package/dist/evaluation/EvaluatorRegistry.d.ts +160 -0
- package/dist/evaluation/EvaluatorRegistry.js +184 -0
- package/dist/evaluation/errors/EvaluationError.d.ts +189 -0
- package/dist/evaluation/errors/EvaluationError.js +206 -0
- package/dist/evaluation/errors/index.d.ts +4 -0
- package/dist/evaluation/errors/index.js +4 -0
- package/dist/evaluation/hooks/index.d.ts +6 -0
- package/dist/evaluation/hooks/index.js +6 -0
- package/dist/evaluation/hooks/langfuseAdapter.d.ts +99 -0
- package/dist/evaluation/hooks/langfuseAdapter.js +172 -0
- package/dist/evaluation/hooks/observabilityHooks.d.ts +129 -0
- package/dist/evaluation/hooks/observabilityHooks.js +181 -0
- package/dist/evaluation/index.d.ts +11 -2
- package/dist/evaluation/index.js +15 -0
- package/dist/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
- package/dist/evaluation/pipeline/evaluationPipeline.js +381 -0
- package/dist/evaluation/pipeline/index.d.ts +8 -0
- package/dist/evaluation/pipeline/index.js +8 -0
- package/dist/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
- package/dist/evaluation/pipeline/pipelineBuilder.js +260 -0
- package/dist/evaluation/pipeline/presets.d.ts +66 -0
- package/dist/evaluation/pipeline/presets.js +224 -0
- package/dist/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
- package/dist/evaluation/pipeline/strategies/batchStrategy.js +238 -0
- package/dist/evaluation/pipeline/strategies/index.d.ts +6 -0
- package/dist/evaluation/pipeline/strategies/index.js +6 -0
- package/dist/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
- package/dist/evaluation/pipeline/strategies/samplingStrategy.js +238 -0
- package/dist/evaluation/reporting/index.d.ts +6 -0
- package/dist/evaluation/reporting/index.js +6 -0
- package/dist/evaluation/reporting/metricsCollector.d.ts +147 -0
- package/dist/evaluation/reporting/metricsCollector.js +285 -0
- package/dist/evaluation/reporting/reportGenerator.d.ts +90 -0
- package/dist/evaluation/reporting/reportGenerator.js +374 -0
- package/dist/evaluation/scorers/baseScorer.d.ts +83 -0
- package/dist/evaluation/scorers/baseScorer.js +232 -0
- package/dist/evaluation/scorers/customScorerUtils.d.ts +95 -0
- package/dist/evaluation/scorers/customScorerUtils.js +381 -0
- package/dist/evaluation/scorers/index.d.ts +10 -0
- package/dist/evaluation/scorers/index.js +16 -0
- package/dist/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/answerRelevancyScorer.js +99 -0
- package/dist/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
- package/dist/evaluation/scorers/llm/baseLLMScorer.js +281 -0
- package/dist/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/biasDetectionScorer.js +127 -0
- package/dist/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/contextPrecisionScorer.js +92 -0
- package/dist/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/contextRelevancyScorer.js +107 -0
- package/dist/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/faithfulnessScorer.js +121 -0
- package/dist/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/hallucinationScorer.js +140 -0
- package/dist/evaluation/scorers/llm/index.d.ts +15 -0
- package/dist/evaluation/scorers/llm/index.js +16 -0
- package/dist/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/promptAlignmentScorer.js +106 -0
- package/dist/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/summarizationScorer.js +114 -0
- package/dist/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/toneConsistencyScorer.js +106 -0
- package/dist/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/toxicityScorer.js +121 -0
- package/dist/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
- package/dist/evaluation/scorers/rule/baseRuleScorer.js +233 -0
- package/dist/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
- package/dist/evaluation/scorers/rule/contentSimilarityScorer.js +350 -0
- package/dist/evaluation/scorers/rule/formatScorer.d.ts +147 -0
- package/dist/evaluation/scorers/rule/formatScorer.js +470 -0
- package/dist/evaluation/scorers/rule/index.d.ts +9 -0
- package/dist/evaluation/scorers/rule/index.js +10 -0
- package/dist/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
- package/dist/evaluation/scorers/rule/keywordCoverageScorer.js +347 -0
- package/dist/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
- package/dist/evaluation/scorers/rule/lengthScorer.js +351 -0
- package/dist/evaluation/scorers/scorerBuilder.d.ts +161 -0
- package/dist/evaluation/scorers/scorerBuilder.js +420 -0
- package/dist/evaluation/scorers/scorerRegistry.d.ts +62 -0
- package/dist/evaluation/scorers/scorerRegistry.js +467 -0
- package/dist/index.d.ts +37 -25
- package/dist/index.js +65 -26
- package/dist/lib/auth/providers/BaseAuthProvider.d.ts +1 -1
- package/dist/lib/evaluation/BatchEvaluator.d.ts +163 -0
- package/dist/lib/evaluation/BatchEvaluator.js +268 -0
- package/dist/lib/evaluation/EvaluationAggregator.d.ts +272 -0
- package/dist/lib/evaluation/EvaluationAggregator.js +378 -0
- package/dist/lib/evaluation/EvaluatorFactory.d.ts +113 -0
- package/dist/lib/evaluation/EvaluatorFactory.js +281 -0
- package/dist/lib/evaluation/EvaluatorRegistry.d.ts +160 -0
- package/dist/lib/evaluation/EvaluatorRegistry.js +185 -0
- package/dist/lib/evaluation/errors/EvaluationError.d.ts +189 -0
- package/dist/lib/evaluation/errors/EvaluationError.js +207 -0
- package/dist/lib/evaluation/errors/index.d.ts +4 -0
- package/dist/lib/evaluation/errors/index.js +5 -0
- package/dist/lib/evaluation/hooks/index.d.ts +6 -0
- package/dist/lib/evaluation/hooks/index.js +7 -0
- package/dist/lib/evaluation/hooks/langfuseAdapter.d.ts +99 -0
- package/dist/lib/evaluation/hooks/langfuseAdapter.js +173 -0
- package/dist/lib/evaluation/hooks/observabilityHooks.d.ts +129 -0
- package/dist/lib/evaluation/hooks/observabilityHooks.js +182 -0
- package/dist/lib/evaluation/index.d.ts +11 -2
- package/dist/lib/evaluation/index.js +15 -0
- package/dist/lib/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
- package/dist/lib/evaluation/pipeline/evaluationPipeline.js +382 -0
- package/dist/lib/evaluation/pipeline/index.d.ts +8 -0
- package/dist/lib/evaluation/pipeline/index.js +9 -0
- package/dist/lib/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
- package/dist/lib/evaluation/pipeline/pipelineBuilder.js +261 -0
- package/dist/lib/evaluation/pipeline/presets.d.ts +66 -0
- package/dist/lib/evaluation/pipeline/presets.js +225 -0
- package/dist/lib/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
- package/dist/lib/evaluation/pipeline/strategies/batchStrategy.js +239 -0
- package/dist/lib/evaluation/pipeline/strategies/index.d.ts +6 -0
- package/dist/lib/evaluation/pipeline/strategies/index.js +7 -0
- package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
- package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.js +239 -0
- package/dist/lib/evaluation/reporting/index.d.ts +6 -0
- package/dist/lib/evaluation/reporting/index.js +7 -0
- package/dist/lib/evaluation/reporting/metricsCollector.d.ts +147 -0
- package/dist/lib/evaluation/reporting/metricsCollector.js +286 -0
- package/dist/lib/evaluation/reporting/reportGenerator.d.ts +90 -0
- package/dist/lib/evaluation/reporting/reportGenerator.js +375 -0
- package/dist/lib/evaluation/scorers/baseScorer.d.ts +83 -0
- package/dist/lib/evaluation/scorers/baseScorer.js +233 -0
- package/dist/lib/evaluation/scorers/customScorerUtils.d.ts +95 -0
- package/dist/lib/evaluation/scorers/customScorerUtils.js +382 -0
- package/dist/lib/evaluation/scorers/index.d.ts +10 -0
- package/dist/lib/evaluation/scorers/index.js +17 -0
- package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.js +100 -0
- package/dist/lib/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
- package/dist/lib/evaluation/scorers/llm/baseLLMScorer.js +282 -0
- package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.js +128 -0
- package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.js +93 -0
- package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.js +108 -0
- package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.js +122 -0
- package/dist/lib/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/hallucinationScorer.js +141 -0
- package/dist/lib/evaluation/scorers/llm/index.d.ts +15 -0
- package/dist/lib/evaluation/scorers/llm/index.js +17 -0
- package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.js +107 -0
- package/dist/lib/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/summarizationScorer.js +115 -0
- package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.js +107 -0
- package/dist/lib/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/toxicityScorer.js +122 -0
- package/dist/lib/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
- package/dist/lib/evaluation/scorers/rule/baseRuleScorer.js +234 -0
- package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
- package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.js +351 -0
- package/dist/lib/evaluation/scorers/rule/formatScorer.d.ts +147 -0
- package/dist/lib/evaluation/scorers/rule/formatScorer.js +471 -0
- package/dist/lib/evaluation/scorers/rule/index.d.ts +9 -0
- package/dist/lib/evaluation/scorers/rule/index.js +11 -0
- package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
- package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.js +348 -0
- package/dist/lib/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
- package/dist/lib/evaluation/scorers/rule/lengthScorer.js +352 -0
- package/dist/lib/evaluation/scorers/scorerBuilder.d.ts +161 -0
- package/dist/lib/evaluation/scorers/scorerBuilder.js +421 -0
- package/dist/lib/evaluation/scorers/scorerRegistry.d.ts +62 -0
- package/dist/lib/evaluation/scorers/scorerRegistry.js +468 -0
- package/dist/lib/index.d.ts +37 -25
- package/dist/lib/index.js +65 -26
- package/dist/lib/neurolink.d.ts +204 -0
- package/dist/lib/neurolink.js +296 -0
- package/dist/lib/types/index.d.ts +3 -1
- package/dist/lib/types/index.js +3 -2
- package/dist/lib/types/scorerTypes.d.ts +423 -0
- package/dist/lib/types/scorerTypes.js +6 -0
- package/dist/lib/utils/errorHandling.d.ts +20 -0
- package/dist/lib/utils/errorHandling.js +60 -0
- package/dist/neurolink.d.ts +204 -0
- package/dist/neurolink.js +296 -0
- package/dist/types/index.d.ts +3 -1
- package/dist/types/index.js +3 -2
- package/dist/types/scorerTypes.d.ts +423 -0
- package/dist/types/scorerTypes.js +5 -0
- package/dist/utils/errorHandling.d.ts +20 -0
- package/dist/utils/errorHandling.js +60 -0
- package/package.json +1 -1
|
@@ -0,0 +1,423 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Scorer type definitions for NeuroLink evaluation system
|
|
3
|
+
* Mastra-style modular scorer interfaces and types
|
|
4
|
+
*/
|
|
5
|
+
import type { JsonObject } from "./common.js";
|
|
6
|
+
import type { EnhancedEvaluationContext } from "./evaluationTypes.js";
|
|
7
|
+
import type { GenerateResult } from "./generateTypes.js";
|
|
8
|
+
/**
|
|
9
|
+
* Scorer type classification
|
|
10
|
+
*/
|
|
11
|
+
export type ScorerType = "llm" | "rule" | "hybrid";
|
|
12
|
+
/**
|
|
13
|
+
* Scorer categories for organization
|
|
14
|
+
*/
|
|
15
|
+
export type ScorerCategory = "accuracy" | "relevancy" | "safety" | "quality" | "faithfulness" | "custom";
|
|
16
|
+
/**
|
|
17
|
+
* Score scale configuration
|
|
18
|
+
*/
|
|
19
|
+
export type ScoreScale = {
|
|
20
|
+
/** Minimum score value */
|
|
21
|
+
min: number;
|
|
22
|
+
/** Maximum score value */
|
|
23
|
+
max: number;
|
|
24
|
+
/** Decimal precision for scores */
|
|
25
|
+
precision: number;
|
|
26
|
+
};
|
|
27
|
+
/**
|
|
28
|
+
* Individual score result from a scorer
|
|
29
|
+
*/
|
|
30
|
+
export type ScoreResult = {
|
|
31
|
+
/** Unique identifier for the scorer */
|
|
32
|
+
scorerId: string;
|
|
33
|
+
/** Display name of the scorer */
|
|
34
|
+
scorerName: string;
|
|
35
|
+
/** Numeric score value */
|
|
36
|
+
score: number;
|
|
37
|
+
/** Normalized score (0-1 scale) */
|
|
38
|
+
normalizedScore: number;
|
|
39
|
+
/** Score scale used */
|
|
40
|
+
scale: ScoreScale;
|
|
41
|
+
/** Human-readable reasoning for the score */
|
|
42
|
+
reasoning: string;
|
|
43
|
+
/** Whether the score passes the threshold */
|
|
44
|
+
passed: boolean;
|
|
45
|
+
/** Threshold used for pass/fail determination */
|
|
46
|
+
threshold: number;
|
|
47
|
+
/** Confidence level (0-1) for LLM-based scores */
|
|
48
|
+
confidence?: number;
|
|
49
|
+
/** Additional metadata from the scorer */
|
|
50
|
+
metadata?: JsonObject;
|
|
51
|
+
/** Time taken to compute the score (ms) */
|
|
52
|
+
computeTime: number;
|
|
53
|
+
/** Error if scoring failed */
|
|
54
|
+
error?: string;
|
|
55
|
+
};
|
|
56
|
+
/**
|
|
57
|
+
* Aggregated scores from multiple scorers
|
|
58
|
+
*/
|
|
59
|
+
export type AggregatedScores = {
|
|
60
|
+
/** Individual score results */
|
|
61
|
+
scores: ScoreResult[];
|
|
62
|
+
/** Overall aggregated score */
|
|
63
|
+
overallScore: number;
|
|
64
|
+
/** Aggregation method used */
|
|
65
|
+
aggregationMethod: AggregationMethod;
|
|
66
|
+
/** Whether overall evaluation passed */
|
|
67
|
+
passed: boolean;
|
|
68
|
+
/** Total computation time (ms) */
|
|
69
|
+
totalComputeTime: number;
|
|
70
|
+
/** Timestamp of evaluation */
|
|
71
|
+
timestamp: number;
|
|
72
|
+
/** Session/request ID for correlation */
|
|
73
|
+
correlationId?: string;
|
|
74
|
+
};
|
|
75
|
+
/**
|
|
76
|
+
* Aggregation method for combining scores
|
|
77
|
+
*/
|
|
78
|
+
export type AggregationMethod = "average" | "weighted" | "minimum" | "maximum" | "custom";
|
|
79
|
+
/**
|
|
80
|
+
* Scorer configuration options
|
|
81
|
+
*/
|
|
82
|
+
export type ScorerConfig = {
|
|
83
|
+
/** Whether the scorer is enabled */
|
|
84
|
+
enabled?: boolean;
|
|
85
|
+
/** Pass/fail threshold (0-1 normalized) */
|
|
86
|
+
threshold?: number;
|
|
87
|
+
/** Weight for weighted aggregation */
|
|
88
|
+
weight?: number;
|
|
89
|
+
/** Custom scorer-specific configuration */
|
|
90
|
+
options?: JsonObject;
|
|
91
|
+
/** Timeout for scorer execution (ms) */
|
|
92
|
+
timeout?: number;
|
|
93
|
+
/** Number of retry attempts */
|
|
94
|
+
retries?: number;
|
|
95
|
+
};
|
|
96
|
+
/**
|
|
97
|
+
* Input context for scorer execution
|
|
98
|
+
*/
|
|
99
|
+
export type ScorerInput = {
|
|
100
|
+
/** The user's original query/prompt */
|
|
101
|
+
query: string;
|
|
102
|
+
/** The AI-generated response to evaluate */
|
|
103
|
+
response: string;
|
|
104
|
+
/** Retrieved context (for RAG evaluations) */
|
|
105
|
+
context?: string[];
|
|
106
|
+
/** Ground truth/expected answer (for accuracy checks) */
|
|
107
|
+
groundTruth?: string;
|
|
108
|
+
/** Full generation result with metadata */
|
|
109
|
+
generationResult?: GenerateResult;
|
|
110
|
+
/** Enhanced evaluation context */
|
|
111
|
+
evaluationContext?: EnhancedEvaluationContext;
|
|
112
|
+
/** Conversation history for multi-turn evaluation */
|
|
113
|
+
conversationHistory?: Array<{
|
|
114
|
+
role: string;
|
|
115
|
+
content: string;
|
|
116
|
+
}>;
|
|
117
|
+
/** Custom input data for specific scorers */
|
|
118
|
+
custom?: JsonObject;
|
|
119
|
+
};
|
|
120
|
+
/**
|
|
121
|
+
* Scorer metadata for registration
|
|
122
|
+
*/
|
|
123
|
+
export type ScorerMetadata = {
|
|
124
|
+
/** Unique scorer identifier */
|
|
125
|
+
id: string;
|
|
126
|
+
/** Human-readable name */
|
|
127
|
+
name: string;
|
|
128
|
+
/** Description of what the scorer evaluates */
|
|
129
|
+
description: string;
|
|
130
|
+
/** Scorer type (llm, rule, hybrid) */
|
|
131
|
+
type: ScorerType;
|
|
132
|
+
/** Category for grouping */
|
|
133
|
+
category: ScorerCategory;
|
|
134
|
+
/** Version string */
|
|
135
|
+
version: string;
|
|
136
|
+
/** Default configuration */
|
|
137
|
+
defaultConfig: ScorerConfig;
|
|
138
|
+
/** Required input fields */
|
|
139
|
+
requiredInputs: (keyof ScorerInput)[];
|
|
140
|
+
/** Optional input fields */
|
|
141
|
+
optionalInputs: (keyof ScorerInput)[];
|
|
142
|
+
};
|
|
143
|
+
/**
|
|
144
|
+
* LLM-based scorer configuration
|
|
145
|
+
*/
|
|
146
|
+
export type LLMScorerConfig = ScorerConfig & {
|
|
147
|
+
/** Model to use for scoring */
|
|
148
|
+
model?: string;
|
|
149
|
+
/** Provider for the scoring model */
|
|
150
|
+
provider?: string;
|
|
151
|
+
/** Temperature for LLM scoring */
|
|
152
|
+
temperature?: number;
|
|
153
|
+
/** Custom prompt template */
|
|
154
|
+
promptTemplate?: string;
|
|
155
|
+
/** Output schema for structured scoring */
|
|
156
|
+
outputSchema?: JsonObject;
|
|
157
|
+
};
|
|
158
|
+
/**
|
|
159
|
+
* Rule-based scorer configuration
|
|
160
|
+
*/
|
|
161
|
+
export type RuleScorerConfig = ScorerConfig & {
|
|
162
|
+
/** Rules to apply */
|
|
163
|
+
rules?: ScorerRule[];
|
|
164
|
+
/** How to combine rule results */
|
|
165
|
+
ruleCombination?: "all" | "any" | "weighted";
|
|
166
|
+
};
|
|
167
|
+
/**
|
|
168
|
+
* Individual rule for rule-based scorers
|
|
169
|
+
*/
|
|
170
|
+
export type ScorerRule = {
|
|
171
|
+
/** Rule identifier */
|
|
172
|
+
id: string;
|
|
173
|
+
/** Rule description */
|
|
174
|
+
description: string;
|
|
175
|
+
/** Rule type */
|
|
176
|
+
type: "regex" | "keyword" | "length" | "custom";
|
|
177
|
+
/** Rule parameters */
|
|
178
|
+
params: JsonObject;
|
|
179
|
+
/** Weight for this rule */
|
|
180
|
+
weight?: number;
|
|
181
|
+
};
|
|
182
|
+
/**
|
|
183
|
+
* Rule evaluation result
|
|
184
|
+
*/
|
|
185
|
+
export type RuleResult = {
|
|
186
|
+
/** Rule identifier */
|
|
187
|
+
ruleId: string;
|
|
188
|
+
/** Whether the rule passed */
|
|
189
|
+
passed: boolean;
|
|
190
|
+
/** Score from this rule */
|
|
191
|
+
score: number;
|
|
192
|
+
/** Reasoning for the result */
|
|
193
|
+
reasoning?: string;
|
|
194
|
+
};
|
|
195
|
+
/**
|
|
196
|
+
* Scorer execution events for observability
|
|
197
|
+
*/
|
|
198
|
+
export type ScorerEvent = {
|
|
199
|
+
/** Event type */
|
|
200
|
+
type: "scorer:start" | "scorer:end" | "scorer:error";
|
|
201
|
+
/** Scorer identifier */
|
|
202
|
+
scorerId: string;
|
|
203
|
+
/** Event timestamp */
|
|
204
|
+
timestamp: number;
|
|
205
|
+
/** Duration (for end events) */
|
|
206
|
+
duration?: number;
|
|
207
|
+
/** Score result (for end events) */
|
|
208
|
+
score?: number;
|
|
209
|
+
/** Error message (for error events) */
|
|
210
|
+
error?: string;
|
|
211
|
+
/** Additional metadata */
|
|
212
|
+
metadata?: JsonObject;
|
|
213
|
+
};
|
|
214
|
+
/**
|
|
215
|
+
* Scorer registry entry
|
|
216
|
+
*/
|
|
217
|
+
export type ScorerRegistryEntry = {
|
|
218
|
+
/** Scorer metadata */
|
|
219
|
+
metadata: ScorerMetadata;
|
|
220
|
+
/** Factory function for creating scorer instances */
|
|
221
|
+
factory: ScorerFactory;
|
|
222
|
+
/** Default configuration */
|
|
223
|
+
defaultConfig: ScorerConfig;
|
|
224
|
+
/** Aliases for this scorer */
|
|
225
|
+
aliases?: string[];
|
|
226
|
+
};
|
|
227
|
+
/**
|
|
228
|
+
* Factory function for creating scorer instances
|
|
229
|
+
*/
|
|
230
|
+
export type ScorerFactory = (config?: ScorerConfig) => Promise<Scorer>;
|
|
231
|
+
/**
|
|
232
|
+
* Core Scorer interface - all scorers must implement this
|
|
233
|
+
*/
|
|
234
|
+
export type Scorer = {
|
|
235
|
+
/** Scorer metadata */
|
|
236
|
+
readonly metadata: ScorerMetadata;
|
|
237
|
+
/** Current configuration */
|
|
238
|
+
readonly config: ScorerConfig;
|
|
239
|
+
/**
|
|
240
|
+
* Execute the scorer and return a score result
|
|
241
|
+
* @param input - Input context for scoring
|
|
242
|
+
* @returns Score result
|
|
243
|
+
*/
|
|
244
|
+
score(input: ScorerInput): Promise<ScoreResult>;
|
|
245
|
+
/**
|
|
246
|
+
* Validate that required inputs are present
|
|
247
|
+
* @param input - Input to validate
|
|
248
|
+
* @returns Validation result
|
|
249
|
+
*/
|
|
250
|
+
validateInput(input: ScorerInput): {
|
|
251
|
+
valid: boolean;
|
|
252
|
+
errors: string[];
|
|
253
|
+
};
|
|
254
|
+
/**
|
|
255
|
+
* Update scorer configuration
|
|
256
|
+
* @param config - New configuration
|
|
257
|
+
*/
|
|
258
|
+
configure(config: Partial<ScorerConfig>): void;
|
|
259
|
+
};
|
|
260
|
+
/**
|
|
261
|
+
* Extended interface for LLM-based scorers
|
|
262
|
+
*/
|
|
263
|
+
export type LLMScorer = Scorer & {
|
|
264
|
+
/** LLM-specific configuration */
|
|
265
|
+
readonly llmConfig: LLMScorerConfig;
|
|
266
|
+
/**
|
|
267
|
+
* Generate the prompt for LLM scoring
|
|
268
|
+
* @param input - Scorer input
|
|
269
|
+
* @returns Prompt string
|
|
270
|
+
*/
|
|
271
|
+
generatePrompt(input: ScorerInput): string;
|
|
272
|
+
/**
|
|
273
|
+
* Parse LLM response into score result
|
|
274
|
+
* @param response - Raw LLM response
|
|
275
|
+
* @param input - Original input
|
|
276
|
+
* @returns Parsed score result
|
|
277
|
+
*/
|
|
278
|
+
parseResponse(response: string, input: ScorerInput): Partial<ScoreResult>;
|
|
279
|
+
};
|
|
280
|
+
/**
|
|
281
|
+
* Extended interface for rule-based scorers
|
|
282
|
+
*/
|
|
283
|
+
export type RuleScorer = Scorer & {
|
|
284
|
+
/** Rule-specific configuration */
|
|
285
|
+
readonly ruleConfig: RuleScorerConfig;
|
|
286
|
+
/**
|
|
287
|
+
* Get all rules for this scorer
|
|
288
|
+
* @returns Array of rules
|
|
289
|
+
*/
|
|
290
|
+
getRules(): ScorerRule[];
|
|
291
|
+
/**
|
|
292
|
+
* Evaluate a single rule
|
|
293
|
+
* @param rule - Rule to evaluate
|
|
294
|
+
* @param input - Scorer input
|
|
295
|
+
* @returns Rule result
|
|
296
|
+
*/
|
|
297
|
+
evaluateRule(rule: ScorerRule, input: ScorerInput): {
|
|
298
|
+
passed: boolean;
|
|
299
|
+
score: number;
|
|
300
|
+
};
|
|
301
|
+
};
|
|
302
|
+
/**
|
|
303
|
+
* Pipeline configuration for multi-scorer evaluation
|
|
304
|
+
*/
|
|
305
|
+
export type PipelineConfig = {
|
|
306
|
+
/** Pipeline name */
|
|
307
|
+
name?: string;
|
|
308
|
+
/** Pipeline description */
|
|
309
|
+
description?: string;
|
|
310
|
+
/** Scorers to run in the pipeline */
|
|
311
|
+
scorers: Array<{
|
|
312
|
+
id: string;
|
|
313
|
+
config?: ScorerConfig;
|
|
314
|
+
}>;
|
|
315
|
+
/** Aggregation configuration */
|
|
316
|
+
aggregation?: AggregationConfig;
|
|
317
|
+
/** Overall pass threshold */
|
|
318
|
+
passThreshold?: number;
|
|
319
|
+
/** Execution mode */
|
|
320
|
+
executionMode?: "parallel" | "sequential";
|
|
321
|
+
/** Stop on first failure */
|
|
322
|
+
stopOnFailure?: boolean;
|
|
323
|
+
/** Timeout for entire pipeline (ms) */
|
|
324
|
+
timeout?: number;
|
|
325
|
+
/** Required scorers that must pass */
|
|
326
|
+
requiredScorers?: string[];
|
|
327
|
+
};
|
|
328
|
+
/**
|
|
329
|
+
* Aggregation configuration
|
|
330
|
+
*/
|
|
331
|
+
export type AggregationConfig = {
|
|
332
|
+
/** Aggregation method */
|
|
333
|
+
method: AggregationMethod;
|
|
334
|
+
/** Weights for weighted aggregation */
|
|
335
|
+
weights?: Record<string, number>;
|
|
336
|
+
/** Custom aggregation function */
|
|
337
|
+
customFn?: (scores: ScoreResult[]) => number;
|
|
338
|
+
};
|
|
339
|
+
/**
|
|
340
|
+
* Sampling configuration for cost-efficient evaluation
|
|
341
|
+
*/
|
|
342
|
+
export type SamplingConfig = {
|
|
343
|
+
/** Sampling rate (0-1) */
|
|
344
|
+
rate: number;
|
|
345
|
+
/** Always evaluate certain conditions */
|
|
346
|
+
alwaysEvaluate?: {
|
|
347
|
+
/** Always evaluate errors */
|
|
348
|
+
errors?: boolean;
|
|
349
|
+
/** Always evaluate for certain users */
|
|
350
|
+
users?: string[];
|
|
351
|
+
/** Always evaluate certain tags */
|
|
352
|
+
tags?: string[];
|
|
353
|
+
};
|
|
354
|
+
/** Adaptive sampling configuration */
|
|
355
|
+
adaptive?: {
|
|
356
|
+
/** Enable adaptive sampling */
|
|
357
|
+
enabled: boolean;
|
|
358
|
+
/** Adjust rate based on quality */
|
|
359
|
+
qualityThreshold: number;
|
|
360
|
+
/** Minimum sampling rate */
|
|
361
|
+
minRate: number;
|
|
362
|
+
/** Maximum sampling rate */
|
|
363
|
+
maxRate: number;
|
|
364
|
+
};
|
|
365
|
+
};
|
|
366
|
+
/**
|
|
367
|
+
* Sampling decision result
|
|
368
|
+
*/
|
|
369
|
+
export type SamplingDecision = {
|
|
370
|
+
/** Whether to sample this request */
|
|
371
|
+
shouldSample: boolean;
|
|
372
|
+
/** Reason for decision */
|
|
373
|
+
reason: string;
|
|
374
|
+
/** Current sampling rate */
|
|
375
|
+
currentRate: number;
|
|
376
|
+
};
|
|
377
|
+
/**
|
|
378
|
+
* Sampling context for adaptive sampling
|
|
379
|
+
*/
|
|
380
|
+
export type SamplingContext = {
|
|
381
|
+
/** Recent quality scores */
|
|
382
|
+
recentScores?: number[];
|
|
383
|
+
/** User ID if available */
|
|
384
|
+
userId?: string;
|
|
385
|
+
/** Tags for this request */
|
|
386
|
+
tags?: string[];
|
|
387
|
+
/** Whether this request errored */
|
|
388
|
+
hasError?: boolean;
|
|
389
|
+
};
|
|
390
|
+
/**
|
|
391
|
+
* Evaluation trace context for observability
|
|
392
|
+
*/
|
|
393
|
+
export type EvaluationTraceContext = {
|
|
394
|
+
/** Trace ID */
|
|
395
|
+
traceId: string;
|
|
396
|
+
/** Span ID */
|
|
397
|
+
spanId?: string;
|
|
398
|
+
/** Parent span ID */
|
|
399
|
+
parentSpanId?: string;
|
|
400
|
+
/** Session ID */
|
|
401
|
+
sessionId?: string;
|
|
402
|
+
/** User ID */
|
|
403
|
+
userId?: string;
|
|
404
|
+
/** Custom attributes */
|
|
405
|
+
attributes?: Record<string, string | number | boolean>;
|
|
406
|
+
};
|
|
407
|
+
/**
|
|
408
|
+
* Report format options
|
|
409
|
+
*/
|
|
410
|
+
export type ReportFormat = "text" | "json" | "markdown" | "html";
|
|
411
|
+
/**
|
|
412
|
+
* Report configuration
|
|
413
|
+
*/
|
|
414
|
+
export type ReportConfig = {
|
|
415
|
+
/** Report format */
|
|
416
|
+
format: ReportFormat;
|
|
417
|
+
/** Include detailed reasoning */
|
|
418
|
+
includeReasoning?: boolean;
|
|
419
|
+
/** Include metadata */
|
|
420
|
+
includeMetadata?: boolean;
|
|
421
|
+
/** Include timing information */
|
|
422
|
+
includeTiming?: boolean;
|
|
423
|
+
};
|
|
@@ -36,6 +36,10 @@ export declare const ERROR_CODES: {
|
|
|
36
36
|
readonly RATE_LIMITER_QUEUE_FULL: "RATE_LIMITER_QUEUE_FULL";
|
|
37
37
|
readonly RATE_LIMITER_QUEUE_TIMEOUT: "RATE_LIMITER_QUEUE_TIMEOUT";
|
|
38
38
|
readonly RATE_LIMITER_RESET: "RATE_LIMITER_RESET";
|
|
39
|
+
readonly SCORER_NOT_FOUND: "SCORER_NOT_FOUND";
|
|
40
|
+
readonly EVALUATION_VALIDATION_FAILED: "EVALUATION_VALIDATION_FAILED";
|
|
41
|
+
readonly EVALUATION_TIMEOUT: "EVALUATION_TIMEOUT";
|
|
42
|
+
readonly EVALUATION_EXECUTION_FAILED: "EVALUATION_EXECUTION_FAILED";
|
|
39
43
|
readonly MISSING_PPT_PROPERTIES: "MISSING_PPT_PROPERTIES";
|
|
40
44
|
readonly INVALID_PPT_PAGES: "INVALID_PPT_PAGES";
|
|
41
45
|
readonly INVALID_PPT_FORMAT: "INVALID_PPT_FORMAT";
|
|
@@ -214,6 +218,22 @@ export declare class ErrorFactory {
|
|
|
214
218
|
* Create an invalid PPT provider error
|
|
215
219
|
*/
|
|
216
220
|
static invalidPPTProvider(provider: unknown): NeuroLinkError;
|
|
221
|
+
/**
|
|
222
|
+
* Create a scorer not found error
|
|
223
|
+
*/
|
|
224
|
+
static scorerNotFound(scorerId: string, availableScorers?: string[]): NeuroLinkError;
|
|
225
|
+
/**
|
|
226
|
+
* Create an evaluation validation error
|
|
227
|
+
*/
|
|
228
|
+
static evaluationValidationFailed(scorerId: string, errors: string[]): NeuroLinkError;
|
|
229
|
+
/**
|
|
230
|
+
* Create an evaluation timeout error
|
|
231
|
+
*/
|
|
232
|
+
static evaluationTimeout(operation: string, timeoutMs: number): NeuroLinkError;
|
|
233
|
+
/**
|
|
234
|
+
* Create an evaluation execution failed error
|
|
235
|
+
*/
|
|
236
|
+
static evaluationExecutionFailed(operation: string, originalError: Error): NeuroLinkError;
|
|
217
237
|
}
|
|
218
238
|
/**
|
|
219
239
|
* Timeout wrapper for async operations
|
|
@@ -47,6 +47,11 @@ export const ERROR_CODES = {
|
|
|
47
47
|
RATE_LIMITER_QUEUE_FULL: "RATE_LIMITER_QUEUE_FULL",
|
|
48
48
|
RATE_LIMITER_QUEUE_TIMEOUT: "RATE_LIMITER_QUEUE_TIMEOUT",
|
|
49
49
|
RATE_LIMITER_RESET: "RATE_LIMITER_RESET",
|
|
50
|
+
// Evaluation errors
|
|
51
|
+
SCORER_NOT_FOUND: "SCORER_NOT_FOUND",
|
|
52
|
+
EVALUATION_VALIDATION_FAILED: "EVALUATION_VALIDATION_FAILED",
|
|
53
|
+
EVALUATION_TIMEOUT: "EVALUATION_TIMEOUT",
|
|
54
|
+
EVALUATION_EXECUTION_FAILED: "EVALUATION_EXECUTION_FAILED",
|
|
50
55
|
// PPT validation errors
|
|
51
56
|
MISSING_PPT_PROPERTIES: "MISSING_PPT_PROPERTIES",
|
|
52
57
|
INVALID_PPT_PAGES: "INVALID_PPT_PAGES",
|
|
@@ -731,6 +736,61 @@ export class ErrorFactory {
|
|
|
731
736
|
},
|
|
732
737
|
});
|
|
733
738
|
}
|
|
739
|
+
// ============================================================================
|
|
740
|
+
// EVALUATION ERRORS
|
|
741
|
+
// ============================================================================
|
|
742
|
+
/**
|
|
743
|
+
* Create a scorer not found error
|
|
744
|
+
*/
|
|
745
|
+
static scorerNotFound(scorerId, availableScorers) {
|
|
746
|
+
return new NeuroLinkError({
|
|
747
|
+
code: ERROR_CODES.SCORER_NOT_FOUND,
|
|
748
|
+
message: `Scorer '${scorerId}' not found. Use neurolink.getAvailableScorers() to see available scorers.`,
|
|
749
|
+
category: ErrorCategory.VALIDATION,
|
|
750
|
+
severity: ErrorSeverity.MEDIUM,
|
|
751
|
+
retriable: false,
|
|
752
|
+
context: { scorerId, availableScorers },
|
|
753
|
+
});
|
|
754
|
+
}
|
|
755
|
+
/**
|
|
756
|
+
* Create an evaluation validation error
|
|
757
|
+
*/
|
|
758
|
+
static evaluationValidationFailed(scorerId, errors) {
|
|
759
|
+
return new NeuroLinkError({
|
|
760
|
+
code: ERROR_CODES.EVALUATION_VALIDATION_FAILED,
|
|
761
|
+
message: `Invalid input for scorer '${scorerId}': ${errors.join(", ")}`,
|
|
762
|
+
category: ErrorCategory.VALIDATION,
|
|
763
|
+
severity: ErrorSeverity.MEDIUM,
|
|
764
|
+
retriable: false,
|
|
765
|
+
context: { scorerId, validationErrors: errors },
|
|
766
|
+
});
|
|
767
|
+
}
|
|
768
|
+
/**
|
|
769
|
+
* Create an evaluation timeout error
|
|
770
|
+
*/
|
|
771
|
+
static evaluationTimeout(operation, timeoutMs) {
|
|
772
|
+
return new NeuroLinkError({
|
|
773
|
+
code: ERROR_CODES.EVALUATION_TIMEOUT,
|
|
774
|
+
message: `Evaluation ${operation} timed out after ${timeoutMs}ms`,
|
|
775
|
+
category: ErrorCategory.TIMEOUT,
|
|
776
|
+
severity: ErrorSeverity.HIGH,
|
|
777
|
+
retriable: true,
|
|
778
|
+
context: { operation, timeoutMs },
|
|
779
|
+
});
|
|
780
|
+
}
|
|
781
|
+
/**
|
|
782
|
+
* Create an evaluation execution failed error
|
|
783
|
+
*/
|
|
784
|
+
static evaluationExecutionFailed(operation, originalError) {
|
|
785
|
+
return new NeuroLinkError({
|
|
786
|
+
code: ERROR_CODES.EVALUATION_EXECUTION_FAILED,
|
|
787
|
+
message: `Evaluation ${operation} failed: ${originalError.message}`,
|
|
788
|
+
category: ErrorCategory.EXECUTION,
|
|
789
|
+
severity: ErrorSeverity.HIGH,
|
|
790
|
+
retriable: false,
|
|
791
|
+
originalError,
|
|
792
|
+
});
|
|
793
|
+
}
|
|
734
794
|
}
|
|
735
795
|
/**
|
|
736
796
|
* Timeout wrapper for async operations
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@juspay/neurolink",
|
|
3
|
-
"version": "9.36.0",
|
|
3
|
+
"version": "9.37.0",
|
|
4
4
|
"description": "Universal AI Development Platform with working MCP integration, multi-provider support, and professional CLI. Built-in tools operational, 58+ external MCP servers discoverable. Connect to filesystem, GitHub, database operations, and more. Build, test, and deploy AI applications with 13 providers: OpenAI, Anthropic, Google AI, AWS Bedrock, Azure, Hugging Face, Ollama, and Mistral AI.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "Juspay Technologies",
|