@juspay/neurolink 9.36.1 → 9.37.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202)
  1. package/CHANGELOG.md +6 -0
  2. package/dist/auth/errors.d.ts +1 -1
  3. package/dist/auth/middleware/AuthMiddleware.d.ts +1 -1
  4. package/dist/auth/providers/BaseAuthProvider.d.ts +1 -1
  5. package/dist/browser/neurolink.min.js +921 -423
  6. package/dist/cli/commands/evaluate.d.ts +48 -0
  7. package/dist/cli/commands/evaluate.js +955 -0
  8. package/dist/cli/parser.js +4 -1
  9. package/dist/evaluation/BatchEvaluator.d.ts +163 -0
  10. package/dist/evaluation/BatchEvaluator.js +267 -0
  11. package/dist/evaluation/EvaluationAggregator.d.ts +272 -0
  12. package/dist/evaluation/EvaluationAggregator.js +377 -0
  13. package/dist/evaluation/EvaluatorFactory.d.ts +113 -0
  14. package/dist/evaluation/EvaluatorFactory.js +280 -0
  15. package/dist/evaluation/EvaluatorRegistry.d.ts +160 -0
  16. package/dist/evaluation/EvaluatorRegistry.js +184 -0
  17. package/dist/evaluation/errors/EvaluationError.d.ts +189 -0
  18. package/dist/evaluation/errors/EvaluationError.js +206 -0
  19. package/dist/evaluation/errors/index.d.ts +4 -0
  20. package/dist/evaluation/errors/index.js +4 -0
  21. package/dist/evaluation/hooks/index.d.ts +6 -0
  22. package/dist/evaluation/hooks/index.js +6 -0
  23. package/dist/evaluation/hooks/langfuseAdapter.d.ts +99 -0
  24. package/dist/evaluation/hooks/langfuseAdapter.js +172 -0
  25. package/dist/evaluation/hooks/observabilityHooks.d.ts +129 -0
  26. package/dist/evaluation/hooks/observabilityHooks.js +181 -0
  27. package/dist/evaluation/index.d.ts +11 -2
  28. package/dist/evaluation/index.js +15 -0
  29. package/dist/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
  30. package/dist/evaluation/pipeline/evaluationPipeline.js +381 -0
  31. package/dist/evaluation/pipeline/index.d.ts +8 -0
  32. package/dist/evaluation/pipeline/index.js +8 -0
  33. package/dist/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
  34. package/dist/evaluation/pipeline/pipelineBuilder.js +260 -0
  35. package/dist/evaluation/pipeline/presets.d.ts +66 -0
  36. package/dist/evaluation/pipeline/presets.js +224 -0
  37. package/dist/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
  38. package/dist/evaluation/pipeline/strategies/batchStrategy.js +238 -0
  39. package/dist/evaluation/pipeline/strategies/index.d.ts +6 -0
  40. package/dist/evaluation/pipeline/strategies/index.js +6 -0
  41. package/dist/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
  42. package/dist/evaluation/pipeline/strategies/samplingStrategy.js +238 -0
  43. package/dist/evaluation/reporting/index.d.ts +6 -0
  44. package/dist/evaluation/reporting/index.js +6 -0
  45. package/dist/evaluation/reporting/metricsCollector.d.ts +147 -0
  46. package/dist/evaluation/reporting/metricsCollector.js +285 -0
  47. package/dist/evaluation/reporting/reportGenerator.d.ts +90 -0
  48. package/dist/evaluation/reporting/reportGenerator.js +374 -0
  49. package/dist/evaluation/scorers/baseScorer.d.ts +83 -0
  50. package/dist/evaluation/scorers/baseScorer.js +232 -0
  51. package/dist/evaluation/scorers/customScorerUtils.d.ts +95 -0
  52. package/dist/evaluation/scorers/customScorerUtils.js +381 -0
  53. package/dist/evaluation/scorers/index.d.ts +10 -0
  54. package/dist/evaluation/scorers/index.js +16 -0
  55. package/dist/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
  56. package/dist/evaluation/scorers/llm/answerRelevancyScorer.js +99 -0
  57. package/dist/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
  58. package/dist/evaluation/scorers/llm/baseLLMScorer.js +281 -0
  59. package/dist/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
  60. package/dist/evaluation/scorers/llm/biasDetectionScorer.js +127 -0
  61. package/dist/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
  62. package/dist/evaluation/scorers/llm/contextPrecisionScorer.js +92 -0
  63. package/dist/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
  64. package/dist/evaluation/scorers/llm/contextRelevancyScorer.js +107 -0
  65. package/dist/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
  66. package/dist/evaluation/scorers/llm/faithfulnessScorer.js +121 -0
  67. package/dist/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
  68. package/dist/evaluation/scorers/llm/hallucinationScorer.js +140 -0
  69. package/dist/evaluation/scorers/llm/index.d.ts +15 -0
  70. package/dist/evaluation/scorers/llm/index.js +16 -0
  71. package/dist/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
  72. package/dist/evaluation/scorers/llm/promptAlignmentScorer.js +106 -0
  73. package/dist/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
  74. package/dist/evaluation/scorers/llm/summarizationScorer.js +114 -0
  75. package/dist/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
  76. package/dist/evaluation/scorers/llm/toneConsistencyScorer.js +106 -0
  77. package/dist/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
  78. package/dist/evaluation/scorers/llm/toxicityScorer.js +121 -0
  79. package/dist/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
  80. package/dist/evaluation/scorers/rule/baseRuleScorer.js +233 -0
  81. package/dist/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
  82. package/dist/evaluation/scorers/rule/contentSimilarityScorer.js +350 -0
  83. package/dist/evaluation/scorers/rule/formatScorer.d.ts +147 -0
  84. package/dist/evaluation/scorers/rule/formatScorer.js +470 -0
  85. package/dist/evaluation/scorers/rule/index.d.ts +9 -0
  86. package/dist/evaluation/scorers/rule/index.js +10 -0
  87. package/dist/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
  88. package/dist/evaluation/scorers/rule/keywordCoverageScorer.js +347 -0
  89. package/dist/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
  90. package/dist/evaluation/scorers/rule/lengthScorer.js +351 -0
  91. package/dist/evaluation/scorers/scorerBuilder.d.ts +161 -0
  92. package/dist/evaluation/scorers/scorerBuilder.js +420 -0
  93. package/dist/evaluation/scorers/scorerRegistry.d.ts +62 -0
  94. package/dist/evaluation/scorers/scorerRegistry.js +467 -0
  95. package/dist/index.d.ts +37 -25
  96. package/dist/index.js +65 -26
  97. package/dist/lib/auth/providers/BaseAuthProvider.d.ts +1 -1
  98. package/dist/lib/evaluation/BatchEvaluator.d.ts +163 -0
  99. package/dist/lib/evaluation/BatchEvaluator.js +268 -0
  100. package/dist/lib/evaluation/EvaluationAggregator.d.ts +272 -0
  101. package/dist/lib/evaluation/EvaluationAggregator.js +378 -0
  102. package/dist/lib/evaluation/EvaluatorFactory.d.ts +113 -0
  103. package/dist/lib/evaluation/EvaluatorFactory.js +281 -0
  104. package/dist/lib/evaluation/EvaluatorRegistry.d.ts +160 -0
  105. package/dist/lib/evaluation/EvaluatorRegistry.js +185 -0
  106. package/dist/lib/evaluation/errors/EvaluationError.d.ts +189 -0
  107. package/dist/lib/evaluation/errors/EvaluationError.js +207 -0
  108. package/dist/lib/evaluation/errors/index.d.ts +4 -0
  109. package/dist/lib/evaluation/errors/index.js +5 -0
  110. package/dist/lib/evaluation/hooks/index.d.ts +6 -0
  111. package/dist/lib/evaluation/hooks/index.js +7 -0
  112. package/dist/lib/evaluation/hooks/langfuseAdapter.d.ts +99 -0
  113. package/dist/lib/evaluation/hooks/langfuseAdapter.js +173 -0
  114. package/dist/lib/evaluation/hooks/observabilityHooks.d.ts +129 -0
  115. package/dist/lib/evaluation/hooks/observabilityHooks.js +182 -0
  116. package/dist/lib/evaluation/index.d.ts +11 -2
  117. package/dist/lib/evaluation/index.js +15 -0
  118. package/dist/lib/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
  119. package/dist/lib/evaluation/pipeline/evaluationPipeline.js +382 -0
  120. package/dist/lib/evaluation/pipeline/index.d.ts +8 -0
  121. package/dist/lib/evaluation/pipeline/index.js +9 -0
  122. package/dist/lib/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
  123. package/dist/lib/evaluation/pipeline/pipelineBuilder.js +261 -0
  124. package/dist/lib/evaluation/pipeline/presets.d.ts +66 -0
  125. package/dist/lib/evaluation/pipeline/presets.js +225 -0
  126. package/dist/lib/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
  127. package/dist/lib/evaluation/pipeline/strategies/batchStrategy.js +239 -0
  128. package/dist/lib/evaluation/pipeline/strategies/index.d.ts +6 -0
  129. package/dist/lib/evaluation/pipeline/strategies/index.js +7 -0
  130. package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
  131. package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.js +239 -0
  132. package/dist/lib/evaluation/reporting/index.d.ts +6 -0
  133. package/dist/lib/evaluation/reporting/index.js +7 -0
  134. package/dist/lib/evaluation/reporting/metricsCollector.d.ts +147 -0
  135. package/dist/lib/evaluation/reporting/metricsCollector.js +286 -0
  136. package/dist/lib/evaluation/reporting/reportGenerator.d.ts +90 -0
  137. package/dist/lib/evaluation/reporting/reportGenerator.js +375 -0
  138. package/dist/lib/evaluation/scorers/baseScorer.d.ts +83 -0
  139. package/dist/lib/evaluation/scorers/baseScorer.js +233 -0
  140. package/dist/lib/evaluation/scorers/customScorerUtils.d.ts +95 -0
  141. package/dist/lib/evaluation/scorers/customScorerUtils.js +382 -0
  142. package/dist/lib/evaluation/scorers/index.d.ts +10 -0
  143. package/dist/lib/evaluation/scorers/index.js +17 -0
  144. package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
  145. package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.js +100 -0
  146. package/dist/lib/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
  147. package/dist/lib/evaluation/scorers/llm/baseLLMScorer.js +282 -0
  148. package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
  149. package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.js +128 -0
  150. package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
  151. package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.js +93 -0
  152. package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
  153. package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.js +108 -0
  154. package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
  155. package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.js +122 -0
  156. package/dist/lib/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
  157. package/dist/lib/evaluation/scorers/llm/hallucinationScorer.js +141 -0
  158. package/dist/lib/evaluation/scorers/llm/index.d.ts +15 -0
  159. package/dist/lib/evaluation/scorers/llm/index.js +17 -0
  160. package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
  161. package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.js +107 -0
  162. package/dist/lib/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
  163. package/dist/lib/evaluation/scorers/llm/summarizationScorer.js +115 -0
  164. package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
  165. package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.js +107 -0
  166. package/dist/lib/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
  167. package/dist/lib/evaluation/scorers/llm/toxicityScorer.js +122 -0
  168. package/dist/lib/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
  169. package/dist/lib/evaluation/scorers/rule/baseRuleScorer.js +234 -0
  170. package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
  171. package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.js +351 -0
  172. package/dist/lib/evaluation/scorers/rule/formatScorer.d.ts +147 -0
  173. package/dist/lib/evaluation/scorers/rule/formatScorer.js +471 -0
  174. package/dist/lib/evaluation/scorers/rule/index.d.ts +9 -0
  175. package/dist/lib/evaluation/scorers/rule/index.js +11 -0
  176. package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
  177. package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.js +348 -0
  178. package/dist/lib/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
  179. package/dist/lib/evaluation/scorers/rule/lengthScorer.js +352 -0
  180. package/dist/lib/evaluation/scorers/scorerBuilder.d.ts +161 -0
  181. package/dist/lib/evaluation/scorers/scorerBuilder.js +421 -0
  182. package/dist/lib/evaluation/scorers/scorerRegistry.d.ts +62 -0
  183. package/dist/lib/evaluation/scorers/scorerRegistry.js +468 -0
  184. package/dist/lib/index.d.ts +37 -25
  185. package/dist/lib/index.js +65 -26
  186. package/dist/lib/neurolink.d.ts +204 -0
  187. package/dist/lib/neurolink.js +296 -0
  188. package/dist/lib/types/index.d.ts +3 -1
  189. package/dist/lib/types/index.js +3 -2
  190. package/dist/lib/types/scorerTypes.d.ts +423 -0
  191. package/dist/lib/types/scorerTypes.js +6 -0
  192. package/dist/lib/utils/errorHandling.d.ts +20 -0
  193. package/dist/lib/utils/errorHandling.js +60 -0
  194. package/dist/neurolink.d.ts +204 -0
  195. package/dist/neurolink.js +296 -0
  196. package/dist/types/index.d.ts +3 -1
  197. package/dist/types/index.js +3 -2
  198. package/dist/types/scorerTypes.d.ts +423 -0
  199. package/dist/types/scorerTypes.js +5 -0
  200. package/dist/utils/errorHandling.d.ts +20 -0
  201. package/dist/utils/errorHandling.js +60 -0
  202. package/package.json +1 -1
@@ -0,0 +1,95 @@
1
+ /**
2
+ * @file Custom Scorer Utilities
3
+ * Helper functions for creating custom scorers
4
+ */
5
+ import type { JsonObject } from "../../types/common.js";
6
+ import type { ScorerCategory, ScorerConfig, ScorerInput, ScorerMetadata, ScorerType } from "../../types/scorerTypes.js";
7
+ import { BaseScorer } from "./baseScorer.js";
8
+ /**
9
+ * Function scorer - a simple function-based scorer
+ *
+ * Signature of the callback accepted by `createFunctionScorer`. It receives
+ * the scorer input and resolves to an object holding a numeric `score`, a
+ * `reasoning` string and, optionally, JSON `metadata`.
+ * The function-based scorer implementation clamps the returned `score` to
+ * `DEFAULT_SCORE_SCALE` and converts a thrown error into an error result.
10
+ */
11
+ export type ScorerFunction = (input: ScorerInput) => Promise<{
12
+ score: number;
13
+ reasoning: string;
14
+ metadata?: JsonObject;
15
+ }>;
16
+ /**
17
+ * Create scorer metadata with defaults
+ *
+ * Every field missing from `options` is filled in by the implementation:
+ * description `Custom scorer: <name>`, type "rule", category "custom",
+ * version "1.0.0", requiredInputs ["response"], optionalInputs
+ * ["query", "context", "groundTruth"], and a defaultConfig of
+ * { enabled: true, threshold: 0.7, weight: 1.0, timeout: 5000, retries: 0 }.
+ *
+ * @param id - Identifier stored on the returned metadata.
+ * @param name - Display name; also used in the fallback description.
+ * @param options - Optional overrides for the defaults listed above.
+ * @returns The populated scorer metadata.
18
+ */
19
+ export declare function createScorerMetadata(id: string, name: string, options?: {
20
+ description?: string;
21
+ type?: ScorerType;
22
+ category?: ScorerCategory;
23
+ version?: string;
24
+ requiredInputs?: (keyof ScorerInput)[];
25
+ optionalInputs?: (keyof ScorerInput)[];
26
+ defaultConfig?: ScorerConfig;
27
+ }): ScorerMetadata;
28
+ /**
29
+ * Create a simple function-based scorer
+ *
+ * The metadata options are forwarded to `createScorerMetadata` (type
+ * falls back to "rule"); `options.config` is handed to the scorer itself.
+ *
+ * @param id - Scorer identifier.
+ * @param name - Scorer display name.
+ * @param scorerFn - Async callback that produces the score and reasoning.
+ * @param options - Optional metadata overrides and scorer config.
+ * @returns A scorer that runs `scorerFn` for each input.
30
+ */
31
+ export declare function createFunctionScorer(id: string, name: string, scorerFn: ScorerFunction, options?: {
32
+ description?: string;
33
+ category?: ScorerCategory;
34
+ type?: ScorerType;
35
+ version?: string;
36
+ requiredInputs?: (keyof ScorerInput)[];
37
+ optionalInputs?: (keyof ScorerInput)[];
38
+ config?: ScorerConfig;
39
+ }): BaseScorer;
40
+ /**
41
+ * Create a regex-based scorer
+ *
+ * The scorer returns 10 when the response satisfies the expectation and 0
+ * otherwise. `shouldMatch` defaults to true; set it to false to require
+ * that the pattern does NOT match.
+ *
+ * A string `pattern` is compiled with `flags` (default "i"). `flags` is
+ * not applied when `pattern` is already a RegExp.
+ *
+ * @param id - Scorer identifier.
+ * @param name - Scorer display name.
+ * @param options - Pattern, match expectation, description and config.
+ * @returns A rule scorer in the "quality" category.
+ * @throws A CONFIGURATION_ERROR evaluation error when the pattern source is
+ * longer than 200 characters, trips the nested-quantifier heuristic, or
+ * (string patterns only) fails to compile.
42
+ */
43
+ export declare function createRegexScorer(id: string, name: string, options: {
44
+ pattern: string | RegExp;
45
+ flags?: string;
46
+ shouldMatch?: boolean;
47
+ description?: string;
48
+ config?: ScorerConfig;
49
+ }): BaseScorer;
50
+ /**
51
+ * Create a keyword presence scorer
+ *
+ * Each required keyword that is present and each forbidden keyword that is
+ * absent counts as one passed check; the score is
+ * (passed checks / total checks) * 10, or 10 when no keywords are given.
+ * Matching is plain substring search, case-insensitive unless
+ * `caseInsensitive` is set to false (default true).
+ *
+ * @param id - Scorer identifier.
+ * @param name - Scorer display name.
+ * @param options - Keyword lists, case handling, description and config.
+ * @returns A rule scorer in the "quality" category.
52
+ */
53
+ export declare function createKeywordScorer(id: string, name: string, options: {
54
+ requiredKeywords?: string[];
55
+ forbiddenKeywords?: string[];
56
+ caseInsensitive?: boolean;
57
+ description?: string;
58
+ config?: ScorerConfig;
59
+ }): BaseScorer;
60
+ /**
61
+ * Create a length-based scorer
+ *
+ * Scoring is all-or-nothing: 10 when every supplied bound holds, 0 as soon
+ * as any bound is violated. Words are the non-empty pieces of the trimmed
+ * response split on whitespace; characters are the response's `length`.
+ * Bounds that are left undefined are not checked.
+ *
+ * @param id - Scorer identifier.
+ * @param name - Scorer display name.
+ * @param options - Word/character bounds, description and config.
+ * @returns A rule scorer in the "quality" category.
62
+ */
63
+ export declare function createSimpleLengthScorer(id: string, name: string, options: {
64
+ minWords?: number;
65
+ maxWords?: number;
66
+ minChars?: number;
67
+ maxChars?: number;
68
+ description?: string;
69
+ config?: ScorerConfig;
70
+ }): BaseScorer;
71
+ /**
72
+ * Compose multiple scorers into a single scorer with aggregation
+ *
+ * All sub-scorers run concurrently on the same input. `aggregation`
+ * defaults to "average"; with "weighted", `weights[i]` applies to
+ * `scorers[i]` and a missing weight counts as 1.0.
+ *
+ * @param id - Scorer identifier.
+ * @param name - Scorer display name.
+ * @param scorers - Sub-scorers to combine; must not be empty.
+ * @param options - Aggregation method, weights, description and config.
+ * @returns A "hybrid" scorer in the "custom" category.
+ * @throws Error when `scorers` is empty.
73
+ */
74
+ export declare function composeScorers(id: string, name: string, scorers: BaseScorer[], options?: {
75
+ aggregation?: "average" | "min" | "max" | "weighted";
76
+ weights?: number[];
77
+ description?: string;
78
+ config?: ScorerConfig;
79
+ }): BaseScorer;
80
+ /**
81
+ * Create a conditional scorer that only runs if a condition is met
+ *
+ * When `condition(input)` is truthy the wrapped scorer's score and
+ * reasoning are returned; otherwise `defaultScore` (default 10) and
+ * `defaultReasoning` (default "Condition not met, using default score")
+ * are used. Type and category are copied from the wrapped scorer.
+ *
+ * @param id - Scorer identifier.
+ * @param name - Scorer display name.
+ * @param condition - Synchronous predicate evaluated on each input.
+ * @param scorer - Scorer to delegate to when the condition holds.
+ * @param options - Fallback score/reasoning, description and config.
+ * @returns The conditional scorer.
82
+ */
83
+ export declare function createConditionalScorer(id: string, name: string, condition: (input: ScorerInput) => boolean, scorer: BaseScorer, options?: {
84
+ defaultScore?: number;
85
+ defaultReasoning?: string;
86
+ description?: string;
87
+ config?: ScorerConfig;
88
+ }): BaseScorer;
89
+ /**
90
+ * Create a scorer that inverts the score (10 - score)
+ *
+ * The implementation computes `DEFAULT_SCORE_SCALE.max - score` from the
+ * wrapped scorer's result and prefixes its reasoning with "Inverted: ".
+ * Type and category are copied from the wrapped scorer.
+ *
+ * @param id - Scorer identifier.
+ * @param name - Scorer display name.
+ * @param scorer - Scorer whose score is inverted.
+ * @param options - Optional description and config.
+ * @returns The inverting scorer.
91
+ */
92
+ export declare function createInvertedScorer(id: string, name: string, scorer: BaseScorer, options?: {
93
+ description?: string;
94
+ config?: ScorerConfig;
95
+ }): BaseScorer;
@@ -0,0 +1,382 @@
1
+ /**
2
+ * @file Custom Scorer Utilities
3
+ * Helper functions for creating custom scorers
4
+ */
5
+ import { BaseScorer, DEFAULT_SCORE_SCALE } from "./baseScorer.js";
6
+ import { evaluationErrors } from "../errors/EvaluationError.js";
7
/**
 * Build a complete scorer metadata record, substituting a default for every
 * field the caller did not provide (null or undefined).
 *
 * @param id - Identifier copied onto the metadata.
 * @param name - Display name; also interpolated into the fallback description.
 * @param options - Optional overrides (description, type, category, version,
 *   requiredInputs, optionalInputs, defaultConfig).
 * @returns The populated metadata object.
 */
export function createScorerMetadata(id, name, options) {
    const overrides = options ?? {};
    const description = overrides.description ?? `Custom scorer: ${name}`;
    const type = overrides.type ?? "rule";
    const category = overrides.category ?? "custom";
    const version = overrides.version ?? "1.0.0";
    const requiredInputs = overrides.requiredInputs ?? ["response"];
    const optionalInputs = overrides.optionalInputs ?? ["query", "context", "groundTruth"];
    // A fresh config object is created per call so callers never share one.
    const defaultConfig = overrides.defaultConfig ?? {
        enabled: true,
        threshold: 0.7,
        weight: 1.0,
        timeout: 5000,
        retries: 0,
    };
    return {
        id,
        name,
        description,
        type,
        category,
        version,
        requiredInputs,
        optionalInputs,
        defaultConfig,
    };
}
33
/**
 * Function-based scorer implementation.
 *
 * Wraps a user-supplied async scoring function in the BaseScorer contract:
 * the input is validated first, the function's score is clamped to
 * `DEFAULT_SCORE_SCALE`, and any failure is reported as an error result
 * instead of being thrown to the caller.
 */
class FunctionScorer extends BaseScorer {
    _scorerFn;
    /**
     * @param metadata - Scorer metadata passed to BaseScorer.
     * @param scorerFn - Async function producing `{ score, reasoning, metadata? }`.
     * @param config - Optional scorer config passed to BaseScorer.
     */
    constructor(metadata, scorerFn, config) {
        super(metadata, config);
        this._scorerFn = scorerFn;
    }
    /**
     * Score a single input.
     *
     * @param input - Scorer input handed to validation and to the scoring function.
     * @returns The score result, or an error result when validation fails,
     *   the function throws, or it yields a non-numeric score.
     */
    async score(input) {
        return this.executeWithTiming(async () => {
            const validation = this.validateInput(input);
            if (!validation.valid) {
                return this.createErrorResult(`Invalid input: ${validation.errors.join(", ")}`);
            }
            try {
                const result = await this._scorerFn(input);
                // Math.max/Math.min propagate NaN, so clamping alone would let a
                // NaN (or non-number) score escape; surface it as an error instead.
                if (typeof result.score !== "number" || Number.isNaN(result.score)) {
                    return this.createErrorResult(`Scorer function returned a non-numeric score: ${String(result.score)}`);
                }
                // Clamp score to valid range (±Infinity collapses to the bounds).
                const clampedScore = Math.max(DEFAULT_SCORE_SCALE.min, Math.min(DEFAULT_SCORE_SCALE.max, result.score));
                return this.createScoreResult(clampedScore, result.reasoning, {
                    metadata: result.metadata,
                });
            }
            catch (error) {
                const errorMessage = error instanceof Error ? error.message : String(error);
                return this.createErrorResult(errorMessage);
            }
        });
    }
}
64
/**
 * Create a simple function-based scorer.
 *
 * @param id - Scorer identifier.
 * @param name - Scorer display name.
 * @param scorerFn - Async function that computes the score for an input.
 * @param options - Optional metadata overrides plus `config` for the scorer.
 * @returns A FunctionScorer wrapping `scorerFn`.
 */
export function createFunctionScorer(id, name, scorerFn, options) {
    const { description, category, type, version, requiredInputs, optionalInputs, config, } = options ?? {};
    const scorerMetadata = createScorerMetadata(id, name, {
        description,
        category,
        // Function scorers are treated as rule scorers unless stated otherwise.
        type: type ?? "rule",
        version,
        requiredInputs,
        optionalInputs,
    });
    return new FunctionScorer(scorerMetadata, scorerFn, config);
}
78
/**
 * Reject regex sources that are too long or that trip the nested-quantifier
 * heuristic. Shared by the string and precompiled-RegExp paths.
 *
 * The heuristic is deliberately coarse: any two quantifier characters
 * (`+`, `*`, `{`) with no whitespace between them are rejected, so harmless
 * patterns such as `a+b+` are refused as well.
 *
 * @param source - Regex source text to validate.
 * @throws CONFIGURATION_ERROR evaluation error when a rule is violated.
 */
function assertSafeRegexSource(source) {
    if (source.length > 200) {
        throw evaluationErrors.create("CONFIGURATION_ERROR", "Regex pattern exceeds maximum length of 200 characters", {
            retryable: false,
            details: { patternLength: source.length },
        });
    }
    // Check for nested quantifiers that could cause catastrophic backtracking
    if (/(\+|\*|\{)\S*(\+|\*|\{)/.test(source)) {
        throw evaluationErrors.create("CONFIGURATION_ERROR", "Regex pattern contains nested quantifiers which may cause catastrophic backtracking", { retryable: false, details: { pattern: source } });
    }
}
/**
 * Create a regex-based scorer.
 *
 * Scores 10 when the response meets the expectation (`shouldMatch`, default
 * true) and 0 otherwise. A string pattern is compiled with `options.flags`
 * (default "i"); a precompiled RegExp is used as-is and `flags` is ignored.
 *
 * @param id - Scorer identifier.
 * @param name - Scorer display name.
 * @param options - `pattern`, optional `flags`, `shouldMatch`, `description`, `config`.
 * @returns A FunctionScorer performing the regex check.
 * @throws CONFIGURATION_ERROR evaluation error for unsafe or invalid patterns.
 */
export function createRegexScorer(id, name, options) {
    const metadata = createScorerMetadata(id, name, {
        description: options.description ??
            `Regex scorer checking for pattern: ${options.pattern}`,
        type: "rule",
        category: "quality",
    });
    let pattern;
    if (typeof options.pattern === "string") {
        assertSafeRegexSource(options.pattern);
        try {
            pattern = new RegExp(options.pattern, options.flags ?? "i");
        }
        catch (e) {
            throw evaluationErrors.create("CONFIGURATION_ERROR", `Invalid regex pattern: ${e instanceof Error ? e.message : String(e)}`, { retryable: false, cause: e instanceof Error ? e : undefined });
        }
    }
    else {
        // Validate precompiled RegExp with the same safety rules
        assertSafeRegexSource(options.pattern.source);
        pattern = options.pattern;
    }
    const shouldMatch = options.shouldMatch ?? true;
    return new FunctionScorer(metadata, async (input) => {
        // Both the global (g) and sticky (y) flags make test() resume from
        // lastIndex, so reset it or consecutive calls give different answers.
        if (pattern.global || pattern.sticky) {
            pattern.lastIndex = 0;
        }
        const matches = pattern.test(input.response);
        const passed = shouldMatch ? matches : !matches;
        return {
            score: passed ? 10 : 0,
            reasoning: passed
                ? `Response ${shouldMatch ? "matches" : "does not match"} expected pattern`
                : `Response ${shouldMatch ? "does not match" : "matches"} expected pattern`,
            metadata: {
                pattern: pattern.source,
                flags: pattern.flags,
                matches,
                shouldMatch,
            },
        };
    }, options.config);
}
142
/**
 * Create a keyword presence scorer.
 *
 * Every required keyword found and every forbidden keyword not found counts
 * as one passed check; the score is the passed fraction scaled to 10 (or 10
 * when there is nothing to check). Matching is substring-based.
 *
 * @param id - Scorer identifier.
 * @param name - Scorer display name.
 * @param options - `requiredKeywords`, `forbiddenKeywords`, `caseInsensitive`
 *   (default true), `description`, `config`.
 * @returns A FunctionScorer performing the keyword checks.
 */
export function createKeywordScorer(id, name, options) {
    const metadata = createScorerMetadata(id, name, {
        description: options.description ?? `Keyword presence scorer`,
        type: "rule",
        category: "quality",
    });
    const requiredKeywords = options.requiredKeywords ?? [];
    const forbiddenKeywords = options.forbiddenKeywords ?? [];
    const caseInsensitive = options.caseInsensitive ?? true;
    // Applied to both the response and each keyword so comparisons line up.
    const normalize = (value) => (caseInsensitive ? value.toLowerCase() : value);
    const evaluate = async (input) => {
        const haystack = normalize(input.response);
        const isPresent = (keyword) => haystack.includes(normalize(keyword));
        // Partition the required keywords and collect forbidden hits.
        const foundRequired = requiredKeywords.filter((keyword) => isPresent(keyword));
        const missingRequired = requiredKeywords.filter((keyword) => !isPresent(keyword));
        const foundForbidden = forbiddenKeywords.filter((keyword) => isPresent(keyword));
        // Score is the share of satisfied checks; no checks means a perfect score.
        const totalChecks = requiredKeywords.length + forbiddenKeywords.length;
        const satisfiedChecks = foundRequired.length + (forbiddenKeywords.length - foundForbidden.length);
        const score = totalChecks > 0 ? (satisfiedChecks / totalChecks) * 10 : 10;
        const reasons = [];
        if (missingRequired.length > 0) {
            reasons.push(`Missing required keywords: ${missingRequired.join(", ")}`);
        }
        if (foundForbidden.length > 0) {
            reasons.push(`Found forbidden keywords: ${foundForbidden.join(", ")}`);
        }
        const reasoning = reasons.length === 0
            ? "All keyword requirements satisfied"
            : reasons.join(". ");
        return {
            score,
            reasoning,
            metadata: {
                foundRequired,
                missingRequired,
                foundForbidden,
                totalRequired: requiredKeywords.length,
                totalForbidden: forbiddenKeywords.length,
            },
        };
    };
    return new FunctionScorer(metadata, evaluate, options.config);
}
210
/**
 * Create a length-based scorer.
 *
 * The response passes (score 10) only when every configured bound holds;
 * any violation yields 0 and the violations are listed in the reasoning.
 *
 * @param id - Scorer identifier.
 * @param name - Scorer display name.
 * @param options - `minWords`, `maxWords`, `minChars`, `maxChars`,
 *   `description`, `config`; undefined bounds are not checked.
 * @returns A FunctionScorer performing the length checks.
 */
export function createSimpleLengthScorer(id, name, options) {
    const metadata = createScorerMetadata(id, name, {
        description: options.description ?? `Length scorer`,
        type: "rule",
        category: "quality",
    });
    const evaluate = async (input) => {
        // Words are the non-empty whitespace-separated pieces of the trimmed text.
        const tokens = input.response.trim().split(/\s+/);
        const wordCount = tokens.filter((w) => w.length > 0).length;
        const charCount = input.response.length;
        const violations = [];
        if (options.minWords !== undefined && wordCount < options.minWords) {
            violations.push(`Too few words: ${wordCount} < ${options.minWords}`);
        }
        if (options.maxWords !== undefined && wordCount > options.maxWords) {
            violations.push(`Too many words: ${wordCount} > ${options.maxWords}`);
        }
        if (options.minChars !== undefined && charCount < options.minChars) {
            violations.push(`Too few characters: ${charCount} < ${options.minChars}`);
        }
        if (options.maxChars !== undefined && charCount > options.maxChars) {
            violations.push(`Too many characters: ${charCount} > ${options.maxChars}`);
        }
        // Every failed bound records exactly one violation, so an empty list means pass.
        const withinBounds = violations.length === 0;
        let reasoning;
        if (withinBounds) {
            reasoning = `Length within bounds (${wordCount} words, ${charCount} chars)`;
        }
        else {
            reasoning = violations.join("; ");
        }
        return {
            score: withinBounds ? 10 : 0,
            reasoning,
            metadata: {
                wordCount,
                charCount,
                minWords: options.minWords ?? null,
                maxWords: options.maxWords ?? null,
                minChars: options.minChars ?? null,
                maxChars: options.maxChars ?? null,
            },
        };
    };
    return new FunctionScorer(metadata, evaluate, options.config);
}
259
/**
 * Compose multiple scorers into a single scorer with aggregation.
 *
 * All sub-scorers are run concurrently on the same input and their scores
 * are combined by `aggregation` ("average" by default; unrecognised values
 * also fall back to the average).
 *
 * @param id - Scorer identifier.
 * @param name - Scorer display name.
 * @param scorers - Sub-scorers to combine; must contain at least one.
 * @param options - `aggregation`, `weights` (used by "weighted"; a missing
 *   entry counts as 1.0), `description`, `config`.
 * @returns A FunctionScorer reporting the aggregated score and sub-scores.
 * @throws Error when `scorers` is empty.
 */
export function composeScorers(id, name, scorers, options) {
    if (scorers.length === 0) {
        throw new Error("composeScorers requires at least one scorer. An empty array would produce NaN/Infinity during aggregation.");
    }
    const metadata = createScorerMetadata(id, name, {
        description: options?.description ??
            `Composed scorer with ${scorers.length} sub-scorers`,
        type: "hybrid",
        category: "custom",
    });
    const aggregation = options?.aggregation ?? "average";
    const weights = options?.weights ?? scorers.map(() => 1.0);
    // Reduce the list of sub-scores to one number according to `aggregation`.
    const combine = (scores) => {
        if (aggregation === "min") {
            return Math.min(...scores);
        }
        if (aggregation === "max") {
            return Math.max(...scores);
        }
        if (aggregation === "weighted") {
            let totalWeight = 0;
            let weightedSum = 0;
            scores.forEach((value, index) => {
                const weight = weights[index] ?? 1.0;
                totalWeight += weight;
                weightedSum += value * weight;
            });
            // A non-positive total weight cannot be normalised; report 0.
            return totalWeight > 0 ? weightedSum / totalWeight : 0;
        }
        // "average" and anything unrecognised
        return scores.reduce((sum, value) => sum + value, 0) / scores.length;
    };
    const evaluate = async (input) => {
        const results = await Promise.all(scorers.map((scorer) => scorer.score(input)));
        const aggregatedScore = combine(results.map((r) => r.score));
        const parts = [];
        const subScores = [];
        results.forEach((r, i) => {
            const sub = scorers[i].metadata;
            parts.push(`${sub.name}: ${r.score.toFixed(1)}/10 - ${r.reasoning}`);
            subScores.push({
                scorerId: sub.id,
                scorerName: sub.name,
                score: r.score,
                passed: r.passed,
            });
        });
        return {
            score: aggregatedScore,
            reasoning: `Aggregated (${aggregation}): ${parts.join("; ")}`,
            metadata: {
                subScores,
                aggregationMethod: aggregation,
            },
        };
    };
    return new FunctionScorer(metadata, evaluate, options?.config);
}
322
/**
 * Create a conditional scorer that only runs if a condition is met.
 *
 * When `condition(input)` is truthy the wrapped scorer is executed and its
 * score and reasoning are returned; otherwise the default score/reasoning is
 * used. Type and category are copied from the wrapped scorer.
 *
 * @param id - Scorer identifier.
 * @param name - Scorer display name.
 * @param condition - Synchronous predicate evaluated on each input.
 * @param scorer - Scorer to delegate to when the condition holds.
 * @param options - `defaultScore` (default 10), `defaultReasoning`,
 *   `description`, `config`.
 * @returns A FunctionScorer implementing the conditional behaviour.
 */
export function createConditionalScorer(id, name, condition, scorer, options) {
    const metadata = createScorerMetadata(id, name, {
        description: options?.description ??
            `Conditional scorer wrapping ${scorer.metadata.name}`,
        type: scorer.metadata.type,
        category: scorer.metadata.category,
    });
    const defaultScore = options?.defaultScore ?? 10;
    const defaultReasoning = options?.defaultReasoning ?? "Condition not met, using default score";
    return new FunctionScorer(metadata, async (input) => {
        if (condition(input)) {
            const result = await scorer.score(input);
            return {
                score: result.score,
                reasoning: result.reasoning,
                metadata: {
                    // Spread the wrapped metadata first so this scorer's own
                    // fields win; otherwise a nested conditional scorer would
                    // overwrite conditionMet/wrappedScorer with its values.
                    ...(result.metadata ?? {}),
                    conditionMet: true,
                    wrappedScorer: scorer.metadata.id,
                },
            };
        }
        return {
            score: defaultScore,
            reasoning: defaultReasoning,
            metadata: {
                conditionMet: false,
                wrappedScorer: scorer.metadata.id,
            },
        };
    }, options?.config);
}
357
/**
 * Create a scorer that inverts the score (`DEFAULT_SCORE_SCALE.max - score`).
 *
 * Useful for turning a "higher is worse" scorer into "higher is better".
 * Type and category are copied from the wrapped scorer and the reasoning is
 * prefixed with "Inverted: ".
 *
 * @param id - Scorer identifier.
 * @param name - Scorer display name.
 * @param scorer - Scorer whose score is inverted.
 * @param options - Optional `description` and `config`.
 * @returns A FunctionScorer reporting the inverted score.
 */
export function createInvertedScorer(id, name, scorer, options) {
    const metadata = createScorerMetadata(id, name, {
        description: options?.description ??
            `Inverted scorer wrapping ${scorer.metadata.name}`,
        type: scorer.metadata.type,
        category: scorer.metadata.category,
    });
    return new FunctionScorer(metadata, async (input) => {
        const result = await scorer.score(input);
        const invertedScore = DEFAULT_SCORE_SCALE.max - result.score;
        return {
            score: invertedScore,
            reasoning: `Inverted: ${result.reasoning}`,
            metadata: {
                // Spread the wrapped metadata first so the fields below always
                // describe THIS inversion; with nested inverted scorers the old
                // order reported the inner scorer's originalScore/invertedScore,
                // which disagreed with the score actually returned.
                ...(result.metadata ?? {}),
                originalScore: result.score,
                invertedScore,
                wrappedScorer: scorer.metadata.id,
            },
        };
    }, options?.config);
}
382
+ //# sourceMappingURL=customScorerUtils.js.map
@@ -0,0 +1,10 @@
1
+ /**
2
+ * @file Scorers Index
3
+ * Export all scorers and scorer utilities
4
+ */
5
+ export { BaseScorer, DEFAULT_SCORE_SCALE } from "./baseScorer.js";
6
+ export { composeScorers, createConditionalScorer, createFunctionScorer, createInvertedScorer, createKeywordScorer, createRegexScorer, createScorerMetadata, createSimpleLengthScorer, type ScorerFunction, } from "./customScorerUtils.js";
7
+ export * from "./llm/index.js";
8
+ export * from "./rule/index.js";
9
+ export { ScorerBuilder, Scorers } from "./scorerBuilder.js";
10
+ export { ScorerRegistry } from "./scorerRegistry.js";
@@ -0,0 +1,17 @@
1
+ /**
2
+ * @file Scorers Index
3
+ * Export all scorers and scorer utilities
4
+ */
5
+ // Base classes
6
+ export { BaseScorer, DEFAULT_SCORE_SCALE } from "./baseScorer.js";
7
+ // Custom Scorer Utilities
8
+ export { composeScorers, createConditionalScorer, createFunctionScorer, createInvertedScorer, createKeywordScorer, createRegexScorer, createScorerMetadata, createSimpleLengthScorer, } from "./customScorerUtils.js";
9
+ // LLM Scorers
10
+ export * from "./llm/index.js";
11
+ // Rule Scorers
12
+ export * from "./rule/index.js";
13
+ // Scorer Builder
14
+ export { ScorerBuilder, Scorers } from "./scorerBuilder.js";
15
+ // Registry
16
+ export { ScorerRegistry } from "./scorerRegistry.js";
17
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1,12 @@
1
+ /**
2
+ * @file Answer relevancy scorer
3
+ * Evaluates how relevant the AI response is to the user query
4
+ */
5
+ import type { LLMScorerConfig, ScoreResult, ScorerInput } from "../../../types/scorerTypes.js";
6
+ import { BaseLLMScorer } from "./baseLLMScorer.js";
7
/**
 * LLM-based scorer that evaluates how relevant the AI response is to the user query.
 * Supplies the prompt-building and response-parsing steps required by BaseLLMScorer.
 */
+ export declare class AnswerRelevancyScorer extends BaseLLMScorer {
8
/** Creates the scorer; `config` is optional and may carry only a subset of LLMScorerConfig fields. */
+ constructor(config?: Partial<LLMScorerConfig>);
9
/** Builds the evaluation prompt string from the scorer input (query, response, optional context). */
+ generatePrompt(input: ScorerInput): string;
10
/** Parses the raw LLM response text into a partial score result; `_input` is accepted but not read. */
+ parseResponse(response: string, _input: ScorerInput): Partial<ScoreResult>;
11
+ }
12
/** Factory: resolves to a new AnswerRelevancyScorer constructed from the optional partial config. */
+ export declare function createAnswerRelevancyScorer(config?: Partial<LLMScorerConfig>): Promise<AnswerRelevancyScorer>;
@@ -0,0 +1,100 @@
1
+ /**
2
+ * @file Answer relevancy scorer
3
+ * Evaluates how relevant the AI response is to the user query
4
+ */
5
+ import { BaseLLMScorer } from "./baseLLMScorer.js";
6
/**
 * Prompt template consumed by AnswerRelevancyScorer.generatePrompt.
 *
 * Placeholders: {{query}} and {{response}} are handed to substituteTemplate; {{context}} is
 * replaced separately with a numbered list of the context entries. The
 * {{#if hasContext}} ... {{/if}} section is passed through processConditionals together with
 * the hasContext flag. The "Output Format" section names the JSON fields that
 * parseResponse reads back (score, reasoning, confidence and the four boolean flags).
 */
+ const ANSWER_RELEVANCY_PROMPT = `You are evaluating if an AI response directly addresses the user's question.
7
+
8
+ **Question:**
9
+ {{query}}
10
+
11
+ **Response:**
12
+ {{response}}
13
+
14
+ {{#if hasContext}}
15
+ **Available Context:**
16
+ {{context}}
17
+ {{/if}}
18
+
19
+ ## Evaluation Criteria
20
+
21
+ 1. Does the response address the main intent of the question?
22
+ 2. Is the response complete and sufficient?
23
+ 3. Does it avoid unnecessary tangents?
24
+ 4. Is the information directly relevant to what was asked?
25
+
26
+ ## Output Format (JSON)
27
+
28
+ {
29
+ "addressesIntent": true/false,
30
+ "isComplete": true/false,
31
+ "isOnTopic": true/false,
32
+ "hasTangents": true/false,
33
+ "score": 0.0-10.0,
34
+ "reasoning": "explanation of the score",
35
+ "confidence": 0.0-1.0
36
+ }`;
37
/**
 * LLM-backed scorer that rates how directly an AI response answers the user's query.
 *
 * The scorer fills ANSWER_RELEVANCY_PROMPT with the query, the response and (when
 * present) the retrieved context, and then interprets the model's JSON reply.
 */
export class AnswerRelevancyScorer extends BaseLLMScorer {
    /**
     * @param config - Optional configuration overrides, forwarded unchanged to the
     *   BaseLLMScorer constructor together with this scorer's metadata.
     */
    constructor(config) {
        super({
            id: "answer-relevancy",
            name: "Answer Relevancy",
            description: "Evaluates how relevant the AI response is to the user query",
            type: "llm",
            category: "relevancy",
            version: "1.0.0",
            defaultConfig: {
                enabled: true,
                threshold: 0.7,
                weight: 1.0,
                timeout: 25000,
                retries: 2,
            },
            requiredInputs: ["query", "response"],
            optionalInputs: ["context"],
        }, config);
    }
    /**
     * Build the evaluation prompt for one scoring request.
     *
     * @param input - Scorer input; `query` and `response` are substituted into the
     *   template, and a non-empty `context` array is rendered as a numbered list
     *   ("[1] ...", "[2] ...") joined by newlines.
     * @returns The prompt text to send to the LLM.
     */
    generatePrompt(input) {
        let prompt = this.substituteTemplate(ANSWER_RELEVANCY_PROMPT, {
            query: input.query,
            response: input.response,
        });
        const contextEntries = input.context;
        const hasContext = Boolean(contextEntries && contextEntries.length > 0);
        prompt = this.processConditionals(prompt, { hasContext });
        if (hasContext) {
            const contextText = contextEntries
                .map((entry, index) => `[${index + 1}] ${entry}`)
                .join("\n");
            // Use a replacer function: with a plain string replacement, String.prototype.replace
            // expands "$&", "$1", "$$", "$`" and "$'" sequences, which would corrupt context
            // text that happens to contain a dollar sign (prices, shell snippets, regexes).
            prompt = prompt.replace("{{context}}", () => contextText);
        }
        return prompt;
    }
    /**
     * Convert the raw LLM reply into a partial score result.
     *
     * When no JSON object can be extracted, the score comes from extractScoreFromText
     * and the confidence is fixed at 0.3. Otherwise `score` is clamped to [0, 10]
     * (defaulting to 5 when it is not a number) and `confidence` is clamped to [0, 1]
     * (defaulting to 0.8 when it is not a number). The four boolean flags from the
     * reply are copied into `metadata`, with `null` standing in for missing values.
     *
     * @param response - Raw text returned by the LLM.
     * @param _input - Unused; present to match the BaseLLMScorer contract.
     * @returns Score, reasoning, confidence and (for structured replies) metadata.
     */
    parseResponse(response, _input) {
        const json = this.extractJSON(response);
        if (!json) {
            const score = this.extractScoreFromText(response);
            return {
                score,
                reasoning: "Could not parse structured response",
                confidence: 0.3,
            };
        }
        const score = Math.min(10, Math.max(0, typeof json.score === "number" ? json.score : 5));
        const confidence = Math.min(1, Math.max(0, typeof json.confidence === "number" ? json.confidence : 0.8));
        return {
            score,
            reasoning: typeof json.reasoning === "string"
                ? json.reasoning
                : "No reasoning provided",
            confidence,
            metadata: {
                addressesIntent: json.addressesIntent ?? null,
                isComplete: json.isComplete ?? null,
                isOnTopic: json.isOnTopic ?? null,
                hasTangents: json.hasTangents ?? null,
            },
        };
    }
}
97
/**
 * Factory for AnswerRelevancyScorer.
 *
 * Declared `async` so callers always receive a Promise, matching the published
 * type declaration.
 *
 * @param config - Optional configuration overrides passed to the scorer constructor.
 * @returns A promise resolving to the newly constructed scorer.
 */
export async function createAnswerRelevancyScorer(config) {
    const scorer = new AnswerRelevancyScorer(config);
    return scorer;
}
100
+ //# sourceMappingURL=answerRelevancyScorer.js.map
@@ -0,0 +1,71 @@
1
+ /**
2
+ * @file Base class for all LLM-based scorers
3
+ * Provides common functionality for calling LLMs and parsing responses
4
+ */
5
+ import type { JsonObject } from "../../../types/common.js";
6
+ import type { AIProvider } from "../../../types/providers.js";
7
+ import type { LLMScorer, LLMScorerConfig, ScoreResult, ScorerInput, ScorerMetadata } from "../../../types/scorerTypes.js";
8
+ import { BaseScorer } from "../baseScorer.js";
9
+ /**
10
+ * Default LLM scorer configuration
11
+ */
12
+ export declare const DEFAULT_LLM_SCORER_CONFIG: LLMScorerConfig;
13
+ /**
14
+ * Abstract base class for LLM-based scorers
15
+ */
16
/**
 * Abstract base for scorers that obtain their score by prompting an LLM.
 * Subclasses provide generatePrompt and parseResponse; the protected helpers declared
 * here (template substitution, conditional blocks, JSON/score extraction) are available
 * to them for building prompts and reading replies.
 */
+ export declare abstract class BaseLLMScorer extends BaseScorer implements LLMScorer {
17
/** LLM scorer configuration held by the instance; presumably what the `llmConfig` getter returns (confirm in implementation). */
+ protected _llmConfig: LLMScorerConfig;
18
/** AI provider used for LLM calls; optional, so it may be unset (presumably until initializeProvider has run; confirm). */
+ protected provider?: AIProvider;
19
/** NOTE(review): looks like a cached promise for an in-flight provider initialization; confirm in implementation. */
+ private initializationPromise;
20
/** Creates the scorer from its metadata and an optional LLM scorer configuration. */
+ constructor(metadata: ScorerMetadata, config?: LLMScorerConfig);
21
+ /**
22
+ * Get LLM-specific configuration
23
+ */
24
+ get llmConfig(): LLMScorerConfig;
25
+ /**
26
+ * Generate the prompt for LLM scoring - must be implemented by subclasses
27
+ */
28
+ abstract generatePrompt(input: ScorerInput): string;
29
+ /**
30
+ * Parse LLM response into score result - must be implemented by subclasses
31
+ */
32
+ abstract parseResponse(response: string, input: ScorerInput): Partial<ScoreResult>;
33
+ /**
34
+ * Main scoring method
35
+ */
36
+ score(input: ScorerInput): Promise<ScoreResult>;
37
+ /**
38
+ * Initialize the AI provider
39
+ */
40
+ protected initializeProvider(): Promise<void>;
41
+ /**
42
+ * Internal method to actually initialize the provider
43
+ */
44
+ private _doInitializeProvider;
45
+ /**
46
+ * Call the LLM with the given prompt
47
+ */
48
+ protected callLLM(prompt: string): Promise<string>;
49
+ /**
50
+ * Extract JSON from LLM response
51
+ * Handles various formats including markdown code blocks
+ * Returns null when no JSON object is obtained; callers such as
+ * AnswerRelevancyScorer.parseResponse check for a falsy result and fall back to text parsing.
52
+ */
53
+ protected extractJSON(response: string): JsonObject | null;
54
+ /**
55
+ * Simple template substitution for prompts
+ * AnswerRelevancyScorer calls this with a template containing {{query}} and {{response}}
+ * placeholders and a matching variables map; presumably each {{name}} is replaced by its
+ * value (handling of arrays and undefined is not visible here; confirm in implementation).
56
+ */
57
+ protected substituteTemplate(template: string, variables: Record<string, string | string[] | undefined>): string;
58
+ /**
59
+ * Handle conditional template blocks
+ * AnswerRelevancyScorer passes { hasContext } for a template containing a
+ * {{#if hasContext}} ... {{/if}} section; presumably the section is kept or dropped
+ * according to the flag (confirm in implementation).
60
+ */
61
+ protected processConditionals(template: string, conditions: Record<string, boolean>): string;
62
+ /**
63
+ * Extract a numeric score from text response
64
+ * Safe numeric extraction without ReDoS-prone regex
65
+ */
66
+ protected extractNumericScore(text: string): number | null;
67
+ /**
68
+ * Extract a numeric score from text response with fallback
+ * Always yields a number (never null); min and max are optional and are
+ * presumably the bounds applied to the extracted value (confirm in implementation).
69
+ */
70
+ protected extractScoreFromText(text: string, min?: number, max?: number): number;
71
+ }