@juspay/neurolink 9.36.1 → 9.37.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202) hide show
  1. package/CHANGELOG.md +6 -0
  2. package/dist/auth/errors.d.ts +1 -1
  3. package/dist/auth/middleware/AuthMiddleware.d.ts +1 -1
  4. package/dist/auth/providers/BaseAuthProvider.d.ts +1 -1
  5. package/dist/browser/neurolink.min.js +921 -423
  6. package/dist/cli/commands/evaluate.d.ts +48 -0
  7. package/dist/cli/commands/evaluate.js +955 -0
  8. package/dist/cli/parser.js +4 -1
  9. package/dist/evaluation/BatchEvaluator.d.ts +163 -0
  10. package/dist/evaluation/BatchEvaluator.js +267 -0
  11. package/dist/evaluation/EvaluationAggregator.d.ts +272 -0
  12. package/dist/evaluation/EvaluationAggregator.js +377 -0
  13. package/dist/evaluation/EvaluatorFactory.d.ts +113 -0
  14. package/dist/evaluation/EvaluatorFactory.js +280 -0
  15. package/dist/evaluation/EvaluatorRegistry.d.ts +160 -0
  16. package/dist/evaluation/EvaluatorRegistry.js +184 -0
  17. package/dist/evaluation/errors/EvaluationError.d.ts +189 -0
  18. package/dist/evaluation/errors/EvaluationError.js +206 -0
  19. package/dist/evaluation/errors/index.d.ts +4 -0
  20. package/dist/evaluation/errors/index.js +4 -0
  21. package/dist/evaluation/hooks/index.d.ts +6 -0
  22. package/dist/evaluation/hooks/index.js +6 -0
  23. package/dist/evaluation/hooks/langfuseAdapter.d.ts +99 -0
  24. package/dist/evaluation/hooks/langfuseAdapter.js +172 -0
  25. package/dist/evaluation/hooks/observabilityHooks.d.ts +129 -0
  26. package/dist/evaluation/hooks/observabilityHooks.js +181 -0
  27. package/dist/evaluation/index.d.ts +11 -2
  28. package/dist/evaluation/index.js +15 -0
  29. package/dist/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
  30. package/dist/evaluation/pipeline/evaluationPipeline.js +381 -0
  31. package/dist/evaluation/pipeline/index.d.ts +8 -0
  32. package/dist/evaluation/pipeline/index.js +8 -0
  33. package/dist/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
  34. package/dist/evaluation/pipeline/pipelineBuilder.js +260 -0
  35. package/dist/evaluation/pipeline/presets.d.ts +66 -0
  36. package/dist/evaluation/pipeline/presets.js +224 -0
  37. package/dist/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
  38. package/dist/evaluation/pipeline/strategies/batchStrategy.js +238 -0
  39. package/dist/evaluation/pipeline/strategies/index.d.ts +6 -0
  40. package/dist/evaluation/pipeline/strategies/index.js +6 -0
  41. package/dist/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
  42. package/dist/evaluation/pipeline/strategies/samplingStrategy.js +238 -0
  43. package/dist/evaluation/reporting/index.d.ts +6 -0
  44. package/dist/evaluation/reporting/index.js +6 -0
  45. package/dist/evaluation/reporting/metricsCollector.d.ts +147 -0
  46. package/dist/evaluation/reporting/metricsCollector.js +285 -0
  47. package/dist/evaluation/reporting/reportGenerator.d.ts +90 -0
  48. package/dist/evaluation/reporting/reportGenerator.js +374 -0
  49. package/dist/evaluation/scorers/baseScorer.d.ts +83 -0
  50. package/dist/evaluation/scorers/baseScorer.js +232 -0
  51. package/dist/evaluation/scorers/customScorerUtils.d.ts +95 -0
  52. package/dist/evaluation/scorers/customScorerUtils.js +381 -0
  53. package/dist/evaluation/scorers/index.d.ts +10 -0
  54. package/dist/evaluation/scorers/index.js +16 -0
  55. package/dist/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
  56. package/dist/evaluation/scorers/llm/answerRelevancyScorer.js +99 -0
  57. package/dist/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
  58. package/dist/evaluation/scorers/llm/baseLLMScorer.js +281 -0
  59. package/dist/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
  60. package/dist/evaluation/scorers/llm/biasDetectionScorer.js +127 -0
  61. package/dist/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
  62. package/dist/evaluation/scorers/llm/contextPrecisionScorer.js +92 -0
  63. package/dist/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
  64. package/dist/evaluation/scorers/llm/contextRelevancyScorer.js +107 -0
  65. package/dist/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
  66. package/dist/evaluation/scorers/llm/faithfulnessScorer.js +121 -0
  67. package/dist/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
  68. package/dist/evaluation/scorers/llm/hallucinationScorer.js +140 -0
  69. package/dist/evaluation/scorers/llm/index.d.ts +15 -0
  70. package/dist/evaluation/scorers/llm/index.js +16 -0
  71. package/dist/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
  72. package/dist/evaluation/scorers/llm/promptAlignmentScorer.js +106 -0
  73. package/dist/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
  74. package/dist/evaluation/scorers/llm/summarizationScorer.js +114 -0
  75. package/dist/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
  76. package/dist/evaluation/scorers/llm/toneConsistencyScorer.js +106 -0
  77. package/dist/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
  78. package/dist/evaluation/scorers/llm/toxicityScorer.js +121 -0
  79. package/dist/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
  80. package/dist/evaluation/scorers/rule/baseRuleScorer.js +233 -0
  81. package/dist/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
  82. package/dist/evaluation/scorers/rule/contentSimilarityScorer.js +350 -0
  83. package/dist/evaluation/scorers/rule/formatScorer.d.ts +147 -0
  84. package/dist/evaluation/scorers/rule/formatScorer.js +470 -0
  85. package/dist/evaluation/scorers/rule/index.d.ts +9 -0
  86. package/dist/evaluation/scorers/rule/index.js +10 -0
  87. package/dist/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
  88. package/dist/evaluation/scorers/rule/keywordCoverageScorer.js +347 -0
  89. package/dist/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
  90. package/dist/evaluation/scorers/rule/lengthScorer.js +351 -0
  91. package/dist/evaluation/scorers/scorerBuilder.d.ts +161 -0
  92. package/dist/evaluation/scorers/scorerBuilder.js +420 -0
  93. package/dist/evaluation/scorers/scorerRegistry.d.ts +62 -0
  94. package/dist/evaluation/scorers/scorerRegistry.js +467 -0
  95. package/dist/index.d.ts +37 -25
  96. package/dist/index.js +65 -26
  97. package/dist/lib/auth/providers/BaseAuthProvider.d.ts +1 -1
  98. package/dist/lib/evaluation/BatchEvaluator.d.ts +163 -0
  99. package/dist/lib/evaluation/BatchEvaluator.js +268 -0
  100. package/dist/lib/evaluation/EvaluationAggregator.d.ts +272 -0
  101. package/dist/lib/evaluation/EvaluationAggregator.js +378 -0
  102. package/dist/lib/evaluation/EvaluatorFactory.d.ts +113 -0
  103. package/dist/lib/evaluation/EvaluatorFactory.js +281 -0
  104. package/dist/lib/evaluation/EvaluatorRegistry.d.ts +160 -0
  105. package/dist/lib/evaluation/EvaluatorRegistry.js +185 -0
  106. package/dist/lib/evaluation/errors/EvaluationError.d.ts +189 -0
  107. package/dist/lib/evaluation/errors/EvaluationError.js +207 -0
  108. package/dist/lib/evaluation/errors/index.d.ts +4 -0
  109. package/dist/lib/evaluation/errors/index.js +5 -0
  110. package/dist/lib/evaluation/hooks/index.d.ts +6 -0
  111. package/dist/lib/evaluation/hooks/index.js +7 -0
  112. package/dist/lib/evaluation/hooks/langfuseAdapter.d.ts +99 -0
  113. package/dist/lib/evaluation/hooks/langfuseAdapter.js +173 -0
  114. package/dist/lib/evaluation/hooks/observabilityHooks.d.ts +129 -0
  115. package/dist/lib/evaluation/hooks/observabilityHooks.js +182 -0
  116. package/dist/lib/evaluation/index.d.ts +11 -2
  117. package/dist/lib/evaluation/index.js +15 -0
  118. package/dist/lib/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
  119. package/dist/lib/evaluation/pipeline/evaluationPipeline.js +382 -0
  120. package/dist/lib/evaluation/pipeline/index.d.ts +8 -0
  121. package/dist/lib/evaluation/pipeline/index.js +9 -0
  122. package/dist/lib/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
  123. package/dist/lib/evaluation/pipeline/pipelineBuilder.js +261 -0
  124. package/dist/lib/evaluation/pipeline/presets.d.ts +66 -0
  125. package/dist/lib/evaluation/pipeline/presets.js +225 -0
  126. package/dist/lib/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
  127. package/dist/lib/evaluation/pipeline/strategies/batchStrategy.js +239 -0
  128. package/dist/lib/evaluation/pipeline/strategies/index.d.ts +6 -0
  129. package/dist/lib/evaluation/pipeline/strategies/index.js +7 -0
  130. package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
  131. package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.js +239 -0
  132. package/dist/lib/evaluation/reporting/index.d.ts +6 -0
  133. package/dist/lib/evaluation/reporting/index.js +7 -0
  134. package/dist/lib/evaluation/reporting/metricsCollector.d.ts +147 -0
  135. package/dist/lib/evaluation/reporting/metricsCollector.js +286 -0
  136. package/dist/lib/evaluation/reporting/reportGenerator.d.ts +90 -0
  137. package/dist/lib/evaluation/reporting/reportGenerator.js +375 -0
  138. package/dist/lib/evaluation/scorers/baseScorer.d.ts +83 -0
  139. package/dist/lib/evaluation/scorers/baseScorer.js +233 -0
  140. package/dist/lib/evaluation/scorers/customScorerUtils.d.ts +95 -0
  141. package/dist/lib/evaluation/scorers/customScorerUtils.js +382 -0
  142. package/dist/lib/evaluation/scorers/index.d.ts +10 -0
  143. package/dist/lib/evaluation/scorers/index.js +17 -0
  144. package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
  145. package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.js +100 -0
  146. package/dist/lib/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
  147. package/dist/lib/evaluation/scorers/llm/baseLLMScorer.js +282 -0
  148. package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
  149. package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.js +128 -0
  150. package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
  151. package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.js +93 -0
  152. package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
  153. package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.js +108 -0
  154. package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
  155. package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.js +122 -0
  156. package/dist/lib/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
  157. package/dist/lib/evaluation/scorers/llm/hallucinationScorer.js +141 -0
  158. package/dist/lib/evaluation/scorers/llm/index.d.ts +15 -0
  159. package/dist/lib/evaluation/scorers/llm/index.js +17 -0
  160. package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
  161. package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.js +107 -0
  162. package/dist/lib/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
  163. package/dist/lib/evaluation/scorers/llm/summarizationScorer.js +115 -0
  164. package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
  165. package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.js +107 -0
  166. package/dist/lib/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
  167. package/dist/lib/evaluation/scorers/llm/toxicityScorer.js +122 -0
  168. package/dist/lib/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
  169. package/dist/lib/evaluation/scorers/rule/baseRuleScorer.js +234 -0
  170. package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
  171. package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.js +351 -0
  172. package/dist/lib/evaluation/scorers/rule/formatScorer.d.ts +147 -0
  173. package/dist/lib/evaluation/scorers/rule/formatScorer.js +471 -0
  174. package/dist/lib/evaluation/scorers/rule/index.d.ts +9 -0
  175. package/dist/lib/evaluation/scorers/rule/index.js +11 -0
  176. package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
  177. package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.js +348 -0
  178. package/dist/lib/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
  179. package/dist/lib/evaluation/scorers/rule/lengthScorer.js +352 -0
  180. package/dist/lib/evaluation/scorers/scorerBuilder.d.ts +161 -0
  181. package/dist/lib/evaluation/scorers/scorerBuilder.js +421 -0
  182. package/dist/lib/evaluation/scorers/scorerRegistry.d.ts +62 -0
  183. package/dist/lib/evaluation/scorers/scorerRegistry.js +468 -0
  184. package/dist/lib/index.d.ts +37 -25
  185. package/dist/lib/index.js +65 -26
  186. package/dist/lib/neurolink.d.ts +204 -0
  187. package/dist/lib/neurolink.js +296 -0
  188. package/dist/lib/types/index.d.ts +3 -1
  189. package/dist/lib/types/index.js +3 -2
  190. package/dist/lib/types/scorerTypes.d.ts +423 -0
  191. package/dist/lib/types/scorerTypes.js +6 -0
  192. package/dist/lib/utils/errorHandling.d.ts +20 -0
  193. package/dist/lib/utils/errorHandling.js +60 -0
  194. package/dist/neurolink.d.ts +204 -0
  195. package/dist/neurolink.js +296 -0
  196. package/dist/types/index.d.ts +3 -1
  197. package/dist/types/index.js +3 -2
  198. package/dist/types/scorerTypes.d.ts +423 -0
  199. package/dist/types/scorerTypes.js +5 -0
  200. package/dist/utils/errorHandling.d.ts +20 -0
  201. package/dist/utils/errorHandling.js +60 -0
  202. package/package.json +1 -1
@@ -0,0 +1,122 @@
1
+ /**
2
+ * @file Faithfulness scorer
3
+ * Evaluates if the response is grounded in the provided context
4
+ */
5
+ import { BaseLLMScorer } from "./baseLLMScorer.js";
6
+ const FAITHFULNESS_PROMPT = `You are an expert at evaluating faithfulness in AI responses.
7
+
8
+ Faithfulness measures whether the response is grounded in and supported by the provided context.
9
+ A faithful response:
10
+ - Only makes claims that are supported by the context
11
+ - Does not add information not present in the context
12
+ - Accurately represents the information from the context
13
+
14
+ ## Response to Evaluate
15
+ {{response}}
16
+
17
+ ## Source Context
18
+ {{context}}
19
+
20
+ {{#if hasQuery}}
21
+ ## Original Query
22
+ {{query}}
23
+ {{/if}}
24
+
25
+ ## Instructions
26
+
27
+ 1. Extract all claims/statements from the response
28
+ 2. For each claim, determine if it's supported by the context
29
+ 3. Calculate the faithfulness score based on the proportion of supported claims
30
+
31
+ ## Output Format (JSON)
32
+
33
+ {
34
+ "score": <0-10>,
35
+ "claims": [
36
+ {
37
+ "claim": "<extracted claim>",
38
+ "supported": <true|false>,
39
+ "evidence": "<supporting context or 'Not found in context'>"
40
+ }
41
+ ],
42
+ "supportedCount": <number>,
43
+ "totalClaims": <number>,
44
+ "reasoning": "<overall assessment>",
45
+ "confidence": <0.0-1.0>
46
+ }`;
47
+ export class FaithfulnessScorer extends BaseLLMScorer {
48
+ constructor(config) {
49
+ super({
50
+ id: "faithfulness",
51
+ name: "Faithfulness",
52
+ description: "Evaluates if the response is faithfully grounded in provided context",
53
+ type: "llm",
54
+ category: "faithfulness",
55
+ version: "1.0.0",
56
+ defaultConfig: {
57
+ enabled: true,
58
+ threshold: 0.7,
59
+ weight: 1.2,
60
+ timeout: 30000,
61
+ retries: 2,
62
+ },
63
+ requiredInputs: ["response", "context"],
64
+ optionalInputs: ["query"],
65
+ }, config);
66
+ }
67
+ generatePrompt(input) {
68
+ let prompt = FAITHFULNESS_PROMPT;
69
+ prompt = this.substituteTemplate(prompt, { response: input.response });
70
+ if (input.context && input.context.length > 0) {
71
+ const contextSection = input.context
72
+ .map((c, i) => `[Source ${i + 1}]: ${c}`)
73
+ .join("\n");
74
+ prompt = prompt.replace("{{context}}", contextSection);
75
+ }
76
+ else {
77
+ prompt = prompt.replace("{{context}}", "No context provided");
78
+ }
79
+ const hasQuery = !!input.query;
80
+ prompt = this.processConditionals(prompt, { hasQuery });
81
+ if (hasQuery) {
82
+ prompt = this.substituteTemplate(prompt, { query: input.query });
83
+ }
84
+ return prompt;
85
+ }
86
+ parseResponse(response, _input) {
87
+ const json = this.extractJSON(response);
88
+ if (!json) {
89
+ const score = this.extractScoreFromText(response);
90
+ return {
91
+ score,
92
+ reasoning: "Could not parse structured response",
93
+ confidence: 0.3,
94
+ };
95
+ }
96
+ const claims = Array.isArray(json.claims)
97
+ ? json.claims
98
+ : [];
99
+ const totalClaims = typeof json.totalClaims === "number" ? json.totalClaims : claims.length;
100
+ const supportedCount = typeof json.supportedCount === "number"
101
+ ? json.supportedCount
102
+ : claims.filter((c) => c.supported === true).length;
103
+ const faithfulnessRatio = totalClaims > 0 ? supportedCount / totalClaims : 1;
104
+ return {
105
+ score: typeof json.score === "number" ? json.score : faithfulnessRatio * 10,
106
+ reasoning: typeof json.reasoning === "string"
107
+ ? json.reasoning
108
+ : "No reasoning provided",
109
+ confidence: typeof json.confidence === "number" ? json.confidence : 0.8,
110
+ metadata: {
111
+ claims,
112
+ supportedCount,
113
+ totalClaims,
114
+ faithfulnessRatio,
115
+ },
116
+ };
117
+ }
118
+ }
119
/**
 * Async factory for {@link FaithfulnessScorer}.
 *
 * @param {object} [config] - Passed straight to the FaithfulnessScorer constructor.
 * @returns {Promise<FaithfulnessScorer>} Resolves with the new scorer; a
 *   constructor failure surfaces as a rejected promise.
 */
export async function createFaithfulnessScorer(config) {
    const scorer = new FaithfulnessScorer(config);
    return scorer;
}
122
+ //# sourceMappingURL=faithfulnessScorer.js.map
@@ -0,0 +1,12 @@
1
+ /**
2
+ * @file Hallucination detection scorer using LLM-as-judge
3
+ * Detects factual errors and unsupported claims in AI responses
4
+ */
5
+ import type { LLMScorerConfig, ScoreResult, ScorerInput } from "../../../types/scorerTypes.js";
6
+ import { BaseLLMScorer } from "./baseLLMScorer.js";
7
+ export declare class HallucinationScorer extends BaseLLMScorer {
8
+ constructor(config?: Partial<LLMScorerConfig>);
9
+ generatePrompt(input: ScorerInput): string;
10
+ parseResponse(response: string, _input: ScorerInput): Partial<ScoreResult>;
11
+ }
12
+ export declare function createHallucinationScorer(config?: Partial<LLMScorerConfig>): Promise<HallucinationScorer>;
@@ -0,0 +1,141 @@
1
+ /**
2
+ * @file Hallucination detection scorer using LLM-as-judge
3
+ * Detects factual errors and unsupported claims in AI responses
4
+ */
5
+ import { BaseLLMScorer } from "./baseLLMScorer.js";
6
+ const HALLUCINATION_PROMPT = `You are an expert fact-checker evaluating an AI response for hallucinations.
7
+
8
+ A hallucination is when the AI:
9
+ 1. States false facts that contradict known information
10
+ 2. Fabricates specific details (names, dates, statistics) without basis
11
+ 3. Makes claims that cannot be verified or are contradicted by provided context
12
+ 4. Presents opinions or speculation as established facts
13
+
14
+ ## Evaluation Context
15
+
16
+ **User Query:**
17
+ {{query}}
18
+
19
+ **AI Response:**
20
+ {{response}}
21
+
22
+ {{#if hasContext}}
23
+ **Provided Context:**
24
+ {{context}}
25
+ {{/if}}
26
+
27
+ {{#if hasGroundTruth}}
28
+ **Ground Truth:**
29
+ {{groundTruth}}
30
+ {{/if}}
31
+
32
+ ## Instructions
33
+
34
+ Analyze the response for hallucinations. For each potential hallucination found:
35
+ 1. Quote the problematic text
36
+ 2. Explain why it's a hallucination
37
+ 3. Rate severity (minor, moderate, severe)
38
+
39
+ Then provide an overall score from 0-10:
40
+ - 10: No hallucinations detected
41
+ - 7-9: Minor issues (imprecise but not false)
42
+ - 4-6: Moderate hallucinations present
43
+ - 1-3: Severe hallucinations
44
+ - 0: Response is mostly fabricated
45
+
46
+ ## Output Format (JSON)
47
+
48
+ {
49
+ "score": <0-10>,
50
+ "hallucinations": [
51
+ {
52
+ "text": "<quoted problematic text>",
53
+ "reason": "<explanation>",
54
+ "severity": "<minor|moderate|severe>"
55
+ }
56
+ ],
57
+ "reasoning": "<overall assessment>",
58
+ "confidence": <0.0-1.0>
59
+ }`;
60
+ export class HallucinationScorer extends BaseLLMScorer {
61
+ constructor(config) {
62
+ super({
63
+ id: "hallucination",
64
+ name: "Hallucination Detection",
65
+ description: "Detects factual errors, fabrications, and unsupported claims in responses",
66
+ type: "llm",
67
+ category: "accuracy",
68
+ version: "1.0.0",
69
+ defaultConfig: {
70
+ enabled: true,
71
+ threshold: 0.8,
72
+ weight: 1.5,
73
+ timeout: 30000,
74
+ retries: 2,
75
+ },
76
+ requiredInputs: ["query", "response"],
77
+ optionalInputs: ["context", "groundTruth"],
78
+ }, config);
79
+ }
80
+ generatePrompt(input) {
81
+ let prompt = HALLUCINATION_PROMPT;
82
+ // Substitute variables
83
+ prompt = this.substituteTemplate(prompt, {
84
+ query: input.query,
85
+ response: input.response,
86
+ });
87
+ // Handle context
88
+ const contextExists = !!(input.context && input.context.length > 0);
89
+ prompt = this.processConditionals(prompt, { hasContext: contextExists });
90
+ if (contextExists && input.context) {
91
+ prompt = prompt.replace("{{context}}", input.context.map((c, i) => `[${i + 1}] ${c}`).join("\n"));
92
+ }
93
+ // Handle ground truth
94
+ const groundTruthExists = !!input.groundTruth;
95
+ prompt = this.processConditionals(prompt, {
96
+ hasGroundTruth: groundTruthExists,
97
+ });
98
+ if (groundTruthExists && input.groundTruth) {
99
+ prompt = prompt.replace("{{groundTruth}}", input.groundTruth);
100
+ }
101
+ return prompt;
102
+ }
103
+ parseResponse(response, _input) {
104
+ const json = this.extractJSON(response);
105
+ if (!json) {
106
+ // Try to extract score from text
107
+ const score = this.extractScoreFromText(response);
108
+ return {
109
+ score,
110
+ reasoning: "Could not parse structured response",
111
+ confidence: 0.3,
112
+ };
113
+ }
114
+ const hallucinations = Array.isArray(json.hallucinations)
115
+ ? json.hallucinations
116
+ : [];
117
+ const severities = hallucinations.map((h) => h.severity ?? "unknown");
118
+ const rawScore = typeof json.score === "number" ? json.score : 5;
119
+ const score = Math.max(0, Math.min(10, rawScore)); // Clamp to 0-10
120
+ return {
121
+ score,
122
+ reasoning: typeof json.reasoning === "string"
123
+ ? json.reasoning
124
+ : "No reasoning provided",
125
+ confidence: typeof json.confidence === "number" ? json.confidence : 0.8,
126
+ metadata: {
127
+ hallucinationCount: hallucinations.length,
128
+ hallucinations,
129
+ severityBreakdown: {
130
+ minor: severities.filter((s) => s === "minor").length,
131
+ moderate: severities.filter((s) => s === "moderate").length,
132
+ severe: severities.filter((s) => s === "severe").length,
133
+ },
134
+ },
135
+ };
136
+ }
137
+ }
138
/**
 * Async factory for {@link HallucinationScorer}.
 *
 * @param {object} [config] - Passed straight to the HallucinationScorer constructor.
 * @returns {Promise<HallucinationScorer>} Resolves with the new scorer; a
 *   constructor failure surfaces as a rejected promise.
 */
export async function createHallucinationScorer(config) {
    const scorer = new HallucinationScorer(config);
    return scorer;
}
141
+ //# sourceMappingURL=hallucinationScorer.js.map
@@ -0,0 +1,15 @@
1
+ /**
2
+ * @file LLM Scorers Index
3
+ * Export all LLM-based scorers
4
+ */
5
+ export { AnswerRelevancyScorer, createAnswerRelevancyScorer, } from "./answerRelevancyScorer.js";
6
+ export { BaseLLMScorer, DEFAULT_LLM_SCORER_CONFIG } from "./baseLLMScorer.js";
7
+ export { BiasDetectionScorer, createBiasDetectionScorer, } from "./biasDetectionScorer.js";
8
+ export { ContextPrecisionScorer, createContextPrecisionScorer, } from "./contextPrecisionScorer.js";
9
+ export { ContextRelevancyScorer, createContextRelevancyScorer, } from "./contextRelevancyScorer.js";
10
+ export { createFaithfulnessScorer, FaithfulnessScorer, } from "./faithfulnessScorer.js";
11
+ export { createHallucinationScorer, HallucinationScorer, } from "./hallucinationScorer.js";
12
+ export { createPromptAlignmentScorer, PromptAlignmentScorer, } from "./promptAlignmentScorer.js";
13
+ export { createSummarizationScorer, SummarizationScorer, } from "./summarizationScorer.js";
14
+ export { createToneConsistencyScorer, ToneConsistencyScorer, } from "./toneConsistencyScorer.js";
15
+ export { createToxicityScorer, ToxicityScorer } from "./toxicityScorer.js";
@@ -0,0 +1,17 @@
1
+ /**
2
+ * @file LLM Scorers Index
3
+ * Export all LLM-based scorers
4
+ */
5
+ export { AnswerRelevancyScorer, createAnswerRelevancyScorer, } from "./answerRelevancyScorer.js";
6
+ export { BaseLLMScorer, DEFAULT_LLM_SCORER_CONFIG } from "./baseLLMScorer.js";
7
+ export { BiasDetectionScorer, createBiasDetectionScorer, } from "./biasDetectionScorer.js";
8
+ export { ContextPrecisionScorer, createContextPrecisionScorer, } from "./contextPrecisionScorer.js";
9
+ export { ContextRelevancyScorer, createContextRelevancyScorer, } from "./contextRelevancyScorer.js";
10
+ export { createFaithfulnessScorer, FaithfulnessScorer, } from "./faithfulnessScorer.js";
11
+ // LLM Scorers
12
+ export { createHallucinationScorer, HallucinationScorer, } from "./hallucinationScorer.js";
13
+ export { createPromptAlignmentScorer, PromptAlignmentScorer, } from "./promptAlignmentScorer.js";
14
+ export { createSummarizationScorer, SummarizationScorer, } from "./summarizationScorer.js";
15
+ export { createToneConsistencyScorer, ToneConsistencyScorer, } from "./toneConsistencyScorer.js";
16
+ export { createToxicityScorer, ToxicityScorer } from "./toxicityScorer.js";
17
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1,12 @@
1
+ /**
2
+ * @file Prompt alignment scorer
3
+ * Measures how well the response aligns with prompt instructions
4
+ */
5
+ import type { LLMScorerConfig, ScoreResult, ScorerInput } from "../../../types/scorerTypes.js";
6
+ import { BaseLLMScorer } from "./baseLLMScorer.js";
7
+ export declare class PromptAlignmentScorer extends BaseLLMScorer {
8
+ constructor(config?: Partial<LLMScorerConfig>);
9
+ generatePrompt(input: ScorerInput): string;
10
+ parseResponse(response: string, _input: ScorerInput): Partial<ScoreResult>;
11
+ }
12
+ export declare function createPromptAlignmentScorer(config?: Partial<LLMScorerConfig>): Promise<PromptAlignmentScorer>;
@@ -0,0 +1,107 @@
1
+ /**
2
+ * @file Prompt alignment scorer
3
+ * Measures how well the response aligns with prompt instructions
4
+ */
5
+ import { BaseLLMScorer } from "./baseLLMScorer.js";
6
+ const PROMPT_ALIGNMENT_PROMPT = `You are an expert at evaluating how well AI responses follow instructions.
7
+
8
+ **User Instructions/Query:**
9
+ {{query}}
10
+
11
+ **AI Response:**
12
+ {{response}}
13
+
14
+ ## Instructions
15
+
16
+ Analyze how well the response follows the instructions in the query. Check for:
17
+
18
+ 1. **Instruction Following**: Does it do what was asked?
19
+ 2. **Format Compliance**: Does it follow requested format?
20
+ 3. **Constraint Adherence**: Does it respect any constraints given?
21
+ 4. **Completeness**: Does it address all parts of the request?
22
+ 5. **No Hallucinated Instructions**: Does it avoid adding unrequested content?
23
+
24
+ ## Output Format (JSON)
25
+
26
+ {
27
+ "score": <0-10>,
28
+ "instructionFollowing": {
29
+ "score": <0-10>,
30
+ "details": "<explanation>"
31
+ },
32
+ "formatCompliance": {
33
+ "score": <0-10>,
34
+ "details": "<explanation>"
35
+ },
36
+ "constraintAdherence": {
37
+ "score": <0-10>,
38
+ "details": "<explanation>"
39
+ },
40
+ "completeness": {
41
+ "score": <0-10>,
42
+ "details": "<explanation>"
43
+ },
44
+ "missedInstructions": ["<list of missed requirements>"],
45
+ "extraContent": ["<list of unrequested content>"],
46
+ "reasoning": "<overall assessment>",
47
+ "confidence": <0.0-1.0>
48
+ }`;
49
+ export class PromptAlignmentScorer extends BaseLLMScorer {
50
+ constructor(config) {
51
+ super({
52
+ id: "prompt-alignment",
53
+ name: "Prompt Alignment",
54
+ description: "Measures how well the response aligns with prompt instructions",
55
+ type: "llm",
56
+ category: "quality",
57
+ version: "1.0.0",
58
+ defaultConfig: {
59
+ enabled: true,
60
+ threshold: 0.7,
61
+ weight: 1.0,
62
+ timeout: 25000,
63
+ retries: 2,
64
+ },
65
+ requiredInputs: ["query", "response"],
66
+ optionalInputs: [],
67
+ }, config);
68
+ }
69
+ generatePrompt(input) {
70
+ return this.substituteTemplate(PROMPT_ALIGNMENT_PROMPT, {
71
+ query: input.query,
72
+ response: input.response,
73
+ });
74
+ }
75
+ parseResponse(response, _input) {
76
+ const json = this.extractJSON(response);
77
+ if (!json) {
78
+ const score = this.extractScoreFromText(response);
79
+ return {
80
+ score,
81
+ reasoning: "Could not parse structured response",
82
+ confidence: 0.3,
83
+ };
84
+ }
85
+ const score = Math.min(10, Math.max(0, typeof json.score === "number" ? json.score : 5));
86
+ const confidence = Math.min(1, Math.max(0, typeof json.confidence === "number" ? json.confidence : 0.8));
87
+ return {
88
+ score,
89
+ reasoning: typeof json.reasoning === "string"
90
+ ? json.reasoning
91
+ : "No reasoning provided",
92
+ confidence,
93
+ metadata: {
94
+ instructionFollowing: json.instructionFollowing ?? null,
95
+ formatCompliance: json.formatCompliance ?? null,
96
+ constraintAdherence: json.constraintAdherence ?? null,
97
+ completeness: json.completeness ?? null,
98
+ missedInstructions: json.missedInstructions ?? [],
99
+ extraContent: json.extraContent ?? [],
100
+ },
101
+ };
102
+ }
103
+ }
104
/**
 * Async factory for {@link PromptAlignmentScorer}.
 *
 * @param {object} [config] - Passed straight to the PromptAlignmentScorer constructor.
 * @returns {Promise<PromptAlignmentScorer>} Resolves with the new scorer; a
 *   constructor failure surfaces as a rejected promise.
 */
export async function createPromptAlignmentScorer(config) {
    const scorer = new PromptAlignmentScorer(config);
    return scorer;
}
107
+ //# sourceMappingURL=promptAlignmentScorer.js.map
@@ -0,0 +1,12 @@
1
+ /**
2
+ * @file Summarization quality scorer
3
+ * Evaluates the quality of AI-generated summaries
4
+ */
5
+ import type { LLMScorerConfig, ScoreResult, ScorerInput } from "../../../types/scorerTypes.js";
6
+ import { BaseLLMScorer } from "./baseLLMScorer.js";
7
+ export declare class SummarizationScorer extends BaseLLMScorer {
8
+ constructor(config?: Partial<LLMScorerConfig>);
9
+ generatePrompt(input: ScorerInput): string;
10
+ parseResponse(response: string, _input: ScorerInput): Partial<ScoreResult>;
11
+ }
12
+ export declare function createSummarizationScorer(config?: Partial<LLMScorerConfig>): Promise<SummarizationScorer>;
@@ -0,0 +1,115 @@
1
+ /**
2
+ * @file Summarization quality scorer
3
+ * Evaluates the quality of AI-generated summaries
4
+ */
5
+ import { BaseLLMScorer } from "./baseLLMScorer.js";
6
+ const SUMMARIZATION_PROMPT = `You are an expert at evaluating summary quality.
7
+
8
+ **Summary to Evaluate:**
9
+ {{response}}
10
+
11
+ **Original Content:**
12
+ {{context}}
13
+
14
+ {{#if hasQuery}}
15
+ **Summarization Request:**
16
+ {{query}}
17
+ {{/if}}
18
+
19
+ ## Evaluation Criteria
20
+
21
+ 1. **Accuracy**: Is the summary factually correct?
22
+ 2. **Coverage**: Does it capture the key points?
23
+ 3. **Conciseness**: Is it appropriately brief?
24
+ 4. **Coherence**: Is it well-organized and readable?
25
+ 5. **No Hallucinations**: Does it avoid adding new information?
26
+
27
+ ## Output Format (JSON)
28
+
29
+ {
30
+ "score": <0-10>,
31
+ "accuracy": {
32
+ "score": <0-10>,
33
+ "errors": ["<list of factual errors>"]
34
+ },
35
+ "coverage": {
36
+ "score": <0-10>,
37
+ "keyPointsCovered": ["<covered points>"],
38
+ "keyPointsMissed": ["<missed points>"]
39
+ },
40
+ "conciseness": {
41
+ "score": <0-10>,
42
+ "assessment": "<too long|appropriate|too short>"
43
+ },
44
+ "coherence": {
45
+ "score": <0-10>,
46
+ "issues": ["<any coherence issues>"]
47
+ },
48
+ "hallucinations": ["<any fabricated information>"],
49
+ "reasoning": "<overall assessment>",
50
+ "confidence": <0.0-1.0>
51
+ }`;
52
+ export class SummarizationScorer extends BaseLLMScorer {
53
+ constructor(config) {
54
+ super({
55
+ id: "summarization",
56
+ name: "Summarization Quality",
57
+ description: "Evaluates the quality of AI-generated summaries",
58
+ type: "llm",
59
+ category: "quality",
60
+ version: "1.0.0",
61
+ defaultConfig: {
62
+ enabled: true,
63
+ threshold: 0.7,
64
+ weight: 1.0,
65
+ timeout: 25000,
66
+ retries: 2,
67
+ },
68
+ requiredInputs: ["response", "context"],
69
+ optionalInputs: ["query"],
70
+ }, config);
71
+ }
72
+ generatePrompt(input) {
73
+ let prompt = SUMMARIZATION_PROMPT;
74
+ const hasQuery = !!input.query;
75
+ prompt = this.processConditionals(prompt, { hasQuery });
76
+ return this.substituteTemplate(prompt, {
77
+ response: input.response,
78
+ context: input.context?.join("\n\n") ?? "",
79
+ query: hasQuery ? input.query : "",
80
+ });
81
+ }
82
+ parseResponse(response, _input) {
83
+ const json = this.extractJSON(response);
84
+ if (!json) {
85
+ const score = this.extractScoreFromText(response);
86
+ return {
87
+ score,
88
+ reasoning: "Could not parse structured response",
89
+ confidence: 0.3,
90
+ };
91
+ }
92
+ const score = Math.min(10, Math.max(0, typeof json.score === "number"
93
+ ? json.score
94
+ : this.extractScoreFromText(response)));
95
+ const confidence = Math.min(1, Math.max(0, typeof json.confidence === "number" ? json.confidence : 0.3));
96
+ return {
97
+ score,
98
+ reasoning: typeof json.reasoning === "string"
99
+ ? json.reasoning
100
+ : "No reasoning provided",
101
+ confidence,
102
+ metadata: {
103
+ accuracy: json.accuracy ?? null,
104
+ coverage: json.coverage ?? null,
105
+ conciseness: json.conciseness ?? null,
106
+ coherence: json.coherence ?? null,
107
+ hallucinations: json.hallucinations ?? [],
108
+ },
109
+ };
110
+ }
111
+ }
112
/**
 * Async factory for {@link SummarizationScorer}.
 *
 * @param {object} [config] - Passed straight to the SummarizationScorer constructor.
 * @returns {Promise<SummarizationScorer>} Resolves with the new scorer; a
 *   constructor failure surfaces as a rejected promise.
 */
export async function createSummarizationScorer(config) {
    const scorer = new SummarizationScorer(config);
    return scorer;
}
115
+ //# sourceMappingURL=summarizationScorer.js.map
@@ -0,0 +1,12 @@
1
+ /**
2
+ * @file Tone consistency scorer
3
+ * Checks for consistent tone throughout the response
4
+ */
5
+ import type { LLMScorerConfig, ScoreResult, ScorerInput } from "../../../types/scorerTypes.js";
6
+ import { BaseLLMScorer } from "./baseLLMScorer.js";
7
/**
 * Type declaration for the tone consistency scorer, a BaseLLMScorer subclass
 * that checks for consistent tone throughout a response.
 */
+ export declare class ToneConsistencyScorer extends BaseLLMScorer {
8
/** @param config Optional partial LLMScorerConfig. */
+ constructor(config?: Partial<LLMScorerConfig>);
9
/** Produces the prompt string for the given scorer input. */
+ generatePrompt(input: ScorerInput): string;
10
/** Turns the raw response string into a partial ScoreResult; the underscore prefix marks `_input` as unused. */
+ parseResponse(response: string, _input: ScorerInput): Partial<ScoreResult>;
11
+ }
12
/** Async factory: takes an optional partial LLMScorerConfig and resolves to a ToneConsistencyScorer. */
+ export declare function createToneConsistencyScorer(config?: Partial<LLMScorerConfig>): Promise<ToneConsistencyScorer>;
@@ -0,0 +1,107 @@
1
+ /**
2
+ * @file Tone consistency scorer
3
+ * Checks for consistent tone throughout the response
4
+ */
5
+ import { BaseLLMScorer } from "./baseLLMScorer.js";
6
/**
 * Prompt template used by ToneConsistencyScorer.generatePrompt().
 * The {{#if hasQuery}} section is resolved through processConditionals(), and
 * {{query}} / {{response}} are filled through substituteTemplate(). The reply is
 * requested as JSON with score (0-10), dominantTone, formalityLevel, toneShifts,
 * reasoning and confidence (0.0-1.0).
 */
+ const TONE_CONSISTENCY_PROMPT = `You are an expert at analyzing writing tone and style consistency.
7
+
8
+ {{#if hasQuery}}
9
+ **User Query:**
10
+ {{query}}
11
+ {{/if}}
12
+
13
+ **AI Response:**
14
+ {{response}}
15
+
16
+ ## Instructions
17
+
18
+ Analyze the response for tone consistency. Check for:
19
+ 1. Consistent formality level throughout
20
+ 2. Consistent emotional tone
21
+ 3. Consistent voice (e.g., professional, casual, friendly)
22
+ 4. No jarring shifts in style
23
+ 5. Appropriate tone for the context
24
+
25
+ ## Output Format (JSON)
26
+
27
+ {
28
+ "score": <0-10>,
29
+ "dominantTone": "<identified main tone>",
30
+ "formalityLevel": "<formal|semi-formal|casual|mixed>",
31
+ "toneShifts": [
32
+ {
33
+ "location": "<beginning|middle|end>",
34
+ "from": "<original tone>",
35
+ "to": "<shifted tone>",
36
+ "severity": "<minor|moderate|major>"
37
+ }
38
+ ],
39
+ "reasoning": "<detailed assessment>",
40
+ "confidence": <0.0-1.0>
41
+ }`;
42
/**
 * LLM-backed scorer that checks for consistent tone throughout a response.
 *
 * The prompt is built from TONE_CONSISTENCY_PROMPT; templating and reply
 * extraction are delegated to helpers inherited from BaseLLMScorer.
 */
export class ToneConsistencyScorer extends BaseLLMScorer {
    /**
     * @param {object} [config] - Scorer configuration, forwarded unchanged to
     *   BaseLLMScorer together with this scorer's static definition.
     */
    constructor(config) {
        super({
            id: "tone-consistency",
            name: "Tone Consistency",
            description: "Checks for consistent tone throughout the response",
            type: "llm",
            category: "quality",
            version: "1.0.0",
            defaultConfig: {
                enabled: true,
                threshold: 0.7,
                weight: 0.8,
                timeout: 20000,
                retries: 1,
            },
            requiredInputs: ["response"],
            optionalInputs: ["query"],
        }, config);
    }

    /**
     * Builds the evaluation prompt for one input.
     *
     * @param {object} input - Reads `response` and the optional `query`.
     * @returns {*} The template after conditional processing and placeholder
     *   substitution (whatever `substituteTemplate` returns).
     */
    generatePrompt(input) {
        const hasQuery = !!input.query;
        let prompt = this.processConditionals(TONE_CONSISTENCY_PROMPT, { hasQuery });
        // The query placeholder is only filled when a query was supplied.
        if (hasQuery) {
            prompt = this.substituteTemplate(prompt, { query: input.query });
        }
        return this.substituteTemplate(prompt, { response: input.response });
    }

    /**
     * Converts the raw model reply into a partial score result.
     *
     * On the structured path the score is clamped to [0, 10] and the confidence
     * to [0, 1], the ranges the prompt asks for, so an out-of-range model value
     * cannot leak into downstream threshold or weighting logic.
     *
     * @param {string} response - Raw text returned by the model.
     * @param {object} _input - Unused.
     * @returns {object} `score`, `reasoning`, `confidence`, plus `metadata`
     *   when structured JSON could be extracted.
     */
    parseResponse(response, _input) {
        const json = this.extractJSON(response);
        if (!json) {
            // No structured payload: fall back to a score scraped from the text.
            return {
                score: this.extractScoreFromText(response),
                reasoning: "Could not parse structured response",
                confidence: 0.3,
            };
        }
        // Anything other than an array is treated as "no shifts reported".
        const toneShifts = Array.isArray(json.toneShifts) ? json.toneShifts : [];
        const rawScore = typeof json.score === "number"
            ? json.score
            : this.extractScoreFromText(response);
        const rawConfidence = typeof json.confidence === "number" ? json.confidence : 0.3;
        const score = Math.min(10, Math.max(0, rawScore));
        const confidence = Math.min(1, Math.max(0, rawConfidence));
        return {
            score,
            reasoning: typeof json.reasoning === "string"
                ? json.reasoning
                : "Could not parse structured response",
            confidence,
            metadata: {
                dominantTone: json.dominantTone ?? "unknown",
                formalityLevel: json.formalityLevel ?? "unknown",
                toneShifts,
                shiftCount: toneShifts.length,
            },
        };
    }
}
104
/**
 * Async factory for ToneConsistencyScorer.
 *
 * @param {object} [config] - Passed straight to the ToneConsistencyScorer constructor.
 * @returns {Promise<ToneConsistencyScorer>} Promise resolving to the new scorer.
 */
export async function createToneConsistencyScorer(config) {
    const scorer = new ToneConsistencyScorer(config);
    return scorer;
}
107
+ //# sourceMappingURL=toneConsistencyScorer.js.map