@juspay/neurolink 9.36.1 → 9.37.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202) hide show
  1. package/CHANGELOG.md +6 -0
  2. package/dist/auth/errors.d.ts +1 -1
  3. package/dist/auth/middleware/AuthMiddleware.d.ts +1 -1
  4. package/dist/auth/providers/BaseAuthProvider.d.ts +1 -1
  5. package/dist/browser/neurolink.min.js +921 -423
  6. package/dist/cli/commands/evaluate.d.ts +48 -0
  7. package/dist/cli/commands/evaluate.js +955 -0
  8. package/dist/cli/parser.js +4 -1
  9. package/dist/evaluation/BatchEvaluator.d.ts +163 -0
  10. package/dist/evaluation/BatchEvaluator.js +267 -0
  11. package/dist/evaluation/EvaluationAggregator.d.ts +272 -0
  12. package/dist/evaluation/EvaluationAggregator.js +377 -0
  13. package/dist/evaluation/EvaluatorFactory.d.ts +113 -0
  14. package/dist/evaluation/EvaluatorFactory.js +280 -0
  15. package/dist/evaluation/EvaluatorRegistry.d.ts +160 -0
  16. package/dist/evaluation/EvaluatorRegistry.js +184 -0
  17. package/dist/evaluation/errors/EvaluationError.d.ts +189 -0
  18. package/dist/evaluation/errors/EvaluationError.js +206 -0
  19. package/dist/evaluation/errors/index.d.ts +4 -0
  20. package/dist/evaluation/errors/index.js +4 -0
  21. package/dist/evaluation/hooks/index.d.ts +6 -0
  22. package/dist/evaluation/hooks/index.js +6 -0
  23. package/dist/evaluation/hooks/langfuseAdapter.d.ts +99 -0
  24. package/dist/evaluation/hooks/langfuseAdapter.js +172 -0
  25. package/dist/evaluation/hooks/observabilityHooks.d.ts +129 -0
  26. package/dist/evaluation/hooks/observabilityHooks.js +181 -0
  27. package/dist/evaluation/index.d.ts +11 -2
  28. package/dist/evaluation/index.js +15 -0
  29. package/dist/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
  30. package/dist/evaluation/pipeline/evaluationPipeline.js +381 -0
  31. package/dist/evaluation/pipeline/index.d.ts +8 -0
  32. package/dist/evaluation/pipeline/index.js +8 -0
  33. package/dist/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
  34. package/dist/evaluation/pipeline/pipelineBuilder.js +260 -0
  35. package/dist/evaluation/pipeline/presets.d.ts +66 -0
  36. package/dist/evaluation/pipeline/presets.js +224 -0
  37. package/dist/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
  38. package/dist/evaluation/pipeline/strategies/batchStrategy.js +238 -0
  39. package/dist/evaluation/pipeline/strategies/index.d.ts +6 -0
  40. package/dist/evaluation/pipeline/strategies/index.js +6 -0
  41. package/dist/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
  42. package/dist/evaluation/pipeline/strategies/samplingStrategy.js +238 -0
  43. package/dist/evaluation/reporting/index.d.ts +6 -0
  44. package/dist/evaluation/reporting/index.js +6 -0
  45. package/dist/evaluation/reporting/metricsCollector.d.ts +147 -0
  46. package/dist/evaluation/reporting/metricsCollector.js +285 -0
  47. package/dist/evaluation/reporting/reportGenerator.d.ts +90 -0
  48. package/dist/evaluation/reporting/reportGenerator.js +374 -0
  49. package/dist/evaluation/scorers/baseScorer.d.ts +83 -0
  50. package/dist/evaluation/scorers/baseScorer.js +232 -0
  51. package/dist/evaluation/scorers/customScorerUtils.d.ts +95 -0
  52. package/dist/evaluation/scorers/customScorerUtils.js +381 -0
  53. package/dist/evaluation/scorers/index.d.ts +10 -0
  54. package/dist/evaluation/scorers/index.js +16 -0
  55. package/dist/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
  56. package/dist/evaluation/scorers/llm/answerRelevancyScorer.js +99 -0
  57. package/dist/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
  58. package/dist/evaluation/scorers/llm/baseLLMScorer.js +281 -0
  59. package/dist/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
  60. package/dist/evaluation/scorers/llm/biasDetectionScorer.js +127 -0
  61. package/dist/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
  62. package/dist/evaluation/scorers/llm/contextPrecisionScorer.js +92 -0
  63. package/dist/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
  64. package/dist/evaluation/scorers/llm/contextRelevancyScorer.js +107 -0
  65. package/dist/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
  66. package/dist/evaluation/scorers/llm/faithfulnessScorer.js +121 -0
  67. package/dist/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
  68. package/dist/evaluation/scorers/llm/hallucinationScorer.js +140 -0
  69. package/dist/evaluation/scorers/llm/index.d.ts +15 -0
  70. package/dist/evaluation/scorers/llm/index.js +16 -0
  71. package/dist/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
  72. package/dist/evaluation/scorers/llm/promptAlignmentScorer.js +106 -0
  73. package/dist/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
  74. package/dist/evaluation/scorers/llm/summarizationScorer.js +114 -0
  75. package/dist/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
  76. package/dist/evaluation/scorers/llm/toneConsistencyScorer.js +106 -0
  77. package/dist/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
  78. package/dist/evaluation/scorers/llm/toxicityScorer.js +121 -0
  79. package/dist/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
  80. package/dist/evaluation/scorers/rule/baseRuleScorer.js +233 -0
  81. package/dist/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
  82. package/dist/evaluation/scorers/rule/contentSimilarityScorer.js +350 -0
  83. package/dist/evaluation/scorers/rule/formatScorer.d.ts +147 -0
  84. package/dist/evaluation/scorers/rule/formatScorer.js +470 -0
  85. package/dist/evaluation/scorers/rule/index.d.ts +9 -0
  86. package/dist/evaluation/scorers/rule/index.js +10 -0
  87. package/dist/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
  88. package/dist/evaluation/scorers/rule/keywordCoverageScorer.js +347 -0
  89. package/dist/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
  90. package/dist/evaluation/scorers/rule/lengthScorer.js +351 -0
  91. package/dist/evaluation/scorers/scorerBuilder.d.ts +161 -0
  92. package/dist/evaluation/scorers/scorerBuilder.js +420 -0
  93. package/dist/evaluation/scorers/scorerRegistry.d.ts +62 -0
  94. package/dist/evaluation/scorers/scorerRegistry.js +467 -0
  95. package/dist/index.d.ts +37 -25
  96. package/dist/index.js +65 -26
  97. package/dist/lib/auth/providers/BaseAuthProvider.d.ts +1 -1
  98. package/dist/lib/evaluation/BatchEvaluator.d.ts +163 -0
  99. package/dist/lib/evaluation/BatchEvaluator.js +268 -0
  100. package/dist/lib/evaluation/EvaluationAggregator.d.ts +272 -0
  101. package/dist/lib/evaluation/EvaluationAggregator.js +378 -0
  102. package/dist/lib/evaluation/EvaluatorFactory.d.ts +113 -0
  103. package/dist/lib/evaluation/EvaluatorFactory.js +281 -0
  104. package/dist/lib/evaluation/EvaluatorRegistry.d.ts +160 -0
  105. package/dist/lib/evaluation/EvaluatorRegistry.js +185 -0
  106. package/dist/lib/evaluation/errors/EvaluationError.d.ts +189 -0
  107. package/dist/lib/evaluation/errors/EvaluationError.js +207 -0
  108. package/dist/lib/evaluation/errors/index.d.ts +4 -0
  109. package/dist/lib/evaluation/errors/index.js +5 -0
  110. package/dist/lib/evaluation/hooks/index.d.ts +6 -0
  111. package/dist/lib/evaluation/hooks/index.js +7 -0
  112. package/dist/lib/evaluation/hooks/langfuseAdapter.d.ts +99 -0
  113. package/dist/lib/evaluation/hooks/langfuseAdapter.js +173 -0
  114. package/dist/lib/evaluation/hooks/observabilityHooks.d.ts +129 -0
  115. package/dist/lib/evaluation/hooks/observabilityHooks.js +182 -0
  116. package/dist/lib/evaluation/index.d.ts +11 -2
  117. package/dist/lib/evaluation/index.js +15 -0
  118. package/dist/lib/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
  119. package/dist/lib/evaluation/pipeline/evaluationPipeline.js +382 -0
  120. package/dist/lib/evaluation/pipeline/index.d.ts +8 -0
  121. package/dist/lib/evaluation/pipeline/index.js +9 -0
  122. package/dist/lib/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
  123. package/dist/lib/evaluation/pipeline/pipelineBuilder.js +261 -0
  124. package/dist/lib/evaluation/pipeline/presets.d.ts +66 -0
  125. package/dist/lib/evaluation/pipeline/presets.js +225 -0
  126. package/dist/lib/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
  127. package/dist/lib/evaluation/pipeline/strategies/batchStrategy.js +239 -0
  128. package/dist/lib/evaluation/pipeline/strategies/index.d.ts +6 -0
  129. package/dist/lib/evaluation/pipeline/strategies/index.js +7 -0
  130. package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
  131. package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.js +239 -0
  132. package/dist/lib/evaluation/reporting/index.d.ts +6 -0
  133. package/dist/lib/evaluation/reporting/index.js +7 -0
  134. package/dist/lib/evaluation/reporting/metricsCollector.d.ts +147 -0
  135. package/dist/lib/evaluation/reporting/metricsCollector.js +286 -0
  136. package/dist/lib/evaluation/reporting/reportGenerator.d.ts +90 -0
  137. package/dist/lib/evaluation/reporting/reportGenerator.js +375 -0
  138. package/dist/lib/evaluation/scorers/baseScorer.d.ts +83 -0
  139. package/dist/lib/evaluation/scorers/baseScorer.js +233 -0
  140. package/dist/lib/evaluation/scorers/customScorerUtils.d.ts +95 -0
  141. package/dist/lib/evaluation/scorers/customScorerUtils.js +382 -0
  142. package/dist/lib/evaluation/scorers/index.d.ts +10 -0
  143. package/dist/lib/evaluation/scorers/index.js +17 -0
  144. package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
  145. package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.js +100 -0
  146. package/dist/lib/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
  147. package/dist/lib/evaluation/scorers/llm/baseLLMScorer.js +282 -0
  148. package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
  149. package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.js +128 -0
  150. package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
  151. package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.js +93 -0
  152. package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
  153. package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.js +108 -0
  154. package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
  155. package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.js +122 -0
  156. package/dist/lib/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
  157. package/dist/lib/evaluation/scorers/llm/hallucinationScorer.js +141 -0
  158. package/dist/lib/evaluation/scorers/llm/index.d.ts +15 -0
  159. package/dist/lib/evaluation/scorers/llm/index.js +17 -0
  160. package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
  161. package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.js +107 -0
  162. package/dist/lib/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
  163. package/dist/lib/evaluation/scorers/llm/summarizationScorer.js +115 -0
  164. package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
  165. package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.js +107 -0
  166. package/dist/lib/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
  167. package/dist/lib/evaluation/scorers/llm/toxicityScorer.js +122 -0
  168. package/dist/lib/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
  169. package/dist/lib/evaluation/scorers/rule/baseRuleScorer.js +234 -0
  170. package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
  171. package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.js +351 -0
  172. package/dist/lib/evaluation/scorers/rule/formatScorer.d.ts +147 -0
  173. package/dist/lib/evaluation/scorers/rule/formatScorer.js +471 -0
  174. package/dist/lib/evaluation/scorers/rule/index.d.ts +9 -0
  175. package/dist/lib/evaluation/scorers/rule/index.js +11 -0
  176. package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
  177. package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.js +348 -0
  178. package/dist/lib/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
  179. package/dist/lib/evaluation/scorers/rule/lengthScorer.js +352 -0
  180. package/dist/lib/evaluation/scorers/scorerBuilder.d.ts +161 -0
  181. package/dist/lib/evaluation/scorers/scorerBuilder.js +421 -0
  182. package/dist/lib/evaluation/scorers/scorerRegistry.d.ts +62 -0
  183. package/dist/lib/evaluation/scorers/scorerRegistry.js +468 -0
  184. package/dist/lib/index.d.ts +37 -25
  185. package/dist/lib/index.js +65 -26
  186. package/dist/lib/neurolink.d.ts +204 -0
  187. package/dist/lib/neurolink.js +296 -0
  188. package/dist/lib/types/index.d.ts +3 -1
  189. package/dist/lib/types/index.js +3 -2
  190. package/dist/lib/types/scorerTypes.d.ts +423 -0
  191. package/dist/lib/types/scorerTypes.js +6 -0
  192. package/dist/lib/utils/errorHandling.d.ts +20 -0
  193. package/dist/lib/utils/errorHandling.js +60 -0
  194. package/dist/neurolink.d.ts +204 -0
  195. package/dist/neurolink.js +296 -0
  196. package/dist/types/index.d.ts +3 -1
  197. package/dist/types/index.js +3 -2
  198. package/dist/types/scorerTypes.d.ts +423 -0
  199. package/dist/types/scorerTypes.js +5 -0
  200. package/dist/utils/errorHandling.d.ts +20 -0
  201. package/dist/utils/errorHandling.js +60 -0
  202. package/package.json +1 -1
@@ -0,0 +1,99 @@
1
+ /**
2
+ * @file Langfuse Adapter
3
+ * Integration with Langfuse for LLM observability
4
+ */
5
+ /**
6
+ * Langfuse client interface (minimal for type safety)
+ *
+ * Structural subset of a Langfuse client: only `score` is required, while
+ * `trace` and `shutdown` are optional capabilities.
7
+ */
8
+ export type LangfuseClient = {
9
+ score: (params: { // records one named score
10
+ name: string;
11
+ value: number;
12
+ traceId?: string;
13
+ observationId?: string; // NOTE(review): never supplied by LangfuseAdapter in this release
14
+ comment?: string;
15
+ metadata?: Record<string, unknown>;
16
+ }) => Promise<unknown>;
17
+ trace?: (params: { // optional; returns the id of the trace it creates
18
+ name: string;
19
+ metadata?: Record<string, unknown>;
20
+ tags?: string[];
21
+ }) => {
22
+ id: string;
23
+ };
24
+ shutdown?: () => Promise<void>; // optional; awaited by LangfuseAdapter.shutdown() when present
25
+ };
26
+ /**
27
+ * Langfuse adapter configuration
+ *
+ * Every field except `client` is optional; the defaults noted below are the
+ * ones applied by the LangfuseAdapter constructor.
28
+ */
29
+ export type LangfuseAdapterConfig = {
30
+ /** Langfuse client instance */
31
+ client: LangfuseClient;
32
+ /** Prefix for score names (default: "eval"); joined to the score name with "." */
33
+ scorePrefix?: string;
34
+ /** Include detailed metadata on automatically forwarded scores (default: true) */
35
+ includeMetadata?: boolean;
36
+ /** Tags to add to all scores (default: []). NOTE(review): the adapter implementation shipped in this release does not appear to read this field - confirm. */
37
+ tags?: string[];
38
+ /** Whether to send pipeline-level scores, i.e. subscribe to "pipeline:end" (default: true) */
39
+ sendPipelineScores?: boolean;
40
+ /** Whether to send individual scorer scores, i.e. subscribe to "scorer:end" (default: true) */
41
+ sendScorerScores?: boolean;
42
+ };
43
+ /**
44
+ * Langfuse adapter for evaluation observability
+ *
+ * Forwards "scorer:end" and "pipeline:end" events from the shared
+ * observability hooks to the configured client as scores.
45
+ */
46
+ export declare class LangfuseAdapter {
47
+ private _config;
48
+ private _unsubscribers;
49
+ private _traceIdMap;
50
+ constructor(config: LangfuseAdapterConfig);
51
+ /**
52
+ * Start listening to evaluation events
+ * (returns immediately when subscriptions are already active)
53
+ */
54
+ start(): void;
55
+ /**
56
+ * Stop listening to events
+ * (runs every stored unsubscribe callback and clears internal state)
57
+ */
58
+ stop(): void;
59
+ /**
60
+ * Send scorer score to Langfuse
+ * (uses the result's normalized score; failures are logged, not thrown)
61
+ */
62
+ private _sendScorerScore;
63
+ /**
64
+ * Send pipeline scores to Langfuse
+ * (sends the overall score divided by 10; failures are logged)
65
+ */
66
+ private _sendPipelineScores;
67
+ /**
68
+ * Manually send a score to Langfuse
+ * The configured score prefix is prepended to `name`; unlike the automatic
+ * forwarding paths, a client error rejects the returned promise.
69
+ */
70
+ sendScore(name: string, value: number, options?: {
71
+ traceId?: string;
72
+ comment?: string;
73
+ metadata?: Record<string, unknown>;
74
+ }): Promise<void>;
75
+ /**
76
+ * Shutdown the adapter and flush any pending data
+ * (calls stop(), then awaits the client's optional shutdown())
77
+ */
78
+ shutdown(): Promise<void>;
79
+ }
80
+ /**
81
+ * Create a Langfuse adapter
+ * (the adapter is returned unstarted; call start() to begin forwarding)
82
+ */
83
+ export declare function createLangfuseAdapter(config: LangfuseAdapterConfig): LangfuseAdapter;
84
+ /**
85
+ * Create and start a Langfuse adapter
+ * (equivalent to constructing the adapter and calling start() on it)
86
+ */
87
+ export declare function startLangfuseAdapter(config: LangfuseAdapterConfig): LangfuseAdapter;
88
+ /**
89
+ * Helper: Create a mock Langfuse client for testing
+ *
+ * The returned client appends every `score` call's params to `scores`
+ * instead of contacting any service.
90
+ */
91
+ export declare function createMockLangfuseClient(): LangfuseClient & {
92
+ scores: Array<{ // recorded score() params, in call order
93
+ name: string;
94
+ value: number;
95
+ traceId?: string;
96
+ comment?: string;
97
+ metadata?: Record<string, unknown>;
98
+ }>;
99
+ };
@@ -0,0 +1,173 @@
1
+ /**
2
+ * @file Langfuse Adapter
3
+ * Integration with Langfuse for LLM observability
4
+ */
5
+ import { logger } from "../../utils/logger.js";
6
+ import { observabilityHooks } from "./observabilityHooks.js";
7
/**
 * Langfuse adapter for evaluation observability.
 *
 * Subscribes to the shared `observabilityHooks` emitter and forwards scorer
 * and pipeline results to a Langfuse-style client as scores. Automatic
 * forwarding is best-effort: failures are logged and never propagate to the
 * code that emitted the event.
 */
export class LangfuseAdapter {
    /** Effective configuration (defaults overlaid with caller options). */
    _config;
    /** Unsubscribe callbacks returned by `observabilityHooks.on`. */
    _unsubscribers = [];
    /** Trace-id bookkeeping; currently only ever cleared by `stop()`. */
    _traceIdMap = new Map();
    /**
     * @param config - Adapter options. `client` is required; every other
     *   option falls back to its default when omitted or `undefined`.
     */
    constructor(config) {
        const defaults = {
            scorePrefix: "eval",
            includeMetadata: true,
            tags: [],
            sendPipelineScores: true,
            sendScorerScores: true,
        };
        const merged = { ...defaults, ...config };
        // An option passed explicitly as `undefined` must not clobber its
        // default, otherwise score names become e.g. "undefined.accuracy".
        for (const [key, fallback] of Object.entries(defaults)) {
            if (merged[key] === undefined) {
                merged[key] = fallback;
            }
        }
        this._config = merged;
    }
    /**
     * Start listening to evaluation events.
     * Does nothing when subscriptions are already active.
     */
    start() {
        // Prevent duplicate subscriptions
        if (this._unsubscribers.length > 0) {
            return;
        }
        // Listen for scorer completions
        if (this._config.sendScorerScores) {
            this._unsubscribers.push(observabilityHooks.on("scorer:end", (event) => {
                // Fire-and-forget: the send method handles its own failures.
                void this._sendScorerScore(event.result, event.traceContext?.traceId);
            }));
        }
        // Listen for pipeline completions
        if (this._config.sendPipelineScores) {
            this._unsubscribers.push(observabilityHooks.on("pipeline:end", (event) => {
                // Fire-and-forget: the send method handles its own failures.
                void this._sendPipelineScores(event.result, event.traceContext?.traceId);
            }));
        }
        logger.debug("Langfuse adapter started");
    }
    /**
     * Stop listening to events and drop internal bookkeeping.
     */
    stop() {
        for (const unsubscribe of this._unsubscribers) {
            unsubscribe();
        }
        this._unsubscribers = [];
        this._traceIdMap.clear();
        logger.debug("Langfuse adapter stopped");
    }
    /**
     * Send a single scorer result to Langfuse.
     * Never rejects: any failure is logged instead.
     *
     * @param result - Scorer result; its `normalizedScore` is sent as the value.
     * @param traceId - Optional trace to attach the score to.
     */
    async _sendScorerScore(result, traceId) {
        try {
            const scoreName = `${this._config.scorePrefix}.${result.scorerId}`;
            const normalizedValue = result.normalizedScore; // Already 0-1
            await this._config.client.score({
                name: scoreName,
                value: normalizedValue,
                traceId,
                comment: result.reasoning,
                metadata: this._config.includeMetadata
                    ? {
                        passed: result.passed,
                        threshold: result.threshold,
                        computeTime: result.computeTime,
                        confidence: result.confidence,
                        ...(result.metadata ?? {}),
                    }
                    : undefined,
            });
            logger.debug(`Sent score to Langfuse: ${scoreName}=${normalizedValue}`);
        }
        catch (error) {
            logger.error("Failed to send score to Langfuse", {
                // `result` itself may be what was missing, so read it defensively.
                scorerId: result?.scorerId,
                error: error instanceof Error ? error.message : String(error),
            });
        }
    }
    /**
     * Send the overall pipeline score to Langfuse.
     * Never rejects: any failure is logged instead.
     *
     * @param result - Pipeline result; `overallScore` is divided by 10 before sending.
     * @param externalTraceId - Trace id to use; falls back to `result.correlationId`.
     */
    async _sendPipelineScores(result, externalTraceId) {
        // Read defensively: this runs un-awaited from an event handler, so a
        // throw here would surface as an unhandled promise rejection.
        const traceId = externalTraceId ?? result?.correlationId;
        const pipelineName = result?.pipelineConfig?.name ?? "unnamed";
        try {
            const normalizedOverall = result.overallScore / 10; // Normalize to 0-1
            // Send overall pipeline score
            await this._config.client.score({
                name: `${this._config.scorePrefix}.pipeline.${pipelineName}.overall`,
                value: normalizedOverall,
                traceId,
                comment: `Pipeline evaluation: ${result.passed ? "PASSED" : "FAILED"}`,
                metadata: this._config.includeMetadata
                    ? {
                        passed: result.passed,
                        aggregationMethod: result.aggregationMethod,
                        scorerCount: result.scores.length,
                        totalComputeTime: result.totalComputeTime,
                        errorCount: result.errors.length,
                    }
                    : undefined,
            });
            logger.debug(`Sent pipeline score to Langfuse: ${pipelineName}=${normalizedOverall}`);
        }
        catch (error) {
            logger.error("Failed to send pipeline score to Langfuse", {
                pipelineName,
                error: error instanceof Error ? error.message : String(error),
            });
        }
    }
    /**
     * Manually send a score to Langfuse.
     * Unlike the automatic paths, a client failure rejects the returned promise.
     *
     * @param name - Score name; the configured prefix is prepended.
     * @param value - Score value, passed through unchanged.
     * @param options - Optional `traceId`, `comment` and `metadata`.
     */
    async sendScore(name, value, options) {
        const scoreName = `${this._config.scorePrefix}.${name}`;
        await this._config.client.score({
            name: scoreName,
            value,
            traceId: options?.traceId,
            comment: options?.comment,
            metadata: options?.metadata,
        });
    }
    /**
     * Shutdown the adapter and flush any pending data.
     * Stops listening first, then awaits the client's `shutdown` if it has one.
     */
    async shutdown() {
        this.stop();
        if (this._config.client.shutdown) {
            await this._config.client.shutdown();
        }
    }
}
145
/**
 * Build a Langfuse adapter without starting it.
 *
 * @param config - Adapter configuration, passed straight to the constructor.
 * @returns The new, not-yet-listening adapter.
 */
export function createLangfuseAdapter(config) {
    const adapter = new LangfuseAdapter(config);
    return adapter;
}
151
/**
 * Build a Langfuse adapter and immediately begin listening for events.
 *
 * @param config - Adapter configuration, passed straight to the constructor.
 * @returns The adapter, with `start()` already called on it.
 */
export function startLangfuseAdapter(config) {
    const runningAdapter = new LangfuseAdapter(config);
    runningAdapter.start();
    return runningAdapter;
}
159
/**
 * Helper: in-memory stand-in for a Langfuse client, intended for tests.
 *
 * @returns A client whose `score` appends its params to the exposed `scores`
 *   array and resolves to `{ id: "score-<count>" }`, plus a no-op `shutdown`.
 */
export function createMockLangfuseClient() {
    const recorded = [];
    async function score(params) {
        recorded.push(params);
        return { id: `score-${recorded.length}` };
    }
    async function shutdown() { }
    return { scores: recorded, score, shutdown };
}
173
+ //# sourceMappingURL=langfuseAdapter.js.map
@@ -0,0 +1,129 @@
1
+ /**
2
+ * @file Observability Hooks
3
+ * OpenTelemetry integration for evaluation tracing
4
+ */
5
+ import type { EvaluationTraceContext, ScoreResult } from "../../types/scorerTypes.js";
6
+ import type { PipelineResult } from "../pipeline/evaluationPipeline.js";
7
+ /**
8
+ * Event handler type
+ *
+ * A handler may be synchronous or return a promise; promises returned by
+ * handlers are awaited by ObservabilityHooks.emit.
9
+ */
10
+ export type EventHandler<T> = (event: T) => void | Promise<void>;
11
+ /**
12
+ * Evaluation events
+ *
+ * Maps each event name to its payload shape. `traceContext` is not supplied
+ * by emitters: ObservabilityHooks.emit attaches the currently stored trace
+ * context (possibly undefined) to every payload.
13
+ */
14
+ export type EvaluationEvents = {
15
+ "scorer:start": {
16
+ scorerId: string;
17
+ scorerName: string;
18
+ timestamp: number; // presumably epoch milliseconds (the console hook feeds it to `new Date`) - confirm
19
+ traceContext?: EvaluationTraceContext;
20
+ };
21
+ "scorer:end": {
22
+ scorerId: string;
23
+ scorerName: string;
24
+ result: ScoreResult;
25
+ timestamp: number;
26
+ duration: number; // the console hook prints this with an "ms" suffix
27
+ traceContext?: EvaluationTraceContext;
28
+ };
29
+ "scorer:error": {
30
+ scorerId: string;
31
+ scorerName: string;
32
+ error: string; // error text rather than an Error object
33
+ timestamp: number;
34
+ traceContext?: EvaluationTraceContext;
35
+ };
36
+ "pipeline:start": {
37
+ pipelineName: string;
38
+ scorerCount: number;
39
+ timestamp: number;
40
+ correlationId: string;
41
+ traceContext?: EvaluationTraceContext;
42
+ };
43
+ "pipeline:end": {
44
+ pipelineName: string;
45
+ result: PipelineResult;
46
+ timestamp: number;
47
+ duration: number; // the console hook prints this with an "ms" suffix
48
+ traceContext?: EvaluationTraceContext;
49
+ };
50
+ "pipeline:error": {
51
+ pipelineName: string;
52
+ error: string; // error text rather than an Error object
53
+ timestamp: number;
54
+ traceContext?: EvaluationTraceContext;
55
+ };
56
+ };
57
+ /**
58
+ * Observability hooks manager
+ *
+ * A small typed event emitter: handlers are stored per event name and every
+ * emitted payload is stamped with the current trace context.
59
+ */
60
+ export declare class ObservabilityHooks {
61
+ private _handlers;
62
+ private _traceContext?;
63
+ private _enabled;
64
+ /**
65
+ * Enable/disable observability
+ * (while disabled, emit() returns without calling any handler)
66
+ */
67
+ set enabled(value: boolean);
68
+ get enabled(): boolean;
69
+ /**
70
+ * Set trace context for all events
+ * (attached to every payload emitted afterwards)
71
+ */
72
+ setTraceContext(context: EvaluationTraceContext): void;
73
+ /**
74
+ * Clear trace context
75
+ */
76
+ clearTraceContext(): void;
77
+ /**
78
+ * Get current trace context
79
+ */
80
+ getTraceContext(): EvaluationTraceContext | undefined;
81
+ /**
82
+ * Register an event handler
+ * Returns a function that removes this handler again.
83
+ */
84
+ on<K extends keyof EvaluationEvents>(event: K, handler: EventHandler<EvaluationEvents[K]>): () => void;
85
+ /**
86
+ * Remove an event handler
+ * (no effect when the handler is not registered for that event)
87
+ */
88
+ off<K extends keyof EvaluationEvents>(event: K, handler: EventHandler<EvaluationEvents[K]>): void;
89
+ /**
90
+ * Emit an event
+ * Calls every handler registered for `event`; handler errors are logged
+ * rather than rethrown, and promises returned by handlers are awaited
+ * before the returned promise resolves.
91
+ */
92
+ emit<K extends keyof EvaluationEvents>(event: K, data: Omit<EvaluationEvents[K], "traceContext">): Promise<void>;
93
+ /**
94
+ * Clear all handlers
+ * (for every event name)
95
+ */
96
+ clear(): void;
97
+ /**
98
+ * Get handler count for an event
+ * (0 when nothing has been registered for it)
99
+ */
100
+ listenerCount(event: keyof EvaluationEvents): number;
101
+ }
102
+ /**
103
+ * Global observability hooks instance
+ * (the shared emitter that the Langfuse adapter and the helper hooks below register on)
104
+ */
105
+ export declare const observabilityHooks: ObservabilityHooks;
106
+ /**
107
+ * Helper: Create a console logger hook
+ *
+ * Registers logging handlers for all six evaluation events on the global
+ * hooks instance. Nothing is returned, so the handlers can only be removed
+ * again through the hooks instance itself (e.g. clear()).
108
+ */
109
+ export declare function createConsoleLoggerHook(): void;
110
+ /**
111
+ * Helper: Create a metrics collector hook
112
+ * Accepts the actual MetricsCollector interface from reporting/metricsCollector
+ *
+ * Registers "scorer:end" and "pipeline:end" handlers on the global hooks
+ * instance that forward each result to the collector.
113
+ */
114
+ export declare function createMetricsCollectorHook(collector: {
115
+ recordScorer: (scorerId: string, scorerName: string, result: ScoreResult) => void; // called on every "scorer:end"
116
+ recordPipeline: (result: PipelineResult) => void; // called on every "pipeline:end"
117
+ }): void;
118
+ /**
119
+ * OpenTelemetry span attributes
+ * (flat map of attribute name to a primitive value)
120
+ */
121
+ export type SpanAttributes = Record<string, string | number | boolean>;
122
+ /**
123
+ * Create span attributes from scorer result
+ * Keys are prefixed "scorer."; the confidence and error attributes are only
+ * present when the result carries them.
124
+ */
125
+ export declare function scorerToSpanAttributes(result: ScoreResult): SpanAttributes;
126
+ /**
127
+ * Create span attributes from pipeline result
+ * Keys are prefixed "pipeline."; the correlationId attribute is only present
+ * when the result has a non-empty correlation id.
128
+ */
129
+ export declare function pipelineToSpanAttributes(result: PipelineResult): SpanAttributes;
@@ -0,0 +1,182 @@
1
+ /**
2
+ * @file Observability Hooks
3
+ * OpenTelemetry integration for evaluation tracing
4
+ */
5
+ import { logger } from "../../utils/logger.js";
6
/**
 * Observability hooks manager.
 *
 * A small event emitter for evaluation events. Handlers are kept in one Set
 * per event name, and every emitted payload is stamped with the trace context
 * that is current at emit time.
 */
export class ObservabilityHooks {
    /** Event name -> Set of registered handlers. */
    _handlers = new Map();
    /** Trace context attached to every emitted payload (may be undefined). */
    _traceContext;
    /** When false, `emit` is a no-op. */
    _enabled = true;
    /**
     * Enable/disable observability
     */
    set enabled(value) {
        this._enabled = value;
    }
    get enabled() {
        return this._enabled;
    }
    /**
     * Set trace context for all events
     */
    setTraceContext(context) {
        this._traceContext = context;
    }
    /**
     * Clear trace context
     */
    clearTraceContext() {
        this._traceContext = undefined;
    }
    /**
     * Get current trace context
     */
    getTraceContext() {
        return this._traceContext;
    }
    /**
     * Register an event handler.
     *
     * @param event - Event name to listen for.
     * @param handler - Callback invoked with the event payload.
     * @returns A function that removes this handler again.
     */
    on(event, handler) {
        if (!this._handlers.has(event)) {
            this._handlers.set(event, new Set());
        }
        this._handlers.get(event).add(handler);
        // Return unsubscribe function
        return () => {
            this._handlers.get(event)?.delete(handler);
        };
    }
    /**
     * Remove an event handler (no effect if it was never registered).
     */
    off(event, handler) {
        this._handlers.get(event)?.delete(handler);
    }
    /**
     * Emit an event to every handler registered for it.
     *
     * Handler failures, whether thrown synchronously or delivered through a
     * rejected promise or thenable, are logged and never propagate to the
     * caller. The returned promise resolves once all asynchronous handlers
     * have settled.
     *
     * @param event - Event name.
     * @param data - Payload without `traceContext`; the current trace context is added here.
     */
    async emit(event, data) {
        if (!this._enabled) {
            return;
        }
        const handlers = this._handlers.get(event);
        if (!handlers || handlers.size === 0) {
            return;
        }
        const eventData = {
            ...data,
            traceContext: this._traceContext,
        };
        const logFailure = (error) => {
            logger.error(`Event handler error for ${event}`, { error });
        };
        const pending = [];
        for (const handler of handlers) {
            try {
                const outcome = handler(eventData);
                // Duck-type on `then` rather than `instanceof Promise` so that
                // non-native or cross-realm thenables are awaited and guarded too.
                if (typeof outcome?.then === "function") {
                    pending.push(Promise.resolve(outcome).catch(logFailure));
                }
            }
            catch (error) {
                logFailure(error);
            }
        }
        // Wait for async handlers
        await Promise.all(pending);
    }
    /**
     * Clear all handlers
     */
    clear() {
        this._handlers.clear();
    }
    /**
     * Get handler count for an event (0 when none are registered).
     */
    listenerCount(event) {
        return this._handlers.get(event)?.size ?? 0;
    }
}
/**
 * Global observability hooks instance
 */
export const observabilityHooks = new ObservabilityHooks();
108
/**
 * Helper: attach logging handlers for every evaluation event to the global
 * hooks instance. Start/end events are logged at info level and error events
 * at error level.
 */
export function createConsoleLoggerHook() {
    observabilityHooks.on("scorer:start", ({ scorerName, timestamp }) => {
        const startedAt = new Date(timestamp).toISOString();
        logger.info(`[SCORER] ${scorerName} started at ${startedAt}`);
    });
    observabilityHooks.on("scorer:end", ({ scorerName, result, duration }) => {
        const score = result.score.toFixed(1);
        logger.info(`[SCORER] ${scorerName} completed: score=${score}, passed=${result.passed}, duration=${duration}ms`);
    });
    observabilityHooks.on("scorer:error", ({ scorerName, error }) => {
        logger.error(`[SCORER] ${scorerName} error: ${error}`);
    });
    observabilityHooks.on("pipeline:start", ({ pipelineName, scorerCount, correlationId }) => {
        logger.info(`[PIPELINE] ${pipelineName} started with ${scorerCount} scorers (correlationId: ${correlationId})`);
    });
    observabilityHooks.on("pipeline:end", ({ pipelineName, result, duration }) => {
        const overall = result.overallScore.toFixed(1);
        logger.info(`[PIPELINE] ${pipelineName} completed: overall=${overall}, passed=${result.passed}, duration=${duration}ms`);
    });
    observabilityHooks.on("pipeline:error", ({ pipelineName, error }) => {
        logger.error(`[PIPELINE] ${pipelineName} error: ${error}`);
    });
}
134
/**
 * Helper: feed evaluation results into a metrics collector.
 *
 * Registers two handlers on the global hooks instance: every "scorer:end"
 * event is passed to `collector.recordScorer` and every "pipeline:end" event
 * to `collector.recordPipeline`.
 *
 * @param collector - Object exposing `recordScorer(scorerId, scorerName, result)`
 *   and `recordPipeline(result)`.
 */
export function createMetricsCollectorHook(collector) {
    const onScorerEnd = ({ scorerId, scorerName, result }) => {
        collector.recordScorer(scorerId, scorerName, result);
    };
    const onPipelineEnd = ({ result }) => {
        collector.recordPipeline(result);
    };
    observabilityHooks.on("scorer:end", onScorerEnd);
    observabilityHooks.on("pipeline:end", onPipelineEnd);
}
146
/**
 * Flatten a scorer result into span attributes keyed with a "scorer." prefix.
 *
 * @param result - Scorer result to convert.
 * @returns Attribute map. "scorer.confidence" is included only when the
 *   confidence is not `undefined`, and "scorer.error" only when the error is truthy.
 */
export function scorerToSpanAttributes(result) {
    const attributes = {
        "scorer.id": result.scorerId,
        "scorer.name": result.scorerName,
        "scorer.score": result.score,
        "scorer.normalizedScore": result.normalizedScore,
        "scorer.passed": result.passed,
        "scorer.threshold": result.threshold,
        "scorer.computeTime": result.computeTime,
    };
    if (result.confidence !== undefined) {
        attributes["scorer.confidence"] = result.confidence;
    }
    if (result.error) {
        attributes["scorer.error"] = result.error;
    }
    return attributes;
}
164
/**
 * Flatten a pipeline result into span attributes keyed with a "pipeline." prefix.
 *
 * @param result - Pipeline result to convert.
 * @returns Attribute map. The name falls back to "unnamed", and
 *   "pipeline.correlationId" is included only when the correlation id is truthy.
 */
export function pipelineToSpanAttributes(result) {
    const { pipelineConfig, scores, errors, skippedScorers } = result;
    const attributes = {
        "pipeline.name": pipelineConfig.name ?? "unnamed",
        "pipeline.overallScore": result.overallScore,
        "pipeline.passed": result.passed,
        "pipeline.aggregationMethod": result.aggregationMethod,
        "pipeline.scorerCount": scores.length,
        "pipeline.totalComputeTime": result.totalComputeTime,
        "pipeline.errorCount": errors.length,
        "pipeline.skippedCount": skippedScorers.length,
    };
    if (result.correlationId) {
        attributes["pipeline.correlationId"] = result.correlationId;
    }
    return attributes;
}
182
+ //# sourceMappingURL=observabilityHooks.js.map
@@ -2,10 +2,19 @@
2
2
  * @file This file exports the main Evaluator class, which serves as the central entry point for the evaluation system.
3
3
  */
4
4
  import type { LanguageModelV3CallOptions } from "@ai-sdk/provider";
5
- import type { GenerateResult } from "../types/generateTypes.js";
5
+ import type { EvaluationData } from "../types/evaluation.js";
6
6
  import type { EvaluationConfig } from "../types/evaluationTypes.js";
7
+ import type { GenerateResult } from "../types/generateTypes.js";
7
8
  import type { AutoEvaluationConfig } from "../types/middlewareTypes.js";
8
- import type { EvaluationData } from "../types/evaluation.js";
9
+ export * from "./errors/index.js";
10
+ export * from "./hooks/index.js";
11
+ export * from "./pipeline/index.js";
12
+ export * from "./reporting/index.js";
13
+ export * from "./scorers/index.js";
14
+ export { BatchEvaluator, type BatchEvaluationConfig, type BatchEvaluationItem, type BatchEvaluationItemResult, type BatchEvaluationResult, } from "./BatchEvaluator.js";
15
+ export { EvaluationAggregator, type ScoreStatistics, type ScoreDistribution, type TrendAnalysis, type DimensionAnalysis, type AlertSummary, type AggregationResult, } from "./EvaluationAggregator.js";
16
+ export { EvaluatorFactory, getEvaluatorFactory, type EvaluatorPreset, } from "./EvaluatorFactory.js";
17
+ export { EvaluatorRegistry, getEvaluatorRegistry, type EvaluationStrategyFunction, type EvaluationStrategyConfig, type EvaluationStrategyMetadata, } from "./EvaluatorRegistry.js";
9
18
  /**
10
19
  * A centralized class for performing response evaluations. It supports different
11
20
  * evaluation strategies, with RAGAS-style model-based evaluation as the default.
@@ -4,6 +4,21 @@
4
4
  import { ContextBuilder } from "./contextBuilder.js";
5
5
  import { RAGASEvaluator } from "./ragasEvaluator.js";
6
6
  import { mapToEvaluationData } from "./scoring.js";
7
+ // Re-export errors
8
+ export * from "./errors/index.js";
9
+ // Re-export hooks
10
+ export * from "./hooks/index.js";
11
+ // Re-export pipeline
12
+ export * from "./pipeline/index.js";
13
+ // Re-export reporting
14
+ export * from "./reporting/index.js";
15
+ // Re-export scorers
16
+ export * from "./scorers/index.js";
17
+ // Re-export Factory and Registry (Mastra-inspired patterns)
18
+ export { BatchEvaluator, } from "./BatchEvaluator.js";
19
+ export { EvaluationAggregator, } from "./EvaluationAggregator.js";
20
+ export { EvaluatorFactory, getEvaluatorFactory, } from "./EvaluatorFactory.js";
21
+ export { EvaluatorRegistry, getEvaluatorRegistry, } from "./EvaluatorRegistry.js";
7
22
  /**
8
23
  * A centralized class for performing response evaluations. It supports different
9
24
  * evaluation strategies, with RAGAS-style model-based evaluation as the default.