@juspay/neurolink 9.36.1 → 9.37.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/dist/auth/errors.d.ts +1 -1
- package/dist/auth/middleware/AuthMiddleware.d.ts +1 -1
- package/dist/auth/providers/BaseAuthProvider.d.ts +1 -1
- package/dist/browser/neurolink.min.js +921 -423
- package/dist/cli/commands/evaluate.d.ts +48 -0
- package/dist/cli/commands/evaluate.js +955 -0
- package/dist/cli/parser.js +4 -1
- package/dist/evaluation/BatchEvaluator.d.ts +163 -0
- package/dist/evaluation/BatchEvaluator.js +267 -0
- package/dist/evaluation/EvaluationAggregator.d.ts +272 -0
- package/dist/evaluation/EvaluationAggregator.js +377 -0
- package/dist/evaluation/EvaluatorFactory.d.ts +113 -0
- package/dist/evaluation/EvaluatorFactory.js +280 -0
- package/dist/evaluation/EvaluatorRegistry.d.ts +160 -0
- package/dist/evaluation/EvaluatorRegistry.js +184 -0
- package/dist/evaluation/errors/EvaluationError.d.ts +189 -0
- package/dist/evaluation/errors/EvaluationError.js +206 -0
- package/dist/evaluation/errors/index.d.ts +4 -0
- package/dist/evaluation/errors/index.js +4 -0
- package/dist/evaluation/hooks/index.d.ts +6 -0
- package/dist/evaluation/hooks/index.js +6 -0
- package/dist/evaluation/hooks/langfuseAdapter.d.ts +99 -0
- package/dist/evaluation/hooks/langfuseAdapter.js +172 -0
- package/dist/evaluation/hooks/observabilityHooks.d.ts +129 -0
- package/dist/evaluation/hooks/observabilityHooks.js +181 -0
- package/dist/evaluation/index.d.ts +11 -2
- package/dist/evaluation/index.js +15 -0
- package/dist/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
- package/dist/evaluation/pipeline/evaluationPipeline.js +381 -0
- package/dist/evaluation/pipeline/index.d.ts +8 -0
- package/dist/evaluation/pipeline/index.js +8 -0
- package/dist/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
- package/dist/evaluation/pipeline/pipelineBuilder.js +260 -0
- package/dist/evaluation/pipeline/presets.d.ts +66 -0
- package/dist/evaluation/pipeline/presets.js +224 -0
- package/dist/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
- package/dist/evaluation/pipeline/strategies/batchStrategy.js +238 -0
- package/dist/evaluation/pipeline/strategies/index.d.ts +6 -0
- package/dist/evaluation/pipeline/strategies/index.js +6 -0
- package/dist/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
- package/dist/evaluation/pipeline/strategies/samplingStrategy.js +238 -0
- package/dist/evaluation/reporting/index.d.ts +6 -0
- package/dist/evaluation/reporting/index.js +6 -0
- package/dist/evaluation/reporting/metricsCollector.d.ts +147 -0
- package/dist/evaluation/reporting/metricsCollector.js +285 -0
- package/dist/evaluation/reporting/reportGenerator.d.ts +90 -0
- package/dist/evaluation/reporting/reportGenerator.js +374 -0
- package/dist/evaluation/scorers/baseScorer.d.ts +83 -0
- package/dist/evaluation/scorers/baseScorer.js +232 -0
- package/dist/evaluation/scorers/customScorerUtils.d.ts +95 -0
- package/dist/evaluation/scorers/customScorerUtils.js +381 -0
- package/dist/evaluation/scorers/index.d.ts +10 -0
- package/dist/evaluation/scorers/index.js +16 -0
- package/dist/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/answerRelevancyScorer.js +99 -0
- package/dist/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
- package/dist/evaluation/scorers/llm/baseLLMScorer.js +281 -0
- package/dist/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/biasDetectionScorer.js +127 -0
- package/dist/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/contextPrecisionScorer.js +92 -0
- package/dist/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/contextRelevancyScorer.js +107 -0
- package/dist/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/faithfulnessScorer.js +121 -0
- package/dist/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/hallucinationScorer.js +140 -0
- package/dist/evaluation/scorers/llm/index.d.ts +15 -0
- package/dist/evaluation/scorers/llm/index.js +16 -0
- package/dist/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/promptAlignmentScorer.js +106 -0
- package/dist/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/summarizationScorer.js +114 -0
- package/dist/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/toneConsistencyScorer.js +106 -0
- package/dist/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
- package/dist/evaluation/scorers/llm/toxicityScorer.js +121 -0
- package/dist/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
- package/dist/evaluation/scorers/rule/baseRuleScorer.js +233 -0
- package/dist/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
- package/dist/evaluation/scorers/rule/contentSimilarityScorer.js +350 -0
- package/dist/evaluation/scorers/rule/formatScorer.d.ts +147 -0
- package/dist/evaluation/scorers/rule/formatScorer.js +470 -0
- package/dist/evaluation/scorers/rule/index.d.ts +9 -0
- package/dist/evaluation/scorers/rule/index.js +10 -0
- package/dist/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
- package/dist/evaluation/scorers/rule/keywordCoverageScorer.js +347 -0
- package/dist/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
- package/dist/evaluation/scorers/rule/lengthScorer.js +351 -0
- package/dist/evaluation/scorers/scorerBuilder.d.ts +161 -0
- package/dist/evaluation/scorers/scorerBuilder.js +420 -0
- package/dist/evaluation/scorers/scorerRegistry.d.ts +62 -0
- package/dist/evaluation/scorers/scorerRegistry.js +467 -0
- package/dist/index.d.ts +37 -25
- package/dist/index.js +65 -26
- package/dist/lib/auth/providers/BaseAuthProvider.d.ts +1 -1
- package/dist/lib/evaluation/BatchEvaluator.d.ts +163 -0
- package/dist/lib/evaluation/BatchEvaluator.js +268 -0
- package/dist/lib/evaluation/EvaluationAggregator.d.ts +272 -0
- package/dist/lib/evaluation/EvaluationAggregator.js +378 -0
- package/dist/lib/evaluation/EvaluatorFactory.d.ts +113 -0
- package/dist/lib/evaluation/EvaluatorFactory.js +281 -0
- package/dist/lib/evaluation/EvaluatorRegistry.d.ts +160 -0
- package/dist/lib/evaluation/EvaluatorRegistry.js +185 -0
- package/dist/lib/evaluation/errors/EvaluationError.d.ts +189 -0
- package/dist/lib/evaluation/errors/EvaluationError.js +207 -0
- package/dist/lib/evaluation/errors/index.d.ts +4 -0
- package/dist/lib/evaluation/errors/index.js +5 -0
- package/dist/lib/evaluation/hooks/index.d.ts +6 -0
- package/dist/lib/evaluation/hooks/index.js +7 -0
- package/dist/lib/evaluation/hooks/langfuseAdapter.d.ts +99 -0
- package/dist/lib/evaluation/hooks/langfuseAdapter.js +173 -0
- package/dist/lib/evaluation/hooks/observabilityHooks.d.ts +129 -0
- package/dist/lib/evaluation/hooks/observabilityHooks.js +182 -0
- package/dist/lib/evaluation/index.d.ts +11 -2
- package/dist/lib/evaluation/index.js +15 -0
- package/dist/lib/evaluation/pipeline/evaluationPipeline.d.ts +114 -0
- package/dist/lib/evaluation/pipeline/evaluationPipeline.js +382 -0
- package/dist/lib/evaluation/pipeline/index.d.ts +8 -0
- package/dist/lib/evaluation/pipeline/index.js +9 -0
- package/dist/lib/evaluation/pipeline/pipelineBuilder.d.ts +126 -0
- package/dist/lib/evaluation/pipeline/pipelineBuilder.js +261 -0
- package/dist/lib/evaluation/pipeline/presets.d.ts +66 -0
- package/dist/lib/evaluation/pipeline/presets.js +225 -0
- package/dist/lib/evaluation/pipeline/strategies/batchStrategy.d.ts +99 -0
- package/dist/lib/evaluation/pipeline/strategies/batchStrategy.js +239 -0
- package/dist/lib/evaluation/pipeline/strategies/index.d.ts +6 -0
- package/dist/lib/evaluation/pipeline/strategies/index.js +7 -0
- package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.d.ts +76 -0
- package/dist/lib/evaluation/pipeline/strategies/samplingStrategy.js +239 -0
- package/dist/lib/evaluation/reporting/index.d.ts +6 -0
- package/dist/lib/evaluation/reporting/index.js +7 -0
- package/dist/lib/evaluation/reporting/metricsCollector.d.ts +147 -0
- package/dist/lib/evaluation/reporting/metricsCollector.js +286 -0
- package/dist/lib/evaluation/reporting/reportGenerator.d.ts +90 -0
- package/dist/lib/evaluation/reporting/reportGenerator.js +375 -0
- package/dist/lib/evaluation/scorers/baseScorer.d.ts +83 -0
- package/dist/lib/evaluation/scorers/baseScorer.js +233 -0
- package/dist/lib/evaluation/scorers/customScorerUtils.d.ts +95 -0
- package/dist/lib/evaluation/scorers/customScorerUtils.js +382 -0
- package/dist/lib/evaluation/scorers/index.d.ts +10 -0
- package/dist/lib/evaluation/scorers/index.js +17 -0
- package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/answerRelevancyScorer.js +100 -0
- package/dist/lib/evaluation/scorers/llm/baseLLMScorer.d.ts +71 -0
- package/dist/lib/evaluation/scorers/llm/baseLLMScorer.js +282 -0
- package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/biasDetectionScorer.js +128 -0
- package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/contextPrecisionScorer.js +93 -0
- package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/contextRelevancyScorer.js +108 -0
- package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/faithfulnessScorer.js +122 -0
- package/dist/lib/evaluation/scorers/llm/hallucinationScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/hallucinationScorer.js +141 -0
- package/dist/lib/evaluation/scorers/llm/index.d.ts +15 -0
- package/dist/lib/evaluation/scorers/llm/index.js +17 -0
- package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/promptAlignmentScorer.js +107 -0
- package/dist/lib/evaluation/scorers/llm/summarizationScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/summarizationScorer.js +115 -0
- package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/toneConsistencyScorer.js +107 -0
- package/dist/lib/evaluation/scorers/llm/toxicityScorer.d.ts +12 -0
- package/dist/lib/evaluation/scorers/llm/toxicityScorer.js +122 -0
- package/dist/lib/evaluation/scorers/rule/baseRuleScorer.d.ts +77 -0
- package/dist/lib/evaluation/scorers/rule/baseRuleScorer.js +234 -0
- package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.d.ts +108 -0
- package/dist/lib/evaluation/scorers/rule/contentSimilarityScorer.js +351 -0
- package/dist/lib/evaluation/scorers/rule/formatScorer.d.ts +147 -0
- package/dist/lib/evaluation/scorers/rule/formatScorer.js +471 -0
- package/dist/lib/evaluation/scorers/rule/index.d.ts +9 -0
- package/dist/lib/evaluation/scorers/rule/index.js +11 -0
- package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.d.ts +83 -0
- package/dist/lib/evaluation/scorers/rule/keywordCoverageScorer.js +348 -0
- package/dist/lib/evaluation/scorers/rule/lengthScorer.d.ts +105 -0
- package/dist/lib/evaluation/scorers/rule/lengthScorer.js +352 -0
- package/dist/lib/evaluation/scorers/scorerBuilder.d.ts +161 -0
- package/dist/lib/evaluation/scorers/scorerBuilder.js +421 -0
- package/dist/lib/evaluation/scorers/scorerRegistry.d.ts +62 -0
- package/dist/lib/evaluation/scorers/scorerRegistry.js +468 -0
- package/dist/lib/index.d.ts +37 -25
- package/dist/lib/index.js +65 -26
- package/dist/lib/neurolink.d.ts +204 -0
- package/dist/lib/neurolink.js +296 -0
- package/dist/lib/types/index.d.ts +3 -1
- package/dist/lib/types/index.js +3 -2
- package/dist/lib/types/scorerTypes.d.ts +423 -0
- package/dist/lib/types/scorerTypes.js +6 -0
- package/dist/lib/utils/errorHandling.d.ts +20 -0
- package/dist/lib/utils/errorHandling.js +60 -0
- package/dist/neurolink.d.ts +204 -0
- package/dist/neurolink.js +296 -0
- package/dist/types/index.d.ts +3 -1
- package/dist/types/index.js +3 -2
- package/dist/types/scorerTypes.d.ts +423 -0
- package/dist/types/scorerTypes.js +5 -0
- package/dist/utils/errorHandling.d.ts +20 -0
- package/dist/utils/errorHandling.js +60 -0
- package/package.json +1 -1
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Langfuse Adapter
|
|
3
|
+
* Integration with Langfuse for LLM observability
|
|
4
|
+
*/
|
|
5
|
+
/**
|
|
6
|
+
* Langfuse client interface (minimal for type safety)
|
|
7
|
+
*/
|
|
8
|
+
export type LangfuseClient = {
|
|
9
|
+
score: (params: {
|
|
10
|
+
name: string;
|
|
11
|
+
value: number;
|
|
12
|
+
traceId?: string;
|
|
13
|
+
observationId?: string;
|
|
14
|
+
comment?: string;
|
|
15
|
+
metadata?: Record<string, unknown>;
|
|
16
|
+
}) => Promise<unknown>;
|
|
17
|
+
trace?: (params: {
|
|
18
|
+
name: string;
|
|
19
|
+
metadata?: Record<string, unknown>;
|
|
20
|
+
tags?: string[];
|
|
21
|
+
}) => {
|
|
22
|
+
id: string;
|
|
23
|
+
};
|
|
24
|
+
shutdown?: () => Promise<void>;
|
|
25
|
+
};
|
|
26
|
+
/**
|
|
27
|
+
* Langfuse adapter configuration
|
|
28
|
+
*/
|
|
29
|
+
export type LangfuseAdapterConfig = {
|
|
30
|
+
/** Langfuse client instance */
|
|
31
|
+
client: LangfuseClient;
|
|
32
|
+
/** Prefix for score names */
|
|
33
|
+
scorePrefix?: string;
|
|
34
|
+
/** Include detailed metadata */
|
|
35
|
+
includeMetadata?: boolean;
|
|
36
|
+
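/** Tags intended to be added to all scores (NOTE: accepted and defaulted to `[]`, but not read by the adapter implementation in this release) */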
|
|
37
|
+
tags?: string[];
|
|
38
|
+
/** Whether to send pipeline-level scores */
|
|
39
|
+
sendPipelineScores?: boolean;
|
|
40
|
+
/** Whether to send individual scorer scores */
|
|
41
|
+
sendScorerScores?: boolean;
|
|
42
|
+
};
|
|
43
|
+
/**
|
|
44
|
+
* Langfuse adapter for evaluation observability
|
|
45
|
+
*/
|
|
46
|
+
export declare class LangfuseAdapter {
|
|
47
|
+
private _config;
|
|
48
|
+
private _unsubscribers;
|
|
49
|
+
private _traceIdMap;
|
|
50
|
+
constructor(config: LangfuseAdapterConfig);
|
|
51
|
+
/**
|
|
52
|
+
* Start listening to evaluation events
|
|
53
|
+
*/
|
|
54
|
+
start(): void;
|
|
55
|
+
/**
|
|
56
|
+
* Stop listening to events
|
|
57
|
+
*/
|
|
58
|
+
stop(): void;
|
|
59
|
+
/**
|
|
60
|
+
* Send scorer score to Langfuse
|
|
61
|
+
*/
|
|
62
|
+
private _sendScorerScore;
|
|
63
|
+
/**
|
|
64
|
+
* Send pipeline scores to Langfuse
|
|
65
|
+
*/
|
|
66
|
+
private _sendPipelineScores;
|
|
67
|
+
/**
|
|
68
|
+
* Manually send a score to Langfuse
|
|
69
|
+
*/
|
|
70
|
+
sendScore(name: string, value: number, options?: {
|
|
71
|
+
traceId?: string;
|
|
72
|
+
comment?: string;
|
|
73
|
+
metadata?: Record<string, unknown>;
|
|
74
|
+
}): Promise<void>;
|
|
75
|
+
/**
|
|
76
|
+
* Shutdown the adapter and flush any pending data
|
|
77
|
+
*/
|
|
78
|
+
shutdown(): Promise<void>;
|
|
79
|
+
}
|
|
80
|
+
/**
|
|
81
|
+
* Create a Langfuse adapter
|
|
82
|
+
*/
|
|
83
|
+
export declare function createLangfuseAdapter(config: LangfuseAdapterConfig): LangfuseAdapter;
|
|
84
|
+
/**
|
|
85
|
+
* Create and start a Langfuse adapter
|
|
86
|
+
*/
|
|
87
|
+
export declare function startLangfuseAdapter(config: LangfuseAdapterConfig): LangfuseAdapter;
|
|
88
|
+
/**
|
|
89
|
+
* Helper: Create a mock Langfuse client for testing
|
|
90
|
+
*/
|
|
91
|
+
export declare function createMockLangfuseClient(): LangfuseClient & {
|
|
92
|
+
scores: Array<{
|
|
93
|
+
name: string;
|
|
94
|
+
value: number;
|
|
95
|
+
traceId?: string;
|
|
96
|
+
comment?: string;
|
|
97
|
+
metadata?: Record<string, unknown>;
|
|
98
|
+
}>;
|
|
99
|
+
};
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Langfuse Adapter
|
|
3
|
+
* Integration with Langfuse for LLM observability
|
|
4
|
+
*/
|
|
5
|
+
import { logger } from "../../utils/logger.js";
|
|
6
|
+
import { observabilityHooks } from "./observabilityHooks.js";
|
|
7
|
+
/**
 * Langfuse adapter for evaluation observability.
 *
 * Subscribes to the shared `observabilityHooks` emitter and forwards scorer
 * and pipeline results to a Langfuse-compatible client via `client.score()`.
 */
export class LangfuseAdapter {
    /** Effective configuration: caller options layered over the defaults. */
    _config;
    /** Unsubscribe callbacks returned by `observabilityHooks.on`. */
    _unsubscribers = [];
    /** Cleared by `stop()`; not otherwise read or written by this class. */
    _traceIdMap = new Map();
    /**
     * @param config Adapter options; `config.client` must expose `score()`.
     */
    constructor(config) {
        const options = config ?? {};
        // Defaults are applied per key with `??` rather than by spreading the
        // caller's object over a defaults object: a key that is present but
        // `undefined` would otherwise erase the default (yielding score names
        // such as "undefined.accuracy").
        this._config = {
            ...options,
            scorePrefix: options.scorePrefix ?? "eval",
            includeMetadata: options.includeMetadata ?? true,
            tags: options.tags ?? [],
            sendPipelineScores: options.sendPipelineScores ?? true,
            sendScorerScores: options.sendScorerScores ?? true,
        };
    }
    /**
     * Start listening to evaluation events.
     *
     * Registers a `scorer:end` and/or `pipeline:end` handler depending on the
     * configuration. The handlers do not return the send promise, so
     * `emit()` does not wait for delivery; failures are logged inside the
     * send methods.
     */
    start() {
        // Prevent duplicate subscriptions
        if (this._unsubscribers.length > 0) {
            return;
        }
        // Listen for scorer completions
        if (this._config.sendScorerScores) {
            const scorerUnsub = observabilityHooks.on("scorer:end", (event) => {
                this._sendScorerScore(event.result, event.traceContext?.traceId);
            });
            this._unsubscribers.push(scorerUnsub);
        }
        // Listen for pipeline completions
        if (this._config.sendPipelineScores) {
            const pipelineUnsub = observabilityHooks.on("pipeline:end", (event) => {
                this._sendPipelineScores(event.result, event.traceContext?.traceId);
            });
            this._unsubscribers.push(pipelineUnsub);
        }
        logger.debug("Langfuse adapter started");
    }
    /**
     * Stop listening to events: runs every stored unsubscribe callback and
     * resets the adapter's bookkeeping so `start()` can be called again.
     */
    stop() {
        for (const unsub of this._unsubscribers) {
            unsub();
        }
        this._unsubscribers = [];
        this._traceIdMap.clear();
        logger.debug("Langfuse adapter stopped");
    }
    /**
     * Send a single scorer result to Langfuse.
     *
     * Never rejects: any failure (including a malformed `result`) is logged.
     *
     * @param result Scorer result; `scorerId`, `normalizedScore`, `reasoning`
     *   and (when metadata is enabled) `passed`, `threshold`, `computeTime`,
     *   `confidence` and `metadata` are read from it.
     * @param traceId Optional trace id to attach the score to.
     */
    async _sendScorerScore(result, traceId) {
        try {
            const scoreName = `${this._config.scorePrefix}.${result.scorerId}`;
            // Forwarded unchanged; the original author notes this value is
            // already on a 0-1 scale.
            const normalizedValue = result.normalizedScore;
            await this._config.client.score({
                name: scoreName,
                value: normalizedValue,
                traceId,
                comment: result.reasoning,
                metadata: this._config.includeMetadata
                    ? {
                        passed: result.passed,
                        threshold: result.threshold,
                        computeTime: result.computeTime,
                        confidence: result.confidence,
                        ...(result.metadata ?? {}),
                    }
                    : undefined,
            });
            logger.debug(`Sent score to Langfuse: ${scoreName}=${normalizedValue}`);
        }
        catch (error) {
            logger.error("Failed to send score to Langfuse", {
                // Optional access: the failure may be that `result` itself is
                // missing, and the error path must not throw a second time.
                scorerId: result?.scorerId,
                error: error instanceof Error ? error.message : String(error),
            });
        }
    }
    /**
     * Send the overall pipeline score to Langfuse.
     *
     * Never rejects: any failure (including a malformed `result`) is logged.
     *
     * @param result Pipeline result.
     * @param externalTraceId Trace id from the event; falls back to
     *   `result.correlationId` when absent.
     */
    async _sendPipelineScores(result, externalTraceId) {
        // Read defensively so a result without `pipelineConfig` is reported
        // through the catch below instead of escaping as an unhandled
        // rejection (callers do not await this method).
        const traceId = externalTraceId ?? result?.correlationId;
        const pipelineName = result?.pipelineConfig?.name ?? "unnamed";
        try {
            // Send overall pipeline score
            await this._config.client.score({
                name: `${this._config.scorePrefix}.pipeline.${pipelineName}.overall`,
                // Divided by 10 to normalize to 0-1 (assumes overallScore is
                // reported on a 0-10 scale, per the original comment).
                value: result.overallScore / 10,
                traceId,
                comment: `Pipeline evaluation: ${result.passed ? "PASSED" : "FAILED"}`,
                metadata: this._config.includeMetadata
                    ? {
                        passed: result.passed,
                        aggregationMethod: result.aggregationMethod,
                        scorerCount: result.scores.length,
                        totalComputeTime: result.totalComputeTime,
                        errorCount: result.errors.length,
                    }
                    : undefined,
            });
            logger.debug(`Sent pipeline score to Langfuse: ${pipelineName}=${result.overallScore / 10}`);
        }
        catch (error) {
            logger.error("Failed to send pipeline score to Langfuse", {
                pipelineName,
                error: error instanceof Error ? error.message : String(error),
            });
        }
    }
    /**
     * Manually send a score to Langfuse.
     *
     * Unlike the event-driven senders, errors from `client.score()` propagate
     * to the caller.
     *
     * @param name Score name; prefixed with the configured `scorePrefix`.
     * @param value Score value, passed through unchanged.
     * @param options Optional `traceId`, `comment` and `metadata`.
     */
    async sendScore(name, value, options) {
        const scoreName = `${this._config.scorePrefix}.${name}`;
        await this._config.client.score({
            name: scoreName,
            value,
            traceId: options?.traceId,
            comment: options?.comment,
            metadata: options?.metadata,
        });
    }
    /**
     * Shutdown the adapter: unsubscribe from events, then call the client's
     * `shutdown()` when it provides one.
     */
    async shutdown() {
        this.stop();
        if (this._config.client.shutdown) {
            await this._config.client.shutdown();
        }
    }
}
|
|
145
|
+
/**
 * Build a `LangfuseAdapter` from the given configuration.
 *
 * The adapter is returned without being started; call `start()` on it to
 * begin forwarding evaluation events.
 */
export function createLangfuseAdapter(config) {
    const adapter = new LangfuseAdapter(config);
    return adapter;
}
|
|
151
|
+
/**
 * Build a `LangfuseAdapter` and immediately start it.
 *
 * @returns the started adapter, so the caller can later `stop()` or
 *   `shutdown()` it.
 */
export function startLangfuseAdapter(config) {
    const started = new LangfuseAdapter(config);
    started.start();
    return started;
}
|
|
159
|
+
/**
 * Helper: in-memory stand-in for a Langfuse client, intended for tests.
 *
 * Every `score()` call appends its params object to the exposed `scores`
 * array and resolves to `{ id: "score-<n>" }`, where `<n>` is the number of
 * scores recorded so far. `shutdown()` resolves without doing anything.
 */
export function createMockLangfuseClient() {
    const recorded = [];
    return {
        scores: recorded,
        async score(params) {
            // Array#push returns the new length, which doubles as the id suffix.
            const count = recorded.push(params);
            return { id: `score-${count}` };
        },
        async shutdown() { },
    };
}
|
|
173
|
+
//# sourceMappingURL=langfuseAdapter.js.map
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Observability Hooks
|
|
3
|
+
* In-process event hooks for evaluation tracing, plus helpers that build OpenTelemetry-style span attribute maps
|
|
4
|
+
*/
|
|
5
|
+
import type { EvaluationTraceContext, ScoreResult } from "../../types/scorerTypes.js";
|
|
6
|
+
import type { PipelineResult } from "../pipeline/evaluationPipeline.js";
|
|
7
|
+
/**
|
|
8
|
+
* Event handler type
|
|
9
|
+
*/
|
|
10
|
+
export type EventHandler<T> = (event: T) => void | Promise<void>;
|
|
11
|
+
/**
|
|
12
|
+
* Evaluation events
|
|
13
|
+
*/
|
|
14
|
+
export type EvaluationEvents = {
|
|
15
|
+
"scorer:start": {
|
|
16
|
+
scorerId: string;
|
|
17
|
+
scorerName: string;
|
|
18
|
+
timestamp: number;
|
|
19
|
+
traceContext?: EvaluationTraceContext;
|
|
20
|
+
};
|
|
21
|
+
"scorer:end": {
|
|
22
|
+
scorerId: string;
|
|
23
|
+
scorerName: string;
|
|
24
|
+
result: ScoreResult;
|
|
25
|
+
timestamp: number;
|
|
26
|
+
duration: number;
|
|
27
|
+
traceContext?: EvaluationTraceContext;
|
|
28
|
+
};
|
|
29
|
+
"scorer:error": {
|
|
30
|
+
scorerId: string;
|
|
31
|
+
scorerName: string;
|
|
32
|
+
error: string;
|
|
33
|
+
timestamp: number;
|
|
34
|
+
traceContext?: EvaluationTraceContext;
|
|
35
|
+
};
|
|
36
|
+
"pipeline:start": {
|
|
37
|
+
pipelineName: string;
|
|
38
|
+
scorerCount: number;
|
|
39
|
+
timestamp: number;
|
|
40
|
+
correlationId: string;
|
|
41
|
+
traceContext?: EvaluationTraceContext;
|
|
42
|
+
};
|
|
43
|
+
"pipeline:end": {
|
|
44
|
+
pipelineName: string;
|
|
45
|
+
result: PipelineResult;
|
|
46
|
+
timestamp: number;
|
|
47
|
+
duration: number;
|
|
48
|
+
traceContext?: EvaluationTraceContext;
|
|
49
|
+
};
|
|
50
|
+
"pipeline:error": {
|
|
51
|
+
pipelineName: string;
|
|
52
|
+
error: string;
|
|
53
|
+
timestamp: number;
|
|
54
|
+
traceContext?: EvaluationTraceContext;
|
|
55
|
+
};
|
|
56
|
+
};
|
|
57
|
+
/**
|
|
58
|
+
* Observability hooks manager
|
|
59
|
+
*/
|
|
60
|
+
export declare class ObservabilityHooks {
|
|
61
|
+
private _handlers;
|
|
62
|
+
private _traceContext?;
|
|
63
|
+
private _enabled;
|
|
64
|
+
/**
|
|
65
|
+
* Enable/disable observability
|
|
66
|
+
*/
|
|
67
|
+
set enabled(value: boolean);
|
|
68
|
+
get enabled(): boolean;
|
|
69
|
+
/**
|
|
70
|
+
* Set trace context for all events
|
|
71
|
+
*/
|
|
72
|
+
setTraceContext(context: EvaluationTraceContext): void;
|
|
73
|
+
/**
|
|
74
|
+
* Clear trace context
|
|
75
|
+
*/
|
|
76
|
+
clearTraceContext(): void;
|
|
77
|
+
/**
|
|
78
|
+
* Get current trace context
|
|
79
|
+
*/
|
|
80
|
+
getTraceContext(): EvaluationTraceContext | undefined;
|
|
81
|
+
/**
|
|
82
|
+
* Register an event handler
|
|
83
|
+
*/
|
|
84
|
+
on<K extends keyof EvaluationEvents>(event: K, handler: EventHandler<EvaluationEvents[K]>): () => void;
|
|
85
|
+
/**
|
|
86
|
+
* Remove an event handler
|
|
87
|
+
*/
|
|
88
|
+
off<K extends keyof EvaluationEvents>(event: K, handler: EventHandler<EvaluationEvents[K]>): void;
|
|
89
|
+
/**
|
|
90
|
+
* Emit an event
|
|
91
|
+
*/
|
|
92
|
+
emit<K extends keyof EvaluationEvents>(event: K, data: Omit<EvaluationEvents[K], "traceContext">): Promise<void>;
|
|
93
|
+
/**
|
|
94
|
+
* Clear all handlers
|
|
95
|
+
*/
|
|
96
|
+
clear(): void;
|
|
97
|
+
/**
|
|
98
|
+
* Get handler count for an event
|
|
99
|
+
*/
|
|
100
|
+
listenerCount(event: keyof EvaluationEvents): number;
|
|
101
|
+
}
|
|
102
|
+
/**
|
|
103
|
+
* Global observability hooks instance
|
|
104
|
+
*/
|
|
105
|
+
export declare const observabilityHooks: ObservabilityHooks;
|
|
106
|
+
/**
|
|
107
|
+
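* Helper: register handlers on the global hooks instance that log every scorer and pipeline event through the shared logger (no unsubscribe handle is returned)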
|
|
108
|
+
*/
|
|
109
|
+
export declare function createConsoleLoggerHook(): void;
|
|
110
|
+
/**
|
|
111
|
+
* Helper: Create a metrics collector hook
|
|
112
|
+
* Accepts the actual MetricsCollector interface from reporting/metricsCollector and subscribes it to "scorer:end" and "pipeline:end" on the global hooks instance
|
|
113
|
+
*/
|
|
114
|
+
export declare function createMetricsCollectorHook(collector: {
|
|
115
|
+
recordScorer: (scorerId: string, scorerName: string, result: ScoreResult) => void;
|
|
116
|
+
recordPipeline: (result: PipelineResult) => void;
|
|
117
|
+
}): void;
|
|
118
|
+
/**
|
|
119
|
+
* OpenTelemetry span attributes
|
|
120
|
+
*/
|
|
121
|
+
export type SpanAttributes = Record<string, string | number | boolean>;
|
|
122
|
+
/**
|
|
123
|
+
* Create span attributes from scorer result
|
|
124
|
+
*/
|
|
125
|
+
export declare function scorerToSpanAttributes(result: ScoreResult): SpanAttributes;
|
|
126
|
+
/**
|
|
127
|
+
* Create span attributes from pipeline result
|
|
128
|
+
*/
|
|
129
|
+
export declare function pipelineToSpanAttributes(result: PipelineResult): SpanAttributes;
|
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Observability Hooks
|
|
3
|
+
* In-process event hooks for evaluation tracing, plus helpers that build OpenTelemetry-style span attribute maps
|
|
4
|
+
*/
|
|
5
|
+
import { logger } from "../../utils/logger.js";
|
|
6
|
+
/**
 * Observability hooks manager.
 *
 * A small in-process event emitter: handlers are registered per event name,
 * and `emit()` calls each of them with the event payload plus the currently
 * configured trace context.
 */
export class ObservabilityHooks {
    /** Map of event name -> Set of handlers registered for that event. */
    _handlers = new Map();
    /** Trace context attached to every emitted event; `undefined` until set. */
    _traceContext;
    /** When false, `emit()` returns without calling any handler. */
    _enabled = true;
    /**
     * Enable/disable observability
     */
    set enabled(value) {
        this._enabled = value;
    }
    get enabled() {
        return this._enabled;
    }
    /**
     * Set trace context for all events
     */
    setTraceContext(context) {
        this._traceContext = context;
    }
    /**
     * Clear trace context
     */
    clearTraceContext() {
        this._traceContext = undefined;
    }
    /**
     * Get current trace context
     */
    getTraceContext() {
        return this._traceContext;
    }
    /**
     * Register an event handler.
     *
     * @param event Event name.
     * @param handler Callback invoked with the event payload.
     * @returns a function that removes this handler from `event`.
     */
    on(event, handler) {
        let registered = this._handlers.get(event);
        if (!registered) {
            registered = new Set();
            this._handlers.set(event, registered);
        }
        registered.add(handler);
        // Return unsubscribe function
        return () => {
            this._handlers.get(event)?.delete(handler);
        };
    }
    /**
     * Remove an event handler
     */
    off(event, handler) {
        this._handlers.get(event)?.delete(handler);
    }
    /**
     * Emit an event.
     *
     * Handlers are called in registration order with `data` plus a
     * `traceContext` property set to the current trace context. Synchronous
     * throws and asynchronous rejections are logged, never propagated. The
     * returned promise resolves once every asynchronous handler has settled.
     *
     * @param event Event name.
     * @param data Event payload (without `traceContext`).
     */
    async emit(event, data) {
        if (!this._enabled) {
            return;
        }
        const handlers = this._handlers.get(event);
        if (!handlers || handlers.size === 0) {
            return;
        }
        const eventData = {
            ...data,
            traceContext: this._traceContext,
        };
        const promises = [];
        for (const handler of handlers) {
            try {
                const result = handler(eventData);
                // Duck-type on `then` instead of `instanceof Promise` so that
                // thenables and promises from another realm are awaited and
                // have their rejections logged as well.
                if (typeof result?.then === "function") {
                    promises.push(Promise.resolve(result).catch((err) => {
                        logger.error(`Event handler error for ${event}`, { error: err });
                    }));
                }
            }
            catch (error) {
                logger.error(`Event handler error for ${event}`, { error });
            }
        }
        // Wait for async handlers
        await Promise.all(promises);
    }
    /**
     * Clear all handlers
     */
    clear() {
        this._handlers.clear();
    }
    /**
     * Get handler count for an event
     */
    listenerCount(event) {
        return this._handlers.get(event)?.size ?? 0;
    }
}
/**
 * Global observability hooks instance
 */
export const observabilityHooks = new ObservabilityHooks();
|
|
108
|
+
/**
 * Helper: attach logging handlers to the global `observabilityHooks`.
 *
 * One handler is registered for each of the six scorer/pipeline events;
 * start/end events are logged with `logger.info`, error events with
 * `logger.error`. The unsubscribe callbacks are discarded, so the handlers
 * stay registered until `observabilityHooks.clear()` is called.
 */
export function createConsoleLoggerHook() {
    observabilityHooks.on("scorer:start", ({ scorerName, timestamp }) => {
        const startedAt = new Date(timestamp).toISOString();
        logger.info(`[SCORER] ${scorerName} started at ${startedAt}`);
    });
    observabilityHooks.on("scorer:end", ({ scorerName, result, duration }) => {
        const score = result.score.toFixed(1);
        logger.info(`[SCORER] ${scorerName} completed: score=${score}, passed=${result.passed}, duration=${duration}ms`);
    });
    observabilityHooks.on("scorer:error", ({ scorerName, error }) => {
        logger.error(`[SCORER] ${scorerName} error: ${error}`);
    });
    observabilityHooks.on("pipeline:start", ({ pipelineName, scorerCount, correlationId }) => {
        logger.info(`[PIPELINE] ${pipelineName} started with ${scorerCount} scorers (correlationId: ${correlationId})`);
    });
    observabilityHooks.on("pipeline:end", ({ pipelineName, result, duration }) => {
        const overall = result.overallScore.toFixed(1);
        logger.info(`[PIPELINE] ${pipelineName} completed: overall=${overall}, passed=${result.passed}, duration=${duration}ms`);
    });
    observabilityHooks.on("pipeline:error", ({ pipelineName, error }) => {
        logger.error(`[PIPELINE] ${pipelineName} error: ${error}`);
    });
}
|
|
134
|
+
/**
 * Helper: feed evaluation results into a metrics collector.
 *
 * Subscribes to the global `observabilityHooks`: each `scorer:end` event is
 * passed to `collector.recordScorer(scorerId, scorerName, result)` and each
 * `pipeline:end` event to `collector.recordPipeline(result)`.
 *
 * @param collector Object exposing `recordScorer` and `recordPipeline`.
 */
export function createMetricsCollectorHook(collector) {
    const onScorerEnd = ({ scorerId, scorerName, result }) => {
        collector.recordScorer(scorerId, scorerName, result);
    };
    const onPipelineEnd = ({ result }) => {
        collector.recordPipeline(result);
    };
    observabilityHooks.on("scorer:end", onScorerEnd);
    observabilityHooks.on("pipeline:end", onPipelineEnd);
}
|
|
146
|
+
/**
 * Flatten a scorer result into a `scorer.*` span-attribute map.
 *
 * The id, name, score, normalized score, pass flag, threshold and compute
 * time are always copied. `scorer.confidence` is added only when
 * `result.confidence` is not `undefined`, and `scorer.error` only when
 * `result.error` is truthy.
 *
 * @param result Scorer result to convert.
 * @returns a new plain object keyed by attribute name.
 */
export function scorerToSpanAttributes(result) {
    const attributes = {
        "scorer.id": result.scorerId,
        "scorer.name": result.scorerName,
        "scorer.score": result.score,
        "scorer.normalizedScore": result.normalizedScore,
        "scorer.passed": result.passed,
        "scorer.threshold": result.threshold,
        "scorer.computeTime": result.computeTime,
    };
    if (result.confidence !== undefined) {
        attributes["scorer.confidence"] = result.confidence;
    }
    if (result.error) {
        attributes["scorer.error"] = result.error;
    }
    return attributes;
}
|
|
164
|
+
/**
 * Create span attributes from a pipeline result.
 *
 * Flattens the pipeline outcome into an object keyed with a "pipeline."
 * prefix. The name falls back to "unnamed" when `pipelineConfig.name` is
 * null or undefined. Scorer, error and skipped counts are taken from the
 * lengths of `scores`, `errors` and `skippedScorers`. "pipeline.correlationId"
 * is added only when `result.correlationId` is truthy.
 *
 * @param result - Pipeline result to flatten.
 * @returns A new plain object of span attributes.
 */
export function pipelineToSpanAttributes(result) {
    const { name } = result.pipelineConfig;
    const attributes = {
        "pipeline.name": name ?? "unnamed",
        "pipeline.overallScore": result.overallScore,
        "pipeline.passed": result.passed,
        "pipeline.aggregationMethod": result.aggregationMethod,
        "pipeline.scorerCount": result.scores.length,
        "pipeline.totalComputeTime": result.totalComputeTime,
        "pipeline.errorCount": result.errors.length,
        "pipeline.skippedCount": result.skippedScorers.length,
    };
    if (result.correlationId) {
        attributes["pipeline.correlationId"] = result.correlationId;
    }
    return attributes;
}
|
|
182
|
+
//# sourceMappingURL=observabilityHooks.js.map
|
|
@@ -2,10 +2,19 @@
|
|
|
2
2
|
* @file This file exports the main Evaluator class, which serves as the central entry point for the evaluation system.
|
|
3
3
|
*/
|
|
4
4
|
import type { LanguageModelV3CallOptions } from "@ai-sdk/provider";
|
|
5
|
-
import type {
|
|
5
|
+
import type { EvaluationData } from "../types/evaluation.js";
|
|
6
6
|
import type { EvaluationConfig } from "../types/evaluationTypes.js";
|
|
7
|
+
import type { GenerateResult } from "../types/generateTypes.js";
|
|
7
8
|
import type { AutoEvaluationConfig } from "../types/middlewareTypes.js";
|
|
8
|
-
|
|
9
|
+
export * from "./errors/index.js";
|
|
10
|
+
export * from "./hooks/index.js";
|
|
11
|
+
export * from "./pipeline/index.js";
|
|
12
|
+
export * from "./reporting/index.js";
|
|
13
|
+
export * from "./scorers/index.js";
|
|
14
|
+
export { BatchEvaluator, type BatchEvaluationConfig, type BatchEvaluationItem, type BatchEvaluationItemResult, type BatchEvaluationResult, } from "./BatchEvaluator.js";
|
|
15
|
+
export { EvaluationAggregator, type ScoreStatistics, type ScoreDistribution, type TrendAnalysis, type DimensionAnalysis, type AlertSummary, type AggregationResult, } from "./EvaluationAggregator.js";
|
|
16
|
+
export { EvaluatorFactory, getEvaluatorFactory, type EvaluatorPreset, } from "./EvaluatorFactory.js";
|
|
17
|
+
export { EvaluatorRegistry, getEvaluatorRegistry, type EvaluationStrategyFunction, type EvaluationStrategyConfig, type EvaluationStrategyMetadata, } from "./EvaluatorRegistry.js";
|
|
9
18
|
/**
|
|
10
19
|
* A centralized class for performing response evaluations. It supports different
|
|
11
20
|
* evaluation strategies, with RAGAS-style model-based evaluation as the default.
|
|
@@ -4,6 +4,21 @@
|
|
|
4
4
|
import { ContextBuilder } from "./contextBuilder.js";
|
|
5
5
|
import { RAGASEvaluator } from "./ragasEvaluator.js";
|
|
6
6
|
import { mapToEvaluationData } from "./scoring.js";
|
|
7
|
+
// Re-export errors
|
|
8
|
+
export * from "./errors/index.js";
|
|
9
|
+
// Re-export hooks
|
|
10
|
+
export * from "./hooks/index.js";
|
|
11
|
+
// Re-export pipeline
|
|
12
|
+
export * from "./pipeline/index.js";
|
|
13
|
+
// Re-export reporting
|
|
14
|
+
export * from "./reporting/index.js";
|
|
15
|
+
// Re-export scorers
|
|
16
|
+
export * from "./scorers/index.js";
|
|
17
|
+
// Re-export Factory and Registry (Mastra-inspired patterns)
|
|
18
|
+
export { BatchEvaluator, } from "./BatchEvaluator.js";
|
|
19
|
+
export { EvaluationAggregator, } from "./EvaluationAggregator.js";
|
|
20
|
+
export { EvaluatorFactory, getEvaluatorFactory, } from "./EvaluatorFactory.js";
|
|
21
|
+
export { EvaluatorRegistry, getEvaluatorRegistry, } from "./EvaluatorRegistry.js";
|
|
7
22
|
/**
|
|
8
23
|
* A centralized class for performing response evaluations. It supports different
|
|
9
24
|
* evaluation strategies, with RAGAS-style model-based evaluation as the default.
|