@thinkhive/sdk 3.1.0 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +279 -128
- package/dist/api/apiKeys.d.ts +252 -0
- package/dist/api/apiKeys.js +298 -0
- package/dist/api/business-metrics.d.ts +188 -0
- package/dist/api/business-metrics.js +213 -0
- package/dist/api/conversation-eval.d.ts +200 -0
- package/dist/api/conversation-eval.js +235 -0
- package/dist/api/deterministic-graders.d.ts +205 -0
- package/dist/api/deterministic-graders.js +191 -0
- package/dist/api/eval-health.d.ts +250 -0
- package/dist/api/eval-health.js +224 -0
- package/dist/api/human-review.d.ts +275 -0
- package/dist/api/human-review.js +236 -0
- package/dist/api/nondeterminism.d.ts +300 -0
- package/dist/api/nondeterminism.js +250 -0
- package/dist/api/quality-metrics.d.ts +303 -0
- package/dist/api/quality-metrics.js +198 -0
- package/dist/api/roi-analytics.d.ts +263 -0
- package/dist/api/roi-analytics.js +204 -0
- package/dist/api/transcript-patterns.d.ts +204 -0
- package/dist/api/transcript-patterns.js +227 -0
- package/dist/core/client.d.ts +82 -8
- package/dist/core/client.js +223 -32
- package/dist/core/config.d.ts +1 -1
- package/dist/core/config.js +2 -2
- package/dist/core/types.d.ts +27 -2
- package/dist/core/types.js +1 -1
- package/dist/index.d.ts +415 -62
- package/dist/index.js +253 -37
- package/package.json +8 -4
|
@@ -0,0 +1,303 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ThinkHive SDK v3.1 - Quality Metrics API
|
|
3
|
+
*
|
|
4
|
+
* RAG Evaluation & Hallucination Detection for AI quality assurance
|
|
5
|
+
*/
|
|
6
|
+
/**
 * A single retrieved context chunk supplied for RAG evaluation.
 *
 * This is the element type of `retrievedContexts` in the input accepted by
 * `qualityMetrics.evaluateRag`. Only `content` is required.
 */
export interface RetrievedContext {
    /** Text of the retrieved chunk. */
    content: string;
    /** Optional chunk index (presumably the chunk's position in the retrieved set; confirm against the API docs). */
    chunkIndex?: number;
    /** Optional free-form metadata attached to the chunk. */
    metadata?: Record<string, unknown>;
    /** Optional score for the chunk (presumably the retriever's relevance score; scale is not specified here). */
    score?: number;
}
|
|
15
|
+
/**
 * A ground truth context chunk.
 *
 * This is the element type of the optional `groundTruthContexts` field in the
 * input accepted by `qualityMetrics.evaluateRag`.
 */
export interface GroundTruthContext {
    /** Text of the ground truth chunk. */
    content: string;
    /** Optional chunk index (presumably comparable to `RetrievedContext.chunkIndex`; confirm against the API docs). */
    chunkIndex?: number;
}
|
|
22
|
+
/**
 * Grounded span evidence.
 *
 * Element type of `RAGEvidence.groundedSpans`.
 */
export interface GroundedSpan {
    /** Text of the span. */
    text: string;
    /** Confidence value reported for the span (scale is not specified here; presumably 0-1). */
    confidence: number;
    /** Optional index of the source chunk (presumably the chunk that supports the span; confirm against the API docs). */
    sourceChunkIndex?: number;
}
|
|
30
|
+
/**
 * Ungrounded span evidence.
 *
 * Element type of `RAGEvidence.ungroundedSpans`. Unlike `GroundedSpan`, it
 * carries no source chunk index.
 */
export interface UngroundedSpan {
    /** Text of the span. */
    text: string;
    /** Confidence value reported for the span (scale is not specified here; presumably 0-1). */
    confidence: number;
}
|
|
37
|
+
/**
 * Citation mapping.
 *
 * Element type of `RAGEvidence.citationMap`: one entry per claim/citation pair.
 */
export interface CitationMap {
    /** The claim text. */
    claim: string;
    /** Index of the cited item (presumably an index into the retrieved contexts; confirm against the API docs). */
    citedIndex: number;
    /** Whether the citation was judged valid. */
    isValid: boolean;
}
|
|
45
|
+
/**
 * RAG evaluation result.
 *
 * NOTE(review): the exported helpers treat the numeric fields on different
 * scales. `passesQualityThreshold` defaults to `minGroundedness = 0.7` and
 * `minOverallScore = 60`, which suggests the component metrics are 0-1
 * fractions while `overallScore` is a 0-100 value. Confirm against the API docs.
 */
export interface RAGEvaluation {
    // Retrieval quality
    contextRelevance: number;
    contextPrecision: number;
    contextRecall: number;
    // Generation quality
    groundedness: number;
    faithfulness: number;
    answerRelevance: number;
    // Citation quality
    citationAccuracy: number;
    citationCompleteness: number;
    // Overall
    overallScore: number;
    /** Letter grade; the same union is accepted by `getGradeColor`. */
    grade: 'A' | 'B' | 'C' | 'D' | 'F';
    // Details
    groundedSpanCount?: number;
    ungroundedSpanCount?: number;
    issues: string[];
    recommendations: string[];
}
|
|
64
|
+
/**
 * RAG evaluation evidence.
 *
 * Returned next to a `RAGEvaluation` by `qualityMetrics.getRagScores` and
 * `qualityMetrics.evaluateRag`.
 */
export interface RAGEvidence {
    /** Spans reported as grounded. */
    groundedSpans: GroundedSpan[];
    /** Spans reported as ungrounded. */
    ungroundedSpans: UngroundedSpan[];
    /** Claim-to-citation mappings. */
    citationMap: CitationMap[];
}
|
|
72
|
+
/**
 * Hallucination instance.
 *
 * Element type of `HallucinationReport.instances`.
 */
export interface HallucinationInstance {
    /** Category of the hallucination; a free-form string (the set of values is not enumerated here). */
    type: string;
    /** Severity of this instance; uses the same four levels as `HallucinationReport.riskLevel`. */
    severity: 'low' | 'medium' | 'high' | 'critical';
    /** The text the instance refers to. */
    text: string;
    /** Explanation accompanying the instance. */
    explanation: string;
    /** Confidence value reported for the instance (scale is not specified here; presumably 0-1). */
    confidence: number;
    /** Optional suggested fix. */
    suggestedFix?: string;
}
|
|
83
|
+
/**
 * Hallucination detection report.
 *
 * Returned by `qualityMetrics.getHallucinationReport` and
 * `qualityMetrics.detectHallucinations`, and consumed by
 * `isHallucinationRiskAcceptable` and `getQualityRecommendations`.
 */
export interface HallucinationReport {
    /** Whether any hallucination was reported. */
    hasHallucinations: boolean;
    /** Overall hallucination score (scale and direction are not specified here; confirm against the API docs). */
    hallucinationScore: number;
    /** Overall risk level; `isHallucinationRiskAcceptable` orders these low < medium < high < critical. */
    riskLevel: 'low' | 'medium' | 'high' | 'critical';
    /** Count of factual claims. */
    factualClaims: number;
    /** Count of verified claims. */
    verifiedClaims: number;
    /** Count of unverified claims. */
    unverifiedClaims: number;
    /** Summary text. */
    summary: string;
    /** Recommendation strings. */
    recommendations: string[];
    /** Individual hallucination instances. */
    instances: HallucinationInstance[];
}
|
|
97
|
+
/**
 * Groundedness analysis result.
 *
 * Type of the `groundedness` field returned by `qualityMetrics.getGroundedness`.
 */
export interface GroundednessResult {
    score: number;
    faithfulness: number;
    contextRelevance: number;
    /**
     * Grade as a plain string. Unlike `RAGEvaluation.grade` this is not
     * narrowed to the `'A' | 'B' | 'C' | 'D' | 'F'` union, so it must be
     * validated or narrowed before being passed to `getGradeColor`.
     */
    grade: string;
}
|
|
106
|
+
/**
 * Batch evaluation result for a single trace.
 *
 * Element type of `results` returned by `qualityMetrics.evaluateBatch`.
 */
export interface BatchEvaluationResult {
    /** ID of the trace this entry refers to. */
    traceId: string;
    /** Whether evaluation of this trace succeeded. */
    success: boolean;
    /** Optional error message (presumably set when `success` is false; confirm against the API docs). */
    error?: string;
    /** Optional condensed RAG result for the trace. */
    rag?: {
        score: number;
        /** Grade as a plain string (not narrowed to the A-F union). */
        grade: string;
        mainIssue?: string;
    };
    /** Optional condensed hallucination result for the trace. */
    hallucination?: {
        hasIssues: boolean;
        score: number;
        topIssue?: string;
    };
}
|
|
124
|
+
/**
 * Batch evaluation summary.
 *
 * Type of `summary` returned by `qualityMetrics.evaluateBatch`.
 */
export interface BatchEvaluationSummary {
    /** Number of traces in the batch. */
    totalTraces: number;
    /** Number of evaluations that succeeded. */
    successfulEvaluations: number;
    /** Average RAG score. */
    avgRagScore: number;
    /**
     * Hallucination rate. The `evaluateBatch` usage example prints this value
     * followed by `%`, which suggests a percentage rather than a 0-1 fraction.
     * Confirm against the API docs.
     */
    hallucinationRate: number;
    /** Count per letter grade. */
    gradeDistribution: {
        A: number;
        B: number;
        C: number;
        D: number;
        F: number;
    };
}
|
|
140
|
+
/**
 * Quality Metrics API client for RAG evaluation and hallucination detection.
 *
 * `getRagScores`, `getHallucinationReport` and `getGroundedness` take a trace
 * ID; `evaluateRag` and `detectHallucinations` evaluate content supplied in
 * the call; `evaluateBatch` evaluates several traces in one request. The
 * implementation sends every request through `apiRequestWithData` with
 * `apiVersion: 'v1'`.
 */
export declare const qualityMetrics: {
    /**
     * Get RAG quality scores for a specific trace.
     *
     * Requests `/quality/rag-scores/{traceId}`.
     *
     * @param traceId - ID of the trace to look up.
     * @returns The trace ID with its evaluation and supporting evidence.
     *
     * @example
     * ```typescript
     * const scores = await qualityMetrics.getRagScores('trace_abc123');
     * console.log(`Groundedness: ${scores.evaluation.groundedness}`);
     * console.log(`Grade: ${scores.evaluation.grade}`);
     * ```
     */
    getRagScores(traceId: string): Promise<{
        traceId: string;
        evaluation: RAGEvaluation;
        evidence: RAGEvidence;
    }>;
    /**
     * Get hallucination detection report for a trace.
     *
     * Requests `/quality/hallucination-report/{traceId}`.
     *
     * @param traceId - ID of the trace to look up.
     * @returns The trace ID with its hallucination report.
     *
     * @example
     * ```typescript
     * const report = await qualityMetrics.getHallucinationReport('trace_abc123');
     * if (report.report.hasHallucinations) {
     *   console.log(`Risk level: ${report.report.riskLevel}`);
     *   for (const instance of report.report.instances) {
     *     console.log(`- ${instance.type}: ${instance.text}`);
     *   }
     * }
     * ```
     */
    getHallucinationReport(traceId: string): Promise<{
        traceId: string;
        report: HallucinationReport;
    }>;
    /**
     * Evaluate RAG quality for provided content (ad-hoc evaluation).
     *
     * POSTs `input` as the request body to `/quality/evaluate-rag`.
     *
     * @param input - Query, response and retrieved contexts; ground truth
     *   contexts and citations are optional.
     * @returns The evaluation with its supporting evidence.
     *
     * @example
     * ```typescript
     * const result = await qualityMetrics.evaluateRag({
     *   query: 'What is the refund policy?',
     *   response: 'You can get a refund within 30 days.',
     *   retrievedContexts: [
     *     { content: 'Our refund policy allows returns within 30 days of purchase.' },
     *   ],
     * });
     * console.log(`Groundedness: ${result.evaluation.groundedness}`);
     * ```
     */
    evaluateRag(input: {
        query: string;
        response: string;
        retrievedContexts: RetrievedContext[];
        groundTruthContexts?: GroundTruthContext[];
        citations?: string[];
    }): Promise<{
        evaluation: RAGEvaluation;
        evidence: RAGEvidence;
    }>;
    /**
     * Detect hallucinations in provided content (ad-hoc detection).
     *
     * POSTs `input` as the request body to `/quality/detect-hallucinations`.
     *
     * @param input - Response text and the contexts to check it against; the
     *   query and previous responses are optional.
     * @returns The hallucination report.
     *
     * @example
     * ```typescript
     * const result = await qualityMetrics.detectHallucinations({
     *   response: 'The product costs $99 and comes with a 2-year warranty.',
     *   contexts: [
     *     { content: 'The product costs $99 with a 1-year warranty.' },
     *   ],
     * });
     * if (result.report.hasHallucinations) {
     *   console.log('Detected hallucinations:', result.report.instances);
     * }
     * ```
     */
    detectHallucinations(input: {
        response: string;
        contexts: Array<{
            content: string;
            metadata?: Record<string, unknown>;
        }>;
        query?: string;
        previousResponses?: string[];
    }): Promise<{
        report: HallucinationReport;
    }>;
    /**
     * Get groundedness analysis for a trace.
     *
     * Requests `/quality/groundedness/{traceId}`.
     *
     * @param traceId - ID of the trace to look up.
     * @returns The groundedness scores, the grounded and ungrounded spans,
     *   and span counts. Note that grounded spans here expose `sourceIndex`,
     *   whereas `GroundedSpan` names the field `sourceChunkIndex`.
     *
     * @example
     * ```typescript
     * const result = await qualityMetrics.getGroundedness('trace_abc123');
     * console.log(`Groundedness score: ${result.groundedness.score}`);
     * console.log(`Grounded spans: ${result.summary.groundedSpans}`);
     * ```
     */
    getGroundedness(traceId: string): Promise<{
        traceId: string;
        groundedness: GroundednessResult;
        spans: {
            grounded: Array<{
                text: string;
                confidence: number;
                sourceIndex: number;
            }>;
            ungrounded: Array<{
                text: string;
                confidence: number;
            }>;
        };
        summary: {
            totalSpans: number;
            groundedSpans: number;
            ungroundedSpans: number;
            groundednessRatio: number;
        };
    }>;
    /**
     * Evaluate multiple traces for quality metrics in batch.
     *
     * POSTs `options` as the request body to `/quality/evaluate-batch`.
     *
     * @param options - Trace IDs to evaluate, plus an optional
     *   `includeDetails` flag that is forwarded to the API unchanged.
     * @returns An aggregate summary and one result entry per trace.
     *
     * @example
     * ```typescript
     * const result = await qualityMetrics.evaluateBatch({
     *   traceIds: ['trace_1', 'trace_2', 'trace_3'],
     * });
     * console.log(`Average RAG score: ${result.summary.avgRagScore}`);
     * console.log(`Hallucination rate: ${result.summary.hallucinationRate}%`);
     * ```
     */
    evaluateBatch(options: {
        traceIds: string[];
        includeDetails?: boolean;
    }): Promise<{
        summary: BatchEvaluationSummary;
        results: BatchEvaluationResult[];
    }>;
};
|
|
280
|
+
/**
 * Check if a RAG evaluation passes quality thresholds.
 *
 * All three checks must hold: `evaluation.groundedness >= minGroundedness`,
 * `evaluation.overallScore >= minOverallScore`, and `evaluation.grade` is at
 * least `minGrade` (ordering A > B > C > D > F).
 *
 * @param evaluation - Evaluation to check.
 * @param thresholds - Optional overrides. The implementation defaults to
 *   `minGroundedness = 0.7`, `minOverallScore = 60` and `minGrade = 'C'`.
 * @returns `true` only when every threshold is met.
 */
export declare function passesQualityThreshold(evaluation: RAGEvaluation, thresholds?: {
    minGroundedness?: number;
    minOverallScore?: number;
    minGrade?: 'A' | 'B' | 'C' | 'D';
}): boolean;
|
|
288
|
+
/**
 * Check if hallucination risk is acceptable.
 *
 * Risk levels are ordered low < medium < high < critical; the report is
 * acceptable when `report.riskLevel` does not exceed `maxRiskLevel`.
 *
 * @param report - Hallucination report to check.
 * @param maxRiskLevel - Highest level still considered acceptable. The
 *   implementation defaults to `'medium'`.
 * @returns `true` when the report's risk level is at or below the limit.
 */
export declare function isHallucinationRiskAcceptable(report: HallucinationReport, maxRiskLevel?: 'low' | 'medium' | 'high'): boolean;
|
|
292
|
+
/**
 * Get quality recommendations based on evaluation.
 *
 * The implementation adds one recommendation for each of: groundedness below
 * 0.7, context relevance below 0.6, citation accuracy below 0.8, and a
 * hallucination report with `hasHallucinations` set. When none apply it
 * returns a single "within acceptable ranges" message, so the result is
 * never empty.
 *
 * @param ragEval - RAG evaluation to inspect.
 * @param hallucinationReport - Optional hallucination report to inspect.
 * @returns Recommendation messages, in the order listed above.
 */
export declare function getQualityRecommendations(ragEval: RAGEvaluation, hallucinationReport?: HallucinationReport): string[];
|
|
296
|
+
/**
 * Format quality score for display.
 *
 * The implementation multiplies the score by 100, rounds to the nearest
 * integer and appends `%`, so it expects a 0-1 fraction (for example `0.856`
 * becomes `'86%'`). A value already on a 0-100 scale would be inflated.
 *
 * @param score - Score as a fraction.
 * @returns The score as a whole-number percentage string.
 */
export declare function formatQualityScore(score: number): string;
|
|
300
|
+
/**
 * Get color indicator for grade.
 *
 * Mapping used by the implementation: A is green, B is blue, C is yellow,
 * D is orange, F is red.
 *
 * @param grade - Letter grade.
 * @returns The color name for the grade.
 */
export declare function getGradeColor(grade: 'A' | 'B' | 'C' | 'D' | 'F'): 'green' | 'blue' | 'yellow' | 'orange' | 'red';
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* ThinkHive SDK v3.1 - Quality Metrics API
|
|
4
|
+
*
|
|
5
|
+
* RAG Evaluation & Hallucination Detection for AI quality assurance
|
|
6
|
+
*/
|
|
7
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
8
|
+
exports.qualityMetrics = void 0;
|
|
9
|
+
exports.passesQualityThreshold = passesQualityThreshold;
|
|
10
|
+
exports.isHallucinationRiskAcceptable = isHallucinationRiskAcceptable;
|
|
11
|
+
exports.getQualityRecommendations = getQualityRecommendations;
|
|
12
|
+
exports.formatQualityScore = formatQualityScore;
|
|
13
|
+
exports.getGradeColor = getGradeColor;
|
|
14
|
+
const client_1 = require("../core/client");
|
|
15
|
+
// ============================================================================
|
|
16
|
+
// QUALITY METRICS API CLIENT
|
|
17
|
+
// ============================================================================
|
|
18
|
+
/**
|
|
19
|
+
* Quality Metrics API client for RAG evaluation and hallucination detection
|
|
20
|
+
*/
|
|
21
|
+
exports.qualityMetrics = {
|
|
22
|
+
/**
|
|
23
|
+
* Get RAG quality scores for a specific trace
|
|
24
|
+
*
|
|
25
|
+
* @example
|
|
26
|
+
* ```typescript
|
|
27
|
+
* const scores = await qualityMetrics.getRagScores('trace_abc123');
|
|
28
|
+
* console.log(`Groundedness: ${scores.evaluation.groundedness}`);
|
|
29
|
+
* console.log(`Grade: ${scores.evaluation.grade}`);
|
|
30
|
+
* ```
|
|
31
|
+
*/
|
|
32
|
+
async getRagScores(traceId) {
|
|
33
|
+
return (0, client_1.apiRequestWithData)(`/quality/rag-scores/${traceId}`, {
|
|
34
|
+
apiVersion: 'v1',
|
|
35
|
+
});
|
|
36
|
+
},
|
|
37
|
+
/**
|
|
38
|
+
* Get hallucination detection report for a trace
|
|
39
|
+
*
|
|
40
|
+
* @example
|
|
41
|
+
* ```typescript
|
|
42
|
+
* const report = await qualityMetrics.getHallucinationReport('trace_abc123');
|
|
43
|
+
* if (report.report.hasHallucinations) {
|
|
44
|
+
* console.log(`Risk level: ${report.report.riskLevel}`);
|
|
45
|
+
* for (const instance of report.report.instances) {
|
|
46
|
+
* console.log(`- ${instance.type}: ${instance.text}`);
|
|
47
|
+
* }
|
|
48
|
+
* }
|
|
49
|
+
* ```
|
|
50
|
+
*/
|
|
51
|
+
async getHallucinationReport(traceId) {
|
|
52
|
+
return (0, client_1.apiRequestWithData)(`/quality/hallucination-report/${traceId}`, {
|
|
53
|
+
apiVersion: 'v1',
|
|
54
|
+
});
|
|
55
|
+
},
|
|
56
|
+
/**
|
|
57
|
+
* Evaluate RAG quality for provided content (ad-hoc evaluation)
|
|
58
|
+
*
|
|
59
|
+
* @example
|
|
60
|
+
* ```typescript
|
|
61
|
+
* const result = await qualityMetrics.evaluateRag({
|
|
62
|
+
* query: 'What is the refund policy?',
|
|
63
|
+
* response: 'You can get a refund within 30 days.',
|
|
64
|
+
* retrievedContexts: [
|
|
65
|
+
* { content: 'Our refund policy allows returns within 30 days of purchase.' },
|
|
66
|
+
* ],
|
|
67
|
+
* });
|
|
68
|
+
* console.log(`Groundedness: ${result.evaluation.groundedness}`);
|
|
69
|
+
* ```
|
|
70
|
+
*/
|
|
71
|
+
async evaluateRag(input) {
|
|
72
|
+
return (0, client_1.apiRequestWithData)('/quality/evaluate-rag', {
|
|
73
|
+
method: 'POST',
|
|
74
|
+
body: input,
|
|
75
|
+
apiVersion: 'v1',
|
|
76
|
+
});
|
|
77
|
+
},
|
|
78
|
+
/**
|
|
79
|
+
* Detect hallucinations in provided content (ad-hoc detection)
|
|
80
|
+
*
|
|
81
|
+
* @example
|
|
82
|
+
* ```typescript
|
|
83
|
+
* const result = await qualityMetrics.detectHallucinations({
|
|
84
|
+
* response: 'The product costs $99 and comes with a 2-year warranty.',
|
|
85
|
+
* contexts: [
|
|
86
|
+
* { content: 'The product costs $99 with a 1-year warranty.' },
|
|
87
|
+
* ],
|
|
88
|
+
* });
|
|
89
|
+
* if (result.report.hasHallucinations) {
|
|
90
|
+
* console.log('Detected hallucinations:', result.report.instances);
|
|
91
|
+
* }
|
|
92
|
+
* ```
|
|
93
|
+
*/
|
|
94
|
+
async detectHallucinations(input) {
|
|
95
|
+
return (0, client_1.apiRequestWithData)('/quality/detect-hallucinations', {
|
|
96
|
+
method: 'POST',
|
|
97
|
+
body: input,
|
|
98
|
+
apiVersion: 'v1',
|
|
99
|
+
});
|
|
100
|
+
},
|
|
101
|
+
/**
|
|
102
|
+
* Get groundedness analysis for a trace
|
|
103
|
+
*
|
|
104
|
+
* @example
|
|
105
|
+
* ```typescript
|
|
106
|
+
* const result = await qualityMetrics.getGroundedness('trace_abc123');
|
|
107
|
+
* console.log(`Groundedness score: ${result.groundedness.score}`);
|
|
108
|
+
* console.log(`Grounded spans: ${result.summary.groundedSpans}`);
|
|
109
|
+
* ```
|
|
110
|
+
*/
|
|
111
|
+
async getGroundedness(traceId) {
|
|
112
|
+
return (0, client_1.apiRequestWithData)(`/quality/groundedness/${traceId}`, {
|
|
113
|
+
apiVersion: 'v1',
|
|
114
|
+
});
|
|
115
|
+
},
|
|
116
|
+
/**
|
|
117
|
+
* Evaluate multiple traces for quality metrics in batch
|
|
118
|
+
*
|
|
119
|
+
* @example
|
|
120
|
+
* ```typescript
|
|
121
|
+
* const result = await qualityMetrics.evaluateBatch({
|
|
122
|
+
* traceIds: ['trace_1', 'trace_2', 'trace_3'],
|
|
123
|
+
* });
|
|
124
|
+
* console.log(`Average RAG score: ${result.summary.avgRagScore}`);
|
|
125
|
+
* console.log(`Hallucination rate: ${result.summary.hallucinationRate}%`);
|
|
126
|
+
* ```
|
|
127
|
+
*/
|
|
128
|
+
async evaluateBatch(options) {
|
|
129
|
+
return (0, client_1.apiRequestWithData)('/quality/evaluate-batch', {
|
|
130
|
+
method: 'POST',
|
|
131
|
+
body: options,
|
|
132
|
+
apiVersion: 'v1',
|
|
133
|
+
});
|
|
134
|
+
},
|
|
135
|
+
};
|
|
136
|
+
// ============================================================================
|
|
137
|
+
// HELPER FUNCTIONS
|
|
138
|
+
// ============================================================================
|
|
139
|
+
/**
 * Check if a RAG evaluation passes quality thresholds.
 *
 * The evaluation passes only when all three conditions hold:
 * `groundedness >= minGroundedness`, `overallScore >= minOverallScore`, and
 * `grade` ranks at least as high as `minGrade` (A > B > C > D > F). A grade
 * that is not one of those letters, on either side, fails the grade check.
 *
 * @param evaluation - Object providing `groundedness`, `overallScore` and `grade`.
 * @param thresholds - Optional overrides; defaults are `minGroundedness = 0.7`,
 *   `minOverallScore = 60`, `minGrade = 'C'`. `null` is treated like an
 *   omitted argument.
 * @returns `true` when every threshold is met, otherwise `false`.
 */
function passesQualityThreshold(evaluation, thresholds = {}) {
    // The default parameter only covers `undefined`; plain-JS callers may pass
    // `null`, which would otherwise make this destructuring throw.
    const { minGroundedness = 0.7, minOverallScore = 60, minGrade = 'C' } = thresholds ?? {};
    // Higher rank means a better grade.
    const gradeRank = { A: 4, B: 3, C: 2, D: 1, F: 0 };
    return (evaluation.groundedness >= minGroundedness &&
        evaluation.overallScore >= minOverallScore &&
        gradeRank[evaluation.grade] >= gradeRank[minGrade]);
}
|
|
150
|
+
/**
 * Check if hallucination risk is acceptable.
 *
 * Risk levels are ranked low < medium < high < critical. The report is
 * acceptable when its `riskLevel` ranks at or below `maxRiskLevel`. A level
 * that is not one of those four, on either side, is rejected.
 *
 * @param report - Object providing `riskLevel`.
 * @param maxRiskLevel - Highest level still considered acceptable; defaults
 *   to `'medium'`.
 * @returns `true` when the risk level is within the limit, otherwise `false`.
 */
function isHallucinationRiskAcceptable(report, maxRiskLevel = 'medium') {
    const riskRank = { low: 0, medium: 1, high: 2, critical: 3 };
    const actual = riskRank[report.riskLevel];
    const limit = riskRank[maxRiskLevel];
    // Fail closed: a missing or unrecognised level never counts as acceptable.
    if (typeof actual !== 'number' || typeof limit !== 'number') {
        return false;
    }
    return actual <= limit;
}
|
|
157
|
+
/**
 * Get quality recommendations based on evaluation.
 *
 * Each rule below contributes its message when it triggers; messages keep the
 * order of the rules. When no rule triggers, a single "within acceptable
 * ranges" message is returned, so the result is never empty.
 *
 * NOTE(review): a metric that is missing or `NaN` never triggers its rule,
 * because `undefined < x` and `NaN < x` are both false.
 *
 * @param ragEval - Object providing `groundedness`, `contextRelevance` and
 *   `citationAccuracy`.
 * @param hallucinationReport - Optional object providing `hasHallucinations`.
 * @returns Array of recommendation messages.
 */
function getQualityRecommendations(ragEval, hallucinationReport) {
    const rules = [
        [ragEval.groundedness < 0.7, 'Improve grounding by increasing context relevance'],
        [ragEval.contextRelevance < 0.6, 'Tune retrieval to return more relevant contexts'],
        [ragEval.citationAccuracy < 0.8, 'Improve citation accuracy in responses'],
        [Boolean(hallucinationReport?.hasHallucinations), 'Add fact-checking layer to reduce hallucinations'],
    ];
    const recommendations = rules
        .filter(([triggered]) => triggered)
        .map(([, message]) => message);
    if (recommendations.length === 0) {
        recommendations.push('Quality metrics are within acceptable ranges');
    }
    return recommendations;
}
|
|
179
|
+
/**
 * Format quality score for display.
 *
 * Multiplies the score by 100, rounds to the nearest integer and appends `%`,
 * so the input is expected to be a 0-1 fraction (`0.856` becomes `'86%'`).
 * A value already on a 0-100 scale would be inflated by this formatter.
 *
 * @param score - Score as a fraction.
 * @returns The score as a whole-number percentage string.
 */
function formatQualityScore(score) {
    const percent = Math.round(score * 100);
    return `${percent}%`;
}
|
|
185
|
+
/**
 * Get color indicator for grade.
 *
 * Uses an exact-match `switch` rather than a plain-object lookup, so keys
 * inherited from `Object.prototype` (for example `'toString'`) are not
 * mistaken for grades.
 *
 * @param grade - Letter grade: `'A'`, `'B'`, `'C'`, `'D'` or `'F'`.
 * @returns `'green'`, `'blue'`, `'yellow'`, `'orange'` or `'red'` for the
 *   grades above in that order; `undefined` for any other value.
 */
function getGradeColor(grade) {
    switch (grade) {
        case 'A':
            return 'green';
        case 'B':
            return 'blue';
        case 'C':
            return 'yellow';
        case 'D':
            return 'orange';
        case 'F':
            return 'red';
        default:
            // Unrecognised grade: same result the original lookup gave for unknown keys.
            return undefined;
    }
}
|
|
198
|
+
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"quality-metrics.js","sourceRoot":"","sources":["../../src/api/quality-metrics.ts"],"names":[],"mappings":";AAAA;;;;GAIG;;;AAqVH,wDAgBC;AAKD,sEAMC;AAKD,8DA2BC;AAKD,gDAEC;AAKD,sCAWC;AAraD,2CAAgE;AAkKhE,+EAA+E;AAC/E,6BAA6B;AAC7B,+EAA+E;AAE/E;;GAEG;AACU,QAAA,cAAc,GAAG;IAC5B;;;;;;;;;OASG;IACH,KAAK,CAAC,YAAY,CAAC,OAAe;QAKhC,OAAO,IAAA,2BAAkB,EAAC,uBAAuB,OAAO,EAAE,EAAE;YAC1D,UAAU,EAAE,IAAI;SACjB,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;;;;;;;OAaG;IACH,KAAK,CAAC,sBAAsB,CAAC,OAAe;QAI1C,OAAO,IAAA,2BAAkB,EAAC,iCAAiC,OAAO,EAAE,EAAE;YACpE,UAAU,EAAE,IAAI;SACjB,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;;;;;;;;OAcG;IACH,KAAK,CAAC,WAAW,CAAC,KAMjB;QAIC,OAAO,IAAA,2BAAkB,EAAC,uBAAuB,EAAE;YACjD,MAAM,EAAE,MAAM;YACd,IAAI,EAAE,KAAK;YACX,UAAU,EAAE,IAAI;SACjB,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;;;;;;;;;OAeG;IACH,KAAK,CAAC,oBAAoB,CAAC,KAK1B;QAGC,OAAO,IAAA,2BAAkB,EAAC,gCAAgC,EAAE;YAC1D,MAAM,EAAE,MAAM;YACd,IAAI,EAAE,KAAK;YACX,UAAU,EAAE,IAAI;SACjB,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;;;OASG;IACH,KAAK,CAAC,eAAe,CAAC,OAAe;QAcnC,OAAO,IAAA,2BAAkB,EAAC,yBAAyB,OAAO,EAAE,EAAE;YAC5D,UAAU,EAAE,IAAI;SACjB,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;;;;;OAWG;IACH,KAAK,CAAC,aAAa,CAAC,OAGnB;QAIC,OAAO,IAAA,2BAAkB,EAAC,yBAAyB,EAAE;YACnD,MAAM,EAAE,MAAM;YACd,IAAI,EAAE,OAAO;YACb,UAAU,EAAE,IAAI;SACjB,CAAC,CAAC;IACL,CAAC;CACF,CAAC;AAEF,+EAA+E;AAC/E,mBAAmB;AACnB,+EAA+E;AAE/E;;GAEG;AACH,SAAgB,sBAAsB,CACpC,UAAyB,EACzB,aAII,EAAE;IAEN,MAAM,EAAE,eAAe,GAAG,GAAG,EAAE,eAAe,GAAG,EAAE,EAAE,QAAQ,GAAG,GAAG,EAAE,GAAG,UAAU,CAAC;IAEnF,MAAM,UAAU,GAAG,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC;IACpD,MAAM,iBAAiB,GAAG,UAAU,CAAC,YAAY,IAAI,eAAe,CAAC;IACrE,MAAM,UAAU,GAAG,UAAU,CAAC,YAAY,IAAI,eAAe,CAAC;IAC9D,MAAM,UAAU,GAAG,UAAU,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI,UAAU,CAAC,QAAQ,CAAC,CAAC;IAExE,OAAO,iBAAiB,IAAI,UAAU,IAAI,UAAU,CAAC;AACvD,CAAC;AAED;;GAEG;AACH,SAAgB,6BAA6B,CAC3C,MAA2B,EAC3B,eAA0C,QAAQ;IAElD,MAAM,SAAS,GAAG,EAAE,GAAG,EAAE,CAAC,EAAE,MAAM,EAAE,CA
AC,EAAE,IAAI,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,CAAC;IAC9D,OAAO,SAAS,CAAC,MAAM,CAAC,SAAS,CAAC,IAAI,SAAS,CAAC,YAAY,CAAC,CAAC;AAChE,CAAC;AAED;;GAEG;AACH,SAAgB,yBAAyB,CACvC,OAAsB,EACtB,mBAAyC;IAEzC,MAAM,eAAe,GAAa,EAAE,CAAC;IAErC,IAAI,OAAO,CAAC,YAAY,GAAG,GAAG,EAAE,CAAC;QAC/B,eAAe,CAAC,IAAI,CAAC,mDAAmD,CAAC,CAAC;IAC5E,CAAC;IAED,IAAI,OAAO,CAAC,gBAAgB,GAAG,GAAG,EAAE,CAAC;QACnC,eAAe,CAAC,IAAI,CAAC,iDAAiD,CAAC,CAAC;IAC1E,CAAC;IAED,IAAI,OAAO,CAAC,gBAAgB,GAAG,GAAG,EAAE,CAAC;QACnC,eAAe,CAAC,IAAI,CAAC,wCAAwC,CAAC,CAAC;IACjE,CAAC;IAED,IAAI,mBAAmB,EAAE,iBAAiB,EAAE,CAAC;QAC3C,eAAe,CAAC,IAAI,CAAC,kDAAkD,CAAC,CAAC;IAC3E,CAAC;IAED,IAAI,eAAe,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACjC,eAAe,CAAC,IAAI,CAAC,8CAA8C,CAAC,CAAC;IACvE,CAAC;IAED,OAAO,eAAe,CAAC;AACzB,CAAC;AAED;;GAEG;AACH,SAAgB,kBAAkB,CAAC,KAAa;IAC9C,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG,GAAG,CAAC,GAAG,CAAC;AACvC,CAAC;AAED;;GAEG;AACH,SAAgB,aAAa,CAC3B,KAAkC;IAElC,MAAM,MAAM,GAAG;QACb,CAAC,EAAE,OAAgB;QACnB,CAAC,EAAE,MAAe;QAClB,CAAC,EAAE,QAAiB;QACpB,CAAC,EAAE,QAAiB;QACpB,CAAC,EAAE,KAAc;KAClB,CAAC;IACF,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC;AACvB,CAAC","sourcesContent":["/**\n * ThinkHive SDK v3.1 - Quality Metrics API\n *\n * RAG Evaluation & Hallucination Detection for AI quality assurance\n */\n\nimport { apiRequest, apiRequestWithData } from '../core/client';\nimport type { ApiResponse } from '../core/types';\n\n// ============================================================================\n// TYPES\n// ============================================================================\n\n/**\n * Retrieved context for RAG evaluation\n */\nexport interface RetrievedContext {\n  content: string;\n  chunkIndex?: number;\n  metadata?: Record<string, unknown>;\n  score?: number;\n}\n\n/**\n * Ground truth context\n */\nexport interface GroundTruthContext {\n  content: string;\n  chunkIndex?: number;\n}\n\n/**\n * Grounded span evidence\n */\nexport interface GroundedSpan {\n  text: string;\n  confidence: number;\n  sourceChunkIndex?: number;\n}\n\n/**\n * 
Ungrounded span evidence\n */\nexport interface UngroundedSpan {\n  text: string;\n  confidence: number;\n}\n\n/**\n * Citation mapping\n */\nexport interface CitationMap {\n  claim: string;\n  citedIndex: number;\n  isValid: boolean;\n}\n\n/**\n * RAG evaluation result\n */\nexport interface RAGEvaluation {\n  // Retrieval Quality\n  contextRelevance: number;\n  contextPrecision: number;\n  contextRecall: number;\n\n  // Generation Quality\n  groundedness: number;\n  faithfulness: number;\n  answerRelevance: number;\n\n  // Citation Quality\n  citationAccuracy: number;\n  citationCompleteness: number;\n\n  // Overall\n  overallScore: number;\n  grade: 'A' | 'B' | 'C' | 'D' | 'F';\n\n  // Details\n  groundedSpanCount?: number;\n  ungroundedSpanCount?: number;\n  issues: string[];\n  recommendations: string[];\n}\n\n/**\n * RAG evaluation evidence\n */\nexport interface RAGEvidence {\n  groundedSpans: GroundedSpan[];\n  ungroundedSpans: UngroundedSpan[];\n  citationMap: CitationMap[];\n}\n\n/**\n * Hallucination instance\n */\nexport interface HallucinationInstance {\n  type: string;\n  severity: 'low' | 'medium' | 'high' | 'critical';\n  text: string;\n  explanation: string;\n  confidence: number;\n  suggestedFix?: string;\n}\n\n/**\n * Hallucination detection report\n */\nexport interface HallucinationReport {\n  hasHallucinations: boolean;\n  hallucinationScore: number;\n  riskLevel: 'low' | 'medium' | 'high' | 'critical';\n  factualClaims: number;\n  verifiedClaims: number;\n  unverifiedClaims: number;\n  summary: string;\n  recommendations: string[];\n  instances: HallucinationInstance[];\n}\n\n/**\n * Groundedness analysis result\n */\nexport interface GroundednessResult {\n  score: number;\n  faithfulness: number;\n  contextRelevance: number;\n  grade: string;\n}\n\n/**\n * Batch evaluation result for a single trace\n */\nexport interface BatchEvaluationResult {\n  traceId: string;\n  success: boolean;\n  error?: string;\n  rag?: {\n    score: number;\n    
grade: string;\n    mainIssue?: string;\n  };\n  hallucination?: {\n    hasIssues: boolean;\n    score: number;\n    topIssue?: string;\n  };\n}\n\n/**\n * Batch evaluation summary\n */\nexport interface BatchEvaluationSummary {\n  totalTraces: number;\n  successfulEvaluations: number;\n  avgRagScore: number;\n  hallucinationRate: number;\n  gradeDistribution: {\n    A: number;\n    B: number;\n    C: number;\n    D: number;\n    F: number;\n  };\n}\n\n// ============================================================================\n// QUALITY METRICS API CLIENT\n// ============================================================================\n\n/**\n * Quality Metrics API client for RAG evaluation and hallucination detection\n */\nexport const qualityMetrics = {\n  /**\n   * Get RAG quality scores for a specific trace\n   *\n   * @example\n   * ```typescript\n   * const scores = await qualityMetrics.getRagScores('trace_abc123');\n   * console.log(`Groundedness: ${scores.evaluation.groundedness}`);\n   * console.log(`Grade: ${scores.evaluation.grade}`);\n   * ```\n   */\n  async getRagScores(traceId: string): Promise<{\n    traceId: string;\n    evaluation: RAGEvaluation;\n    evidence: RAGEvidence;\n  }> {\n    return apiRequestWithData(`/quality/rag-scores/${traceId}`, {\n      apiVersion: 'v1',\n    });\n  },\n\n  /**\n   * Get hallucination detection report for a trace\n   *\n   * @example\n   * ```typescript\n   * const report = await qualityMetrics.getHallucinationReport('trace_abc123');\n   * if (report.report.hasHallucinations) {\n   *   console.log(`Risk level: ${report.report.riskLevel}`);\n   *   for (const instance of report.report.instances) {\n   *     console.log(`- ${instance.type}: ${instance.text}`);\n   *   }\n   * }\n   * ```\n   */\n  async getHallucinationReport(traceId: string): Promise<{\n    traceId: string;\n    report: HallucinationReport;\n  }> {\n    return apiRequestWithData(`/quality/hallucination-report/${traceId}`, {\n      
apiVersion: 'v1',\n    });\n  },\n\n  /**\n   * Evaluate RAG quality for provided content (ad-hoc evaluation)\n   *\n   * @example\n   * ```typescript\n   * const result = await qualityMetrics.evaluateRag({\n   *   query: 'What is the refund policy?',\n   *   response: 'You can get a refund within 30 days.',\n   *   retrievedContexts: [\n   *     { content: 'Our refund policy allows returns within 30 days of purchase.' },\n   *   ],\n   * });\n   * console.log(`Groundedness: ${result.evaluation.groundedness}`);\n   * ```\n   */\n  async evaluateRag(input: {\n    query: string;\n    response: string;\n    retrievedContexts: RetrievedContext[];\n    groundTruthContexts?: GroundTruthContext[];\n    citations?: string[];\n  }): Promise<{\n    evaluation: RAGEvaluation;\n    evidence: RAGEvidence;\n  }> {\n    return apiRequestWithData('/quality/evaluate-rag', {\n      method: 'POST',\n      body: input,\n      apiVersion: 'v1',\n    });\n  },\n\n  /**\n   * Detect hallucinations in provided content (ad-hoc detection)\n   *\n   * @example\n   * ```typescript\n   * const result = await qualityMetrics.detectHallucinations({\n   *   response: 'The product costs $99 and comes with a 2-year warranty.',\n   *   contexts: [\n   *     { content: 'The product costs $99 with a 1-year warranty.' 
},\n   *   ],\n   * });\n   * if (result.report.hasHallucinations) {\n   *   console.log('Detected hallucinations:', result.report.instances);\n   * }\n   * ```\n   */\n  async detectHallucinations(input: {\n    response: string;\n    contexts: Array<{ content: string; metadata?: Record<string, unknown> }>;\n    query?: string;\n    previousResponses?: string[];\n  }): Promise<{\n    report: HallucinationReport;\n  }> {\n    return apiRequestWithData('/quality/detect-hallucinations', {\n      method: 'POST',\n      body: input,\n      apiVersion: 'v1',\n    });\n  },\n\n  /**\n   * Get groundedness analysis for a trace\n   *\n   * @example\n   * ```typescript\n   * const result = await qualityMetrics.getGroundedness('trace_abc123');\n   * console.log(`Groundedness score: ${result.groundedness.score}`);\n   * console.log(`Grounded spans: ${result.summary.groundedSpans}`);\n   * ```\n   */\n  async getGroundedness(traceId: string): Promise<{\n    traceId: string;\n    groundedness: GroundednessResult;\n    spans: {\n      grounded: Array<{ text: string; confidence: number; sourceIndex: number }>;\n      ungrounded: Array<{ text: string; confidence: number }>;\n    };\n    summary: {\n      totalSpans: number;\n      groundedSpans: number;\n      ungroundedSpans: number;\n      groundednessRatio: number;\n    };\n  }> {\n    return apiRequestWithData(`/quality/groundedness/${traceId}`, {\n      apiVersion: 'v1',\n    });\n  },\n\n  /**\n   * Evaluate multiple traces for quality metrics in batch\n   *\n   * @example\n   * ```typescript\n   * const result = await qualityMetrics.evaluateBatch({\n   *   traceIds: ['trace_1', 'trace_2', 'trace_3'],\n   * });\n   * console.log(`Average RAG score: ${result.summary.avgRagScore}`);\n   * console.log(`Hallucination rate: ${result.summary.hallucinationRate}%`);\n   * ```\n   */\n  async evaluateBatch(options: {\n    traceIds: string[];\n    includeDetails?: boolean;\n  }): Promise<{\n    summary: BatchEvaluationSummary;\n    
results: BatchEvaluationResult[];\n  }> {\n    return apiRequestWithData('/quality/evaluate-batch', {\n      method: 'POST',\n      body: options,\n      apiVersion: 'v1',\n    });\n  },\n};\n\n// ============================================================================\n// HELPER FUNCTIONS\n// ============================================================================\n\n/**\n * Check if a RAG evaluation passes quality thresholds\n */\nexport function passesQualityThreshold(\n  evaluation: RAGEvaluation,\n  thresholds: {\n    minGroundedness?: number;\n    minOverallScore?: number;\n    minGrade?: 'A' | 'B' | 'C' | 'D';\n  } = {}\n): boolean {\n  const { minGroundedness = 0.7, minOverallScore = 60, minGrade = 'C' } = thresholds;\n\n  const gradeOrder = { A: 4, B: 3, C: 2, D: 1, F: 0 };\n  const meetsGroundedness = evaluation.groundedness >= minGroundedness;\n  const meetsScore = evaluation.overallScore >= minOverallScore;\n  const meetsGrade = gradeOrder[evaluation.grade] >= gradeOrder[minGrade];\n\n  return meetsGroundedness && meetsScore && meetsGrade;\n}\n\n/**\n * Check if hallucination risk is acceptable\n */\nexport function isHallucinationRiskAcceptable(\n  report: HallucinationReport,\n  maxRiskLevel: 'low' | 'medium' | 'high' = 'medium'\n): boolean {\n  const riskOrder = { low: 0, medium: 1, high: 2, critical: 3 };\n  return riskOrder[report.riskLevel] <= riskOrder[maxRiskLevel];\n}\n\n/**\n * Get quality recommendations based on evaluation\n */\nexport function getQualityRecommendations(\n  ragEval: RAGEvaluation,\n  hallucinationReport?: HallucinationReport\n): string[] {\n  const recommendations: string[] = [];\n\n  if (ragEval.groundedness < 0.7) {\n    recommendations.push('Improve grounding by increasing context relevance');\n  }\n\n  if (ragEval.contextRelevance < 0.6) {\n    recommendations.push('Tune retrieval to return more relevant contexts');\n  }\n\n  if (ragEval.citationAccuracy < 0.8) {\n    recommendations.push('Improve citation 
accuracy in responses');\n  }\n\n  if (hallucinationReport?.hasHallucinations) {\n    recommendations.push('Add fact-checking layer to reduce hallucinations');\n  }\n\n  if (recommendations.length === 0) {\n    recommendations.push('Quality metrics are within acceptable ranges');\n  }\n\n  return recommendations;\n}\n\n/**\n * Format quality score for display\n */\nexport function formatQualityScore(score: number): string {\n  return `${Math.round(score * 100)}%`;\n}\n\n/**\n * Get color indicator for grade\n */\nexport function getGradeColor(\n  grade: 'A' | 'B' | 'C' | 'D' | 'F'\n): 'green' | 'blue' | 'yellow' | 'orange' | 'red' {\n  const colors = {\n    A: 'green' as const,\n    B: 'blue' as const,\n    C: 'yellow' as const,\n    D: 'orange' as const,\n    F: 'red' as const,\n  };\n  return colors[grade];\n}\n"]}
|