@thinkhive/sdk 3.1.0 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,303 @@
1
+ /**
2
+ * ThinkHive SDK v3.1 - Quality Metrics API
3
+ *
4
+ * RAG Evaluation & Hallucination Detection for AI quality assurance
5
+ */
/**
 * One retrieved context entry handed to RAG evaluation
 * (the element type of the `retrievedContexts` input of `evaluateRag`).
 */
export interface RetrievedContext {
    /** Text of the context entry. */
    content: string;
    /** Optional chunk index — presumably the chunk's position in its source; confirm against the API. */
    chunkIndex?: number;
    /** Optional free-form key/value metadata attached to the entry. */
    metadata?: Record<string, unknown>;
    /** Optional numeric score — NOTE(review): likely the retriever's relevance score; scale is not shown here. */
    score?: number;
}
/**
 * A ground-truth context entry
 * (the element type of the optional `groundTruthContexts` input of `evaluateRag`).
 */
export interface GroundTruthContext {
    /** Text of the ground-truth entry. */
    content: string;
    /** Optional chunk index — presumably aligned with `RetrievedContext.chunkIndex`; confirm. */
    chunkIndex?: number;
}
/**
 * Evidence record for a grounded span (listed in `RAGEvidence.groundedSpans`).
 */
export interface GroundedSpan {
    /** Text of the span. */
    text: string;
    /** Confidence value for the span — scale not shown in this file. */
    confidence: number;
    /** Optional index of the source chunk — presumably the chunk that supports the span; confirm. */
    sourceChunkIndex?: number;
}
/**
 * Evidence record for an ungrounded span (listed in `RAGEvidence.ungroundedSpans`).
 */
export interface UngroundedSpan {
    /** Text of the span. */
    text: string;
    /** Confidence value for the span — scale not shown in this file. */
    confidence: number;
}
/**
 * One citation-mapping entry (listed in `RAGEvidence.citationMap`).
 */
export interface CitationMap {
    /** The claim text. */
    claim: string;
    /** Index that the claim cites — presumably into the retrieved contexts; confirm. */
    citedIndex: number;
    /** Whether the citation was judged valid. */
    isValid: boolean;
}
/**
 * Result of a RAG evaluation.
 *
 * The scales of the numeric metrics are not declared here. The helper
 * functions shipped alongside these types compare `groundedness` with 0.7,
 * `contextRelevance` with 0.6, `citationAccuracy` with 0.8 and
 * `overallScore` with a default minimum of 60 — NOTE(review): this suggests
 * 0–1 metrics and a 0–100 overall score; confirm against the API.
 */
export interface RAGEvaluation {
    // --- retrieval metrics ---
    contextRelevance: number;
    contextPrecision: number;
    contextRecall: number;

    // --- generation metrics ---
    groundedness: number;
    faithfulness: number;
    answerRelevance: number;

    // --- citation metrics ---
    citationAccuracy: number;
    citationCompleteness: number;

    // --- overall verdict ---
    overallScore: number;
    /** Letter grade; one of the five listed literals. */
    grade: 'A' | 'B' | 'C' | 'D' | 'F';

    // --- details ---
    /** Optional count of grounded spans. */
    groundedSpanCount?: number;
    /** Optional count of ungrounded spans. */
    ungroundedSpanCount?: number;
    /** Issue descriptions reported with the evaluation. */
    issues: string[];
    /** Recommendation strings reported with the evaluation. */
    recommendations: string[];
}
/**
 * Evidence returned next to a `RAGEvaluation`: the grounded spans, the
 * ungrounded spans and the citation mapping.
 */
export interface RAGEvidence {
    groundedSpans: GroundedSpan[];
    ungroundedSpans: UngroundedSpan[];
    citationMap: CitationMap[];
}
/**
 * One hallucination finding (listed in `HallucinationReport.instances`).
 */
export interface HallucinationInstance {
    /** Finding category; a free-form string as far as this declaration goes. */
    type: string;
    /** Severity; one of the four listed literals. */
    severity: 'low' | 'medium' | 'high' | 'critical';
    /** The text the finding refers to. */
    text: string;
    /** Explanation accompanying the finding. */
    explanation: string;
    /** Confidence value for the finding — scale not shown in this file. */
    confidence: number;
    /** Optional suggested fix. */
    suggestedFix?: string;
}
/**
 * Hallucination detection report.
 */
export interface HallucinationReport {
    /** True when hallucinations were reported. */
    hasHallucinations: boolean;
    /** Numeric hallucination score — scale and direction not shown in this file. */
    hallucinationScore: number;
    /** Overall risk level; one of the four listed literals. */
    riskLevel: 'low' | 'medium' | 'high' | 'critical';
    /** Claim counters — presumably `verifiedClaims + unverifiedClaims` relates to `factualClaims`; confirm. */
    factualClaims: number;
    verifiedClaims: number;
    unverifiedClaims: number;
    /** Summary text. */
    summary: string;
    /** Recommendation strings. */
    recommendations: string[];
    /** Individual findings. */
    instances: HallucinationInstance[];
}
/**
 * Groundedness analysis result (the `groundedness` field returned by
 * `qualityMetrics.getGroundedness`).
 */
export interface GroundednessResult {
    /** Groundedness score — scale not shown in this file. */
    score: number;
    faithfulness: number;
    contextRelevance: number;
    /** Grade, typed as a plain string here (unlike the literal union on `RAGEvaluation.grade`). */
    grade: string;
}
/**
 * Per-trace entry of a batch evaluation (the element type of the `results`
 * array returned by `qualityMetrics.evaluateBatch`).
 */
export interface BatchEvaluationResult {
    /** Identifier of the evaluated trace. */
    traceId: string;
    /** Whether the evaluation of this trace succeeded. */
    success: boolean;
    /** Optional error text — presumably set when `success` is false; confirm. */
    error?: string;
    /** Optional RAG outcome for the trace. */
    rag?: {
        score: number;
        grade: string;
        mainIssue?: string;
    };
    /** Optional hallucination outcome for the trace. */
    hallucination?: {
        hasIssues: boolean;
        score: number;
        topIssue?: string;
    };
}
/**
 * Aggregate figures of a batch evaluation (the `summary` returned by
 * `qualityMetrics.evaluateBatch`).
 */
export interface BatchEvaluationSummary {
    /** Number of traces in the batch. */
    totalTraces: number;
    /** Number of traces that were evaluated successfully. */
    successfulEvaluations: number;
    /** Average RAG score — scale not shown in this file. */
    avgRagScore: number;
    /** Hallucination rate; the `evaluateBatch` usage example prints it followed by `%`. */
    hallucinationRate: number;
    /** Count per letter grade. */
    gradeDistribution: {
        A: number;
        B: number;
        C: number;
        D: number;
        F: number;
    };
}
/**
 * Quality Metrics API client: RAG evaluation and hallucination detection.
 *
 * Three methods look up results for a stored trace by id; three post data
 * (ad-hoc content or a list of trace ids) for evaluation.
 */
export declare const qualityMetrics: {
    /**
     * Fetch the RAG quality scores of one trace.
     *
     * @param traceId - Identifier of the trace.
     * @returns The trace id, its evaluation and the supporting evidence.
     *
     * @example
     * ```typescript
     * const scores = await qualityMetrics.getRagScores('trace_abc123');
     * console.log(`Groundedness: ${scores.evaluation.groundedness}`);
     * console.log(`Grade: ${scores.evaluation.grade}`);
     * ```
     */
    getRagScores(traceId: string): Promise<{
        traceId: string;
        evaluation: RAGEvaluation;
        evidence: RAGEvidence;
    }>;

    /**
     * Fetch the hallucination detection report of one trace.
     *
     * @param traceId - Identifier of the trace.
     * @returns The trace id and its report.
     *
     * @example
     * ```typescript
     * const report = await qualityMetrics.getHallucinationReport('trace_abc123');
     * if (report.report.hasHallucinations) {
     *   console.log(`Risk level: ${report.report.riskLevel}`);
     *   for (const instance of report.report.instances) {
     *     console.log(`- ${instance.type}: ${instance.text}`);
     *   }
     * }
     * ```
     */
    getHallucinationReport(traceId: string): Promise<{
        traceId: string;
        report: HallucinationReport;
    }>;

    /**
     * Evaluate RAG quality for caller-supplied content (ad-hoc evaluation).
     *
     * @param input - Query, response and retrieved contexts; ground-truth
     *   contexts and citations are optional.
     * @returns The evaluation and the supporting evidence.
     *
     * @example
     * ```typescript
     * const result = await qualityMetrics.evaluateRag({
     *   query: 'What is the refund policy?',
     *   response: 'You can get a refund within 30 days.',
     *   retrievedContexts: [
     *     { content: 'Our refund policy allows returns within 30 days of purchase.' },
     *   ],
     * });
     * console.log(`Groundedness: ${result.evaluation.groundedness}`);
     * ```
     */
    evaluateRag(input: {
        query: string;
        response: string;
        retrievedContexts: RetrievedContext[];
        groundTruthContexts?: GroundTruthContext[];
        citations?: string[];
    }): Promise<{
        evaluation: RAGEvaluation;
        evidence: RAGEvidence;
    }>;

    /**
     * Detect hallucinations in caller-supplied content (ad-hoc detection).
     *
     * @param input - Response and the contexts to check it against; the
     *   query and previous responses are optional.
     * @returns The hallucination report.
     *
     * @example
     * ```typescript
     * const result = await qualityMetrics.detectHallucinations({
     *   response: 'The product costs $99 and comes with a 2-year warranty.',
     *   contexts: [
     *     { content: 'The product costs $99 with a 1-year warranty.' },
     *   ],
     * });
     * if (result.report.hasHallucinations) {
     *   console.log('Detected hallucinations:', result.report.instances);
     * }
     * ```
     */
    detectHallucinations(input: {
        response: string;
        contexts: {
            content: string;
            metadata?: Record<string, unknown>;
        }[];
        query?: string;
        previousResponses?: string[];
    }): Promise<{
        report: HallucinationReport;
    }>;

    /**
     * Fetch the groundedness analysis of one trace.
     *
     * @param traceId - Identifier of the trace.
     * @returns The trace id, the groundedness result, the grounded and
     *   ungrounded spans, and summary counters.
     *
     * @example
     * ```typescript
     * const result = await qualityMetrics.getGroundedness('trace_abc123');
     * console.log(`Groundedness score: ${result.groundedness.score}`);
     * console.log(`Grounded spans: ${result.summary.groundedSpans}`);
     * ```
     */
    getGroundedness(traceId: string): Promise<{
        traceId: string;
        groundedness: GroundednessResult;
        spans: {
            grounded: {
                text: string;
                confidence: number;
                sourceIndex: number;
            }[];
            ungrounded: {
                text: string;
                confidence: number;
            }[];
        };
        summary: {
            totalSpans: number;
            groundedSpans: number;
            ungroundedSpans: number;
            groundednessRatio: number;
        };
    }>;

    /**
     * Evaluate quality metrics for several traces in one batch.
     *
     * @param options - The trace ids, plus an optional `includeDetails` flag.
     * @returns An aggregate summary and one result entry per trace.
     *
     * @example
     * ```typescript
     * const result = await qualityMetrics.evaluateBatch({
     *   traceIds: ['trace_1', 'trace_2', 'trace_3'],
     * });
     * console.log(`Average RAG score: ${result.summary.avgRagScore}`);
     * console.log(`Hallucination rate: ${result.summary.hallucinationRate}%`);
     * ```
     */
    evaluateBatch(options: {
        traceIds: string[];
        includeDetails?: boolean;
    }): Promise<{
        summary: BatchEvaluationSummary;
        results: BatchEvaluationResult[];
    }>;
};
/**
 * Check whether a RAG evaluation passes quality thresholds.
 *
 * @param evaluation - The evaluation to check.
 * @param thresholds - Optional minimum groundedness, minimum overall score
 *   and minimum grade; omitted fields fall back to the implementation's defaults.
 * @returns True only when every threshold is met.
 */
export declare function passesQualityThreshold(
    evaluation: RAGEvaluation,
    thresholds?: {
        minGroundedness?: number;
        minOverallScore?: number;
        minGrade?: 'A' | 'B' | 'C' | 'D';
    }
): boolean;
/**
 * Check whether a report's hallucination risk is acceptable.
 *
 * @param report - The hallucination report to check.
 * @param maxRiskLevel - Highest risk level still considered acceptable (optional).
 * @returns True when the report's risk level does not exceed `maxRiskLevel`.
 */
export declare function isHallucinationRiskAcceptable(
    report: HallucinationReport,
    maxRiskLevel?: 'low' | 'medium' | 'high'
): boolean;
/**
 * Build quality recommendations from an evaluation.
 *
 * @param ragEval - The RAG evaluation to inspect.
 * @param hallucinationReport - Optional hallucination report to take into account.
 * @returns A list of recommendation strings.
 */
export declare function getQualityRecommendations(
    ragEval: RAGEvaluation,
    hallucinationReport?: HallucinationReport
): string[];
/**
 * Format a quality score as a display string.
 *
 * @param score - The numeric score to format.
 * @returns The formatted score.
 */
export declare function formatQualityScore(score: number): string;
/**
 * Map a letter grade to its color indicator.
 *
 * @param grade - One of the five letter grades.
 * @returns The color name associated with the grade.
 */
export declare function getGradeColor(
    grade: 'A' | 'B' | 'C' | 'D' | 'F'
): 'green' | 'blue' | 'yellow' | 'orange' | 'red';
@@ -0,0 +1,198 @@
1
+ "use strict";
2
+ /**
3
+ * ThinkHive SDK v3.1 - Quality Metrics API
4
+ *
5
+ * RAG Evaluation & Hallucination Detection for AI quality assurance
6
+ */
7
+ Object.defineProperty(exports, "__esModule", { value: true });
8
+ exports.qualityMetrics = void 0;
9
+ exports.passesQualityThreshold = passesQualityThreshold;
10
+ exports.isHallucinationRiskAcceptable = isHallucinationRiskAcceptable;
11
+ exports.getQualityRecommendations = getQualityRecommendations;
12
+ exports.formatQualityScore = formatQualityScore;
13
+ exports.getGradeColor = getGradeColor;
14
+ const client_1 = require("../core/client");
15
+ // ============================================================================
16
+ // QUALITY METRICS API CLIENT
17
+ // ============================================================================
18
+ /**
19
+ * Quality Metrics API client for RAG evaluation and hallucination detection
20
+ */
21
+ exports.qualityMetrics = {
22
+ /**
23
+ * Get RAG quality scores for a specific trace
24
+ *
25
+ * @example
26
+ * ```typescript
27
+ * const scores = await qualityMetrics.getRagScores('trace_abc123');
28
+ * console.log(`Groundedness: ${scores.evaluation.groundedness}`);
29
+ * console.log(`Grade: ${scores.evaluation.grade}`);
30
+ * ```
31
+ */
32
+ async getRagScores(traceId) {
33
+ return (0, client_1.apiRequestWithData)(`/quality/rag-scores/${traceId}`, {
34
+ apiVersion: 'v1',
35
+ });
36
+ },
37
+ /**
38
+ * Get hallucination detection report for a trace
39
+ *
40
+ * @example
41
+ * ```typescript
42
+ * const report = await qualityMetrics.getHallucinationReport('trace_abc123');
43
+ * if (report.report.hasHallucinations) {
44
+ * console.log(`Risk level: ${report.report.riskLevel}`);
45
+ * for (const instance of report.report.instances) {
46
+ * console.log(`- ${instance.type}: ${instance.text}`);
47
+ * }
48
+ * }
49
+ * ```
50
+ */
51
+ async getHallucinationReport(traceId) {
52
+ return (0, client_1.apiRequestWithData)(`/quality/hallucination-report/${traceId}`, {
53
+ apiVersion: 'v1',
54
+ });
55
+ },
56
+ /**
57
+ * Evaluate RAG quality for provided content (ad-hoc evaluation)
58
+ *
59
+ * @example
60
+ * ```typescript
61
+ * const result = await qualityMetrics.evaluateRag({
62
+ * query: 'What is the refund policy?',
63
+ * response: 'You can get a refund within 30 days.',
64
+ * retrievedContexts: [
65
+ * { content: 'Our refund policy allows returns within 30 days of purchase.' },
66
+ * ],
67
+ * });
68
+ * console.log(`Groundedness: ${result.evaluation.groundedness}`);
69
+ * ```
70
+ */
71
+ async evaluateRag(input) {
72
+ return (0, client_1.apiRequestWithData)('/quality/evaluate-rag', {
73
+ method: 'POST',
74
+ body: input,
75
+ apiVersion: 'v1',
76
+ });
77
+ },
78
+ /**
79
+ * Detect hallucinations in provided content (ad-hoc detection)
80
+ *
81
+ * @example
82
+ * ```typescript
83
+ * const result = await qualityMetrics.detectHallucinations({
84
+ * response: 'The product costs $99 and comes with a 2-year warranty.',
85
+ * contexts: [
86
+ * { content: 'The product costs $99 with a 1-year warranty.' },
87
+ * ],
88
+ * });
89
+ * if (result.report.hasHallucinations) {
90
+ * console.log('Detected hallucinations:', result.report.instances);
91
+ * }
92
+ * ```
93
+ */
94
+ async detectHallucinations(input) {
95
+ return (0, client_1.apiRequestWithData)('/quality/detect-hallucinations', {
96
+ method: 'POST',
97
+ body: input,
98
+ apiVersion: 'v1',
99
+ });
100
+ },
101
+ /**
102
+ * Get groundedness analysis for a trace
103
+ *
104
+ * @example
105
+ * ```typescript
106
+ * const result = await qualityMetrics.getGroundedness('trace_abc123');
107
+ * console.log(`Groundedness score: ${result.groundedness.score}`);
108
+ * console.log(`Grounded spans: ${result.summary.groundedSpans}`);
109
+ * ```
110
+ */
111
+ async getGroundedness(traceId) {
112
+ return (0, client_1.apiRequestWithData)(`/quality/groundedness/${traceId}`, {
113
+ apiVersion: 'v1',
114
+ });
115
+ },
116
+ /**
117
+ * Evaluate multiple traces for quality metrics in batch
118
+ *
119
+ * @example
120
+ * ```typescript
121
+ * const result = await qualityMetrics.evaluateBatch({
122
+ * traceIds: ['trace_1', 'trace_2', 'trace_3'],
123
+ * });
124
+ * console.log(`Average RAG score: ${result.summary.avgRagScore}`);
125
+ * console.log(`Hallucination rate: ${result.summary.hallucinationRate}%`);
126
+ * ```
127
+ */
128
+ async evaluateBatch(options) {
129
+ return (0, client_1.apiRequestWithData)('/quality/evaluate-batch', {
130
+ method: 'POST',
131
+ body: options,
132
+ apiVersion: 'v1',
133
+ });
134
+ },
135
+ };
// ============================================================================
// HELPER FUNCTIONS
// ============================================================================
/**
 * Check if a RAG evaluation passes quality thresholds.
 *
 * All three checks are inclusive (`>=`) and all must hold.
 *
 * @param evaluation Object providing `groundedness`, `overallScore` and `grade`.
 * @param thresholds Optional overrides. Defaults: `minGroundedness` 0.7,
 *   `minOverallScore` 60, `minGrade` 'C'. `null` is treated like an omitted
 *   argument.
 * @returns `true` when every threshold is met. A grade (or `minGrade`) that
 *   is not one of A/B/C/D/F has no rank, so the comparison is false and the
 *   function returns `false`.
 */
function passesQualityThreshold(evaluation, thresholds = {}) {
    // The default parameter only covers `undefined`; an explicit `null`
    // would otherwise throw when destructured.
    const { minGroundedness = 0.7, minOverallScore = 60, minGrade = 'C' } = thresholds ?? {};
    // Higher rank means better grade.
    const gradeRank = { A: 4, B: 3, C: 2, D: 1, F: 0 };
    return (evaluation.groundedness >= minGroundedness &&
        evaluation.overallScore >= minOverallScore &&
        gradeRank[evaluation.grade] >= gradeRank[minGrade]);
}
/**
 * Check if hallucination risk is acceptable.
 *
 * @param report Object providing `riskLevel`.
 * @param maxRiskLevel Highest level still accepted; defaults to 'medium'.
 * @returns `true` when the report's level ranks at or below `maxRiskLevel`
 *   in the order low < medium < high < critical. A level missing from that
 *   order has no rank, which makes the comparison false.
 */
function isHallucinationRiskAcceptable(report, maxRiskLevel = 'medium') {
    const severityRank = { low: 0, medium: 1, high: 2, critical: 3 };
    const reported = severityRank[report.riskLevel];
    const ceiling = severityRank[maxRiskLevel];
    return reported <= ceiling;
}
/**
 * Get quality recommendations based on evaluation.
 *
 * Checks, in this order: groundedness below 0.7, context relevance below
 * 0.6, citation accuracy below 0.8, and whether the optional hallucination
 * report flags hallucinations. Each failing check contributes one message.
 *
 * @param ragEval Object providing `groundedness`, `contextRelevance` and `citationAccuracy`.
 * @param hallucinationReport Optional object providing `hasHallucinations`.
 * @returns The messages of the failing checks, or a single "within
 *   acceptable ranges" message when none fail.
 */
function getQualityRecommendations(ragEval, hallucinationReport) {
    // [check failed?, message] pairs, kept in output order.
    const checks = [
        [ragEval.groundedness < 0.7, 'Improve grounding by increasing context relevance'],
        [ragEval.contextRelevance < 0.6, 'Tune retrieval to return more relevant contexts'],
        [ragEval.citationAccuracy < 0.8, 'Improve citation accuracy in responses'],
        [Boolean(hallucinationReport?.hasHallucinations), 'Add fact-checking layer to reduce hallucinations'],
    ];
    const advice = [];
    for (const [failed, message] of checks) {
        if (failed) {
            advice.push(message);
        }
    }
    return advice.length > 0 ? advice : ['Quality metrics are within acceptable ranges'];
}
/**
 * Format quality score for display.
 *
 * Multiplies the score by 100, rounds to the nearest integer with
 * `Math.round`, and appends a percent sign.
 *
 * @param score Numeric score; NOTE(review): the multiplication implies a
 *   0–1 input, so a value already on a 0–100 scale would print as e.g. "6000%".
 * @returns The percentage string, e.g. "86%" for 0.856.
 */
function formatQualityScore(score) {
    const percent = Math.round(score * 100);
    return percent + '%';
}
/**
 * Get color indicator for grade.
 *
 * @param grade One of 'A', 'B', 'C', 'D', 'F'.
 * @returns 'green', 'blue', 'yellow', 'orange' or 'red' respectively;
 *   `undefined` for any other value.
 */
function getGradeColor(grade) {
    // A Map has no inherited keys, so values such as 'toString' or
    // 'constructor' resolve to undefined instead of an Object.prototype member.
    const colorByGrade = new Map([
        ['A', 'green'],
        ['B', 'blue'],
        ['C', 'yellow'],
        ['D', 'orange'],
        ['F', 'red'],
    ]);
    return colorByGrade.get(grade);
}
198
+ //# sourceMappingURL=data:application/json;base64,{"version":3,"file":"quality-metrics.js","sourceRoot":"","sources":["../../src/api/quality-metrics.ts"],"names":[],"mappings":";AAAA;;;;GAIG;;;AAqVH,wDAgBC;AAKD,sEAMC;AAKD,8DA2BC;AAKD,gDAEC;AAKD,sCAWC;AAraD,2CAAgE;AAkKhE,+EAA+E;AAC/E,6BAA6B;AAC7B,+EAA+E;AAE/E;;GAEG;AACU,QAAA,cAAc,GAAG;IAC5B;;;;;;;;;OASG;IACH,KAAK,CAAC,YAAY,CAAC,OAAe;QAKhC,OAAO,IAAA,2BAAkB,EAAC,uBAAuB,OAAO,EAAE,EAAE;YAC1D,UAAU,EAAE,IAAI;SACjB,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;;;;;;;OAaG;IACH,KAAK,CAAC,sBAAsB,CAAC,OAAe;QAI1C,OAAO,IAAA,2BAAkB,EAAC,iCAAiC,OAAO,EAAE,EAAE;YACpE,UAAU,EAAE,IAAI;SACjB,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;;;;;;;;OAcG;IACH,KAAK,CAAC,WAAW,CAAC,KAMjB;QAIC,OAAO,IAAA,2BAAkB,EAAC,uBAAuB,EAAE;YACjD,MAAM,EAAE,MAAM;YACd,IAAI,EAAE,KAAK;YACX,UAAU,EAAE,IAAI;SACjB,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;;;;;;;;;OAeG;IACH,KAAK,CAAC,oBAAoB,CAAC,KAK1B;QAGC,OAAO,IAAA,2BAAkB,EAAC,gCAAgC,EAAE;YAC1D,MAAM,EAAE,MAAM;YACd,IAAI,EAAE,KAAK;YACX,UAAU,EAAE,IAAI;SACjB,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;;;OASG;IACH,KAAK,CAAC,eAAe,CAAC,OAAe;QAcnC,OAAO,IAAA,2BAAkB,EAAC,yBAAyB,OAAO,EAAE,EAAE;YAC5D,UAAU,EAAE,IAAI;SACjB,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;;;;;OAWG;IACH,KAAK,CAAC,aAAa,CAAC,OAGnB;QAIC,OAAO,IAAA,2BAAkB,EAAC,yBAAyB,EAAE;YACnD,MAAM,EAAE,MAAM;YACd,IAAI,EAAE,OAAO;YACb,UAAU,EAAE,IAAI;SACjB,CAAC,CAAC;IACL,CAAC;CACF,CAAC;AAEF,+EAA+E;AAC/E,mBAAmB;AACnB,+EAA+E;AAE/E;;GAEG;AACH,SAAgB,sBAAsB,CACpC,UAAyB,EACzB,aAII,EAAE;IAEN,MAAM,EAAE,eAAe,GAAG,GAAG,EAAE,eAAe,GAAG,EAAE,EAAE,QAAQ,GAAG,GAAG,EAAE,GAAG,UAAU,CAAC;IAEnF,MAAM,UAAU,GAAG,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC;IACpD,MAAM,iBAAiB,GAAG,UAAU,CAAC,YAAY,IAAI,eAAe,CAAC;IACrE,MAAM,UAAU,GAAG,UAAU,CAAC,YAAY,IAAI,eAAe,CAAC;IAC9D,MAAM,UAAU,GAAG,UAAU,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI,UAAU,CAAC,QAAQ,CAAC,CAAC;IAExE,OAAO,iBAAiB,IAAI,UAAU,IAAI,UAAU,CAAC;AACvD,CAAC;AAED;;GAEG;AACH,SAAgB,6BAA6B,CAC3C,MAA2B,EAC3B,eAA0C,QAAQ;IAElD,MAAM,SAAS,GAAG,EAAE,GAAG,EAAE,CAAC,EAAE,MAAM,EAAE,
CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,CAAC;IAC9D,OAAO,SAAS,CAAC,MAAM,CAAC,SAAS,CAAC,IAAI,SAAS,CAAC,YAAY,CAAC,CAAC;AAChE,CAAC;AAED;;GAEG;AACH,SAAgB,yBAAyB,CACvC,OAAsB,EACtB,mBAAyC;IAEzC,MAAM,eAAe,GAAa,EAAE,CAAC;IAErC,IAAI,OAAO,CAAC,YAAY,GAAG,GAAG,EAAE,CAAC;QAC/B,eAAe,CAAC,IAAI,CAAC,mDAAmD,CAAC,CAAC;IAC5E,CAAC;IAED,IAAI,OAAO,CAAC,gBAAgB,GAAG,GAAG,EAAE,CAAC;QACnC,eAAe,CAAC,IAAI,CAAC,iDAAiD,CAAC,CAAC;IAC1E,CAAC;IAED,IAAI,OAAO,CAAC,gBAAgB,GAAG,GAAG,EAAE,CAAC;QACnC,eAAe,CAAC,IAAI,CAAC,wCAAwC,CAAC,CAAC;IACjE,CAAC;IAED,IAAI,mBAAmB,EAAE,iBAAiB,EAAE,CAAC;QAC3C,eAAe,CAAC,IAAI,CAAC,kDAAkD,CAAC,CAAC;IAC3E,CAAC;IAED,IAAI,eAAe,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACjC,eAAe,CAAC,IAAI,CAAC,8CAA8C,CAAC,CAAC;IACvE,CAAC;IAED,OAAO,eAAe,CAAC;AACzB,CAAC;AAED;;GAEG;AACH,SAAgB,kBAAkB,CAAC,KAAa;IAC9C,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG,GAAG,CAAC,GAAG,CAAC;AACvC,CAAC;AAED;;GAEG;AACH,SAAgB,aAAa,CAC3B,KAAkC;IAElC,MAAM,MAAM,GAAG;QACb,CAAC,EAAE,OAAgB;QACnB,CAAC,EAAE,MAAe;QAClB,CAAC,EAAE,QAAiB;QACpB,CAAC,EAAE,QAAiB;QACpB,CAAC,EAAE,KAAc;KAClB,CAAC;IACF,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC;AACvB,CAAC","sourcesContent":["/**\n * ThinkHive SDK v3.1 - Quality Metrics API\n *\n * RAG Evaluation & Hallucination Detection for AI quality assurance\n */\n\nimport { apiRequest, apiRequestWithData } from '../core/client';\nimport type { ApiResponse } from '../core/types';\n\n// ============================================================================\n// TYPES\n// ============================================================================\n\n/**\n * Retrieved context for RAG evaluation\n */\nexport interface RetrievedContext {\n  content: string;\n  chunkIndex?: number;\n  metadata?: Record<string, unknown>;\n  score?: number;\n}\n\n/**\n * Ground truth context\n */\nexport interface GroundTruthContext {\n  content: string;\n  chunkIndex?: number;\n}\n\n/**\n * Grounded span evidence\n */\nexport interface GroundedSpan {\n  text: string;\n  confidence: number;\n  sourceChunkIndex?: number;\n}\n\n/**\n 
* Ungrounded span evidence\n */\nexport interface UngroundedSpan {\n  text: string;\n  confidence: number;\n}\n\n/**\n * Citation mapping\n */\nexport interface CitationMap {\n  claim: string;\n  citedIndex: number;\n  isValid: boolean;\n}\n\n/**\n * RAG evaluation result\n */\nexport interface RAGEvaluation {\n  // Retrieval Quality\n  contextRelevance: number;\n  contextPrecision: number;\n  contextRecall: number;\n\n  // Generation Quality\n  groundedness: number;\n  faithfulness: number;\n  answerRelevance: number;\n\n  // Citation Quality\n  citationAccuracy: number;\n  citationCompleteness: number;\n\n  // Overall\n  overallScore: number;\n  grade: 'A' | 'B' | 'C' | 'D' | 'F';\n\n  // Details\n  groundedSpanCount?: number;\n  ungroundedSpanCount?: number;\n  issues: string[];\n  recommendations: string[];\n}\n\n/**\n * RAG evaluation evidence\n */\nexport interface RAGEvidence {\n  groundedSpans: GroundedSpan[];\n  ungroundedSpans: UngroundedSpan[];\n  citationMap: CitationMap[];\n}\n\n/**\n * Hallucination instance\n */\nexport interface HallucinationInstance {\n  type: string;\n  severity: 'low' | 'medium' | 'high' | 'critical';\n  text: string;\n  explanation: string;\n  confidence: number;\n  suggestedFix?: string;\n}\n\n/**\n * Hallucination detection report\n */\nexport interface HallucinationReport {\n  hasHallucinations: boolean;\n  hallucinationScore: number;\n  riskLevel: 'low' | 'medium' | 'high' | 'critical';\n  factualClaims: number;\n  verifiedClaims: number;\n  unverifiedClaims: number;\n  summary: string;\n  recommendations: string[];\n  instances: HallucinationInstance[];\n}\n\n/**\n * Groundedness analysis result\n */\nexport interface GroundednessResult {\n  score: number;\n  faithfulness: number;\n  contextRelevance: number;\n  grade: string;\n}\n\n/**\n * Batch evaluation result for a single trace\n */\nexport interface BatchEvaluationResult {\n  traceId: string;\n  success: boolean;\n  error?: string;\n  rag?: {\n    score: number;\n    
grade: string;\n    mainIssue?: string;\n  };\n  hallucination?: {\n    hasIssues: boolean;\n    score: number;\n    topIssue?: string;\n  };\n}\n\n/**\n * Batch evaluation summary\n */\nexport interface BatchEvaluationSummary {\n  totalTraces: number;\n  successfulEvaluations: number;\n  avgRagScore: number;\n  hallucinationRate: number;\n  gradeDistribution: {\n    A: number;\n    B: number;\n    C: number;\n    D: number;\n    F: number;\n  };\n}\n\n// ============================================================================\n// QUALITY METRICS API CLIENT\n// ============================================================================\n\n/**\n * Quality Metrics API client for RAG evaluation and hallucination detection\n */\nexport const qualityMetrics = {\n  /**\n   * Get RAG quality scores for a specific trace\n   *\n   * @example\n   * ```typescript\n   * const scores = await qualityMetrics.getRagScores('trace_abc123');\n   * console.log(`Groundedness: ${scores.evaluation.groundedness}`);\n   * console.log(`Grade: ${scores.evaluation.grade}`);\n   * ```\n   */\n  async getRagScores(traceId: string): Promise<{\n    traceId: string;\n    evaluation: RAGEvaluation;\n    evidence: RAGEvidence;\n  }> {\n    return apiRequestWithData(`/quality/rag-scores/${traceId}`, {\n      apiVersion: 'v1',\n    });\n  },\n\n  /**\n   * Get hallucination detection report for a trace\n   *\n   * @example\n   * ```typescript\n   * const report = await qualityMetrics.getHallucinationReport('trace_abc123');\n   * if (report.report.hasHallucinations) {\n   *   console.log(`Risk level: ${report.report.riskLevel}`);\n   *   for (const instance of report.report.instances) {\n   *     console.log(`- ${instance.type}: ${instance.text}`);\n   *   }\n   * }\n   * ```\n   */\n  async getHallucinationReport(traceId: string): Promise<{\n    traceId: string;\n    report: HallucinationReport;\n  }> {\n    return apiRequestWithData(`/quality/hallucination-report/${traceId}`, {\n      
apiVersion: 'v1',\n    });\n  },\n\n  /**\n   * Evaluate RAG quality for provided content (ad-hoc evaluation)\n   *\n   * @example\n   * ```typescript\n   * const result = await qualityMetrics.evaluateRag({\n   *   query: 'What is the refund policy?',\n   *   response: 'You can get a refund within 30 days.',\n   *   retrievedContexts: [\n   *     { content: 'Our refund policy allows returns within 30 days of purchase.' },\n   *   ],\n   * });\n   * console.log(`Groundedness: ${result.evaluation.groundedness}`);\n   * ```\n   */\n  async evaluateRag(input: {\n    query: string;\n    response: string;\n    retrievedContexts: RetrievedContext[];\n    groundTruthContexts?: GroundTruthContext[];\n    citations?: string[];\n  }): Promise<{\n    evaluation: RAGEvaluation;\n    evidence: RAGEvidence;\n  }> {\n    return apiRequestWithData('/quality/evaluate-rag', {\n      method: 'POST',\n      body: input,\n      apiVersion: 'v1',\n    });\n  },\n\n  /**\n   * Detect hallucinations in provided content (ad-hoc detection)\n   *\n   * @example\n   * ```typescript\n   * const result = await qualityMetrics.detectHallucinations({\n   *   response: 'The product costs $99 and comes with a 2-year warranty.',\n   *   contexts: [\n   *     { content: 'The product costs $99 with a 1-year warranty.' 
},\n   *   ],\n   * });\n   * if (result.report.hasHallucinations) {\n   *   console.log('Detected hallucinations:', result.report.instances);\n   * }\n   * ```\n   */\n  async detectHallucinations(input: {\n    response: string;\n    contexts: Array<{ content: string; metadata?: Record<string, unknown> }>;\n    query?: string;\n    previousResponses?: string[];\n  }): Promise<{\n    report: HallucinationReport;\n  }> {\n    return apiRequestWithData('/quality/detect-hallucinations', {\n      method: 'POST',\n      body: input,\n      apiVersion: 'v1',\n    });\n  },\n\n  /**\n   * Get groundedness analysis for a trace\n   *\n   * @example\n   * ```typescript\n   * const result = await qualityMetrics.getGroundedness('trace_abc123');\n   * console.log(`Groundedness score: ${result.groundedness.score}`);\n   * console.log(`Grounded spans: ${result.summary.groundedSpans}`);\n   * ```\n   */\n  async getGroundedness(traceId: string): Promise<{\n    traceId: string;\n    groundedness: GroundednessResult;\n    spans: {\n      grounded: Array<{ text: string; confidence: number; sourceIndex: number }>;\n      ungrounded: Array<{ text: string; confidence: number }>;\n    };\n    summary: {\n      totalSpans: number;\n      groundedSpans: number;\n      ungroundedSpans: number;\n      groundednessRatio: number;\n    };\n  }> {\n    return apiRequestWithData(`/quality/groundedness/${traceId}`, {\n      apiVersion: 'v1',\n    });\n  },\n\n  /**\n   * Evaluate multiple traces for quality metrics in batch\n   *\n   * @example\n   * ```typescript\n   * const result = await qualityMetrics.evaluateBatch({\n   *   traceIds: ['trace_1', 'trace_2', 'trace_3'],\n   * });\n   * console.log(`Average RAG score: ${result.summary.avgRagScore}`);\n   * console.log(`Hallucination rate: ${result.summary.hallucinationRate}%`);\n   * ```\n   */\n  async evaluateBatch(options: {\n    traceIds: string[];\n    includeDetails?: boolean;\n  }): Promise<{\n    summary: BatchEvaluationSummary;\n    
results: BatchEvaluationResult[];\n  }> {\n    return apiRequestWithData('/quality/evaluate-batch', {\n      method: 'POST',\n      body: options,\n      apiVersion: 'v1',\n    });\n  },\n};\n\n// ============================================================================\n// HELPER FUNCTIONS\n// ============================================================================\n\n/**\n * Check if a RAG evaluation passes quality thresholds\n */\nexport function passesQualityThreshold(\n  evaluation: RAGEvaluation,\n  thresholds: {\n    minGroundedness?: number;\n    minOverallScore?: number;\n    minGrade?: 'A' | 'B' | 'C' | 'D';\n  } = {}\n): boolean {\n  const { minGroundedness = 0.7, minOverallScore = 60, minGrade = 'C' } = thresholds;\n\n  const gradeOrder = { A: 4, B: 3, C: 2, D: 1, F: 0 };\n  const meetsGroundedness = evaluation.groundedness >= minGroundedness;\n  const meetsScore = evaluation.overallScore >= minOverallScore;\n  const meetsGrade = gradeOrder[evaluation.grade] >= gradeOrder[minGrade];\n\n  return meetsGroundedness && meetsScore && meetsGrade;\n}\n\n/**\n * Check if hallucination risk is acceptable\n */\nexport function isHallucinationRiskAcceptable(\n  report: HallucinationReport,\n  maxRiskLevel: 'low' | 'medium' | 'high' = 'medium'\n): boolean {\n  const riskOrder = { low: 0, medium: 1, high: 2, critical: 3 };\n  return riskOrder[report.riskLevel] <= riskOrder[maxRiskLevel];\n}\n\n/**\n * Get quality recommendations based on evaluation\n */\nexport function getQualityRecommendations(\n  ragEval: RAGEvaluation,\n  hallucinationReport?: HallucinationReport\n): string[] {\n  const recommendations: string[] = [];\n\n  if (ragEval.groundedness < 0.7) {\n    recommendations.push('Improve grounding by increasing context relevance');\n  }\n\n  if (ragEval.contextRelevance < 0.6) {\n    recommendations.push('Tune retrieval to return more relevant contexts');\n  }\n\n  if (ragEval.citationAccuracy < 0.8) {\n    recommendations.push('Improve citation 
accuracy in responses');\n  }\n\n  if (hallucinationReport?.hasHallucinations) {\n    recommendations.push('Add fact-checking layer to reduce hallucinations');\n  }\n\n  if (recommendations.length === 0) {\n    recommendations.push('Quality metrics are within acceptable ranges');\n  }\n\n  return recommendations;\n}\n\n/**\n * Format quality score for display\n */\nexport function formatQualityScore(score: number): string {\n  return `${Math.round(score * 100)}%`;\n}\n\n/**\n * Get color indicator for grade\n */\nexport function getGradeColor(\n  grade: 'A' | 'B' | 'C' | 'D' | 'F'\n): 'green' | 'blue' | 'yellow' | 'orange' | 'red' {\n  const colors = {\n    A: 'green' as const,\n    B: 'blue' as const,\n    C: 'yellow' as const,\n    D: 'orange' as const,\n    F: 'red' as const,\n  };\n  return colors[grade];\n}\n"]}