@thinkhive/sdk 3.1.1 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,200 @@
1
+ /**
2
+ * ThinkHive SDK v3.0 - Conversation Evaluation API
3
+ *
4
+ * API for multi-turn conversation evaluation
5
+ */
6
+ export type AggregateMethod = 'worst' | 'average' | 'weighted' | 'final_turn' | 'majority'; // Names of the strategies for folding per-turn results into one verdict; getAggregator maps each name to its aggregate* helper.
7
+ export interface SessionTrace { // Element type of the array resolved by conversationEval.getSessionTraces.
8
+ id: string;
9
+ sessionId: string;
10
+ turnNumber: number; // NOTE(review): presumably the turn's position in the session; confirm whether it is 0- or 1-based.
11
+ userMessage: string;
12
+ agentResponse: string;
13
+ timestamp: string; // NOTE(review): string format is not visible here (presumably ISO 8601) - confirm.
14
+ metadata?: Record<string, unknown>;
15
+ }
16
+ export interface TurnEvaluation { // Verdict for a single turn; the aggregate* helpers read only `passed` and `score`.
17
+ traceId: string;
18
+ turnNumber: number;
19
+ passed: boolean;
20
+ score: number; // NOTE(review): getProblematicTurns defaults its cutoff to 70, which suggests a 0-100 scale - confirm.
21
+ reasoning: string;
22
+ }
23
+ export interface ConversationEvalResult { // Resolved value of conversationEval.evaluate.
24
+ sessionId: string;
25
+ criterionId: string;
26
+ turnCount: number; // NOTE(review): presumably equals turnResults.length - confirm against the API.
27
+ turnResults: TurnEvaluation[]; // The only field read by getProblematicTurns and analyzeConversationTrend.
28
+ aggregatePassed: boolean;
29
+ aggregateScore: number;
30
+ aggregateMethod: AggregateMethod;
31
+ reasoning: string;
32
+ metadata?: Record<string, unknown>;
33
+ }
34
+ export interface EvaluateConversationOptions { // Passed as-is as the request `body` by conversationEval.evaluate.
35
+ sessionId: string;
36
+ criterionId: string;
37
+ options?: {
38
+ aggregateMethod?: AggregateMethod; // NOTE(review): the default used when omitted is not visible here - confirm.
39
+ minTurns?: number;
40
+ maxTurns?: number;
41
+ };
42
+ }
43
+ export interface AggregationMethodInfo { // Element type of the array resolved by conversationEval.getAggregationMethods.
44
+ id: AggregateMethod;
45
+ name: string;
46
+ description: string;
47
+ useCase: string;
48
+ }
49
+ /**
50
+ * Conversation Evaluation API client for multi-turn evaluation. Each method delegates to `apiRequestWithData` with `apiVersion: 'none'`.
51
+ */
52
+ export declare const conversationEval: {
53
+ /**
54
+ * Get traces for a conversation session
55
+ * @param sessionId - Sent as the `sessionId` query parameter of `/conversation-eval/traces`
56
+ * @example
57
+ * ```typescript
58
+ * const traces = await conversationEval.getSessionTraces('session_123');
59
+ * console.log(`Conversation has ${traces.length} turns`);
60
+ * ```
61
+ */
62
+ getSessionTraces(sessionId: string): Promise<SessionTrace[]>;
63
+ /**
64
+ * Run conversation-level evaluation
65
+ * @param options - Sent unchanged as the body of a POST to `/conversation-eval/evaluate`
66
+ * @example
67
+ * ```typescript
68
+ * const result = await conversationEval.evaluate({
69
+ * sessionId: 'session_123',
70
+ * criterionId: 'criterion_456',
71
+ * options: {
72
+ * aggregateMethod: 'average',
73
+ * minTurns: 2,
74
+ * },
75
+ * });
76
+ * console.log(`Conversation score: ${result.aggregateScore}`);
77
+ * ```
78
+ */
79
+ evaluate(options: EvaluateConversationOptions): Promise<ConversationEvalResult>;
80
+ /**
81
+ * Get available aggregation methods with descriptions (requests `/conversation-eval/aggregation-methods`)
82
+ *
83
+ * @example
84
+ * ```typescript
85
+ * const methods = await conversationEval.getAggregationMethods();
86
+ * for (const method of methods) {
87
+ * console.log(`${method.name}: ${method.description}`);
88
+ * }
89
+ * ```
90
+ */
91
+ getAggregationMethods(): Promise<AggregationMethodInfo[]>;
92
+ };
93
+ /**
94
+ * Calculate worst-turn aggregation: the conversation is judged by its weakest turn
95
+ *
96
+ * @param turnResults - Array of turn evaluation results
97
+ * @returns `score` is the lowest turn score and `passed` is true only when every turn passed; an empty array yields `{ passed: false, score: 0 }`
98
+ *
99
+ * @example
100
+ * ```typescript
101
+ * const result = aggregateWorst(turnResults);
102
+ * // Fails if any turn fails
103
+ * ```
104
+ */
105
+ export declare function aggregateWorst(turnResults: TurnEvaluation[]): {
106
+ passed: boolean;
107
+ score: number;
108
+ };
109
+ /**
110
+ * Calculate average aggregation
111
+ *
112
+ * @param turnResults - Array of turn evaluation results
113
+ * @returns `score` is the arithmetic mean of the turn scores; `passed` is true when strictly more than half of the turns passed (it is not derived from the mean); an empty array yields `{ passed: false, score: 0 }`
114
+ *
115
+ * @example
116
+ * ```typescript
117
+ * const result = aggregateAverage(turnResults);
118
+ * ```
119
+ */
120
+ export declare function aggregateAverage(turnResults: TurnEvaluation[]): {
121
+ passed: boolean;
122
+ score: number;
123
+ };
124
+ /**
125
+ * Calculate weighted average aggregation (later turns weighted more): the turn at array index i gets weight i + 1
126
+ *
127
+ * @param turnResults - Array of turn evaluation results; weights follow array order, not `turnNumber`
128
+ * @returns `score` is the weighted mean of the turn scores; `passed` is true when passing turns hold strictly more than half of the total weight; an empty array yields `{ passed: false, score: 0 }`
129
+ *
130
+ * @example
131
+ * ```typescript
132
+ * const result = aggregateWeighted(turnResults);
133
+ * // Later turns have higher weight
134
+ * ```
135
+ */
136
+ export declare function aggregateWeighted(turnResults: TurnEvaluation[]): {
137
+ passed: boolean;
138
+ score: number;
139
+ };
140
+ /**
141
+ * Calculate final-turn aggregation
142
+ *
143
+ * @param turnResults - Array of turn evaluation results; the final turn is the last array element
144
+ * @returns The `passed` and `score` of the last element; an empty array yields `{ passed: false, score: 0 }`
145
+ *
146
+ * @example
147
+ * ```typescript
148
+ * const result = aggregateFinalTurn(turnResults);
149
+ * // Only final turn matters
150
+ * ```
151
+ */
152
+ export declare function aggregateFinalTurn(turnResults: TurnEvaluation[]): {
153
+ passed: boolean;
154
+ score: number;
155
+ };
156
+ /**
157
+ * Calculate majority vote aggregation
158
+ *
159
+ * @param turnResults - Array of turn evaluation results
160
+ * @returns `passed` is true when strictly more than half of the turns passed (an exact tie fails); `score` is the arithmetic mean of the turn scores; an empty array yields `{ passed: false, score: 0 }`
161
+ *
162
+ * @example
163
+ * ```typescript
164
+ * const result = aggregateMajority(turnResults);
165
+ * // Passes if majority of turns pass
166
+ * ```
167
+ */
168
+ export declare function aggregateMajority(turnResults: TurnEvaluation[]): {
169
+ passed: boolean;
170
+ score: number;
171
+ };
172
+ /**
173
+ * Get appropriate aggregation function for a method
174
+ *
175
+ * @param method - Aggregation method name
176
+ * @returns The matching aggregate* function; any value outside the five known names falls back to aggregateAverage
177
+ */
178
+ export declare function getAggregator(method: AggregateMethod): (turnResults: TurnEvaluation[]) => {
179
+ passed: boolean;
180
+ score: number;
181
+ };
182
+ /**
183
+ * Find problematic turns in a conversation: turns that did not pass or that scored strictly below the threshold
184
+ *
185
+ * @param result - Conversation evaluation result; only `turnResults` is read
186
+ * @param scoreThreshold - Minimum acceptable score (default 70); a score equal to the threshold is acceptable
187
+ * @returns Array of problematic turn results, in their original order
188
+ */
189
+ export declare function getProblematicTurns(result: ConversationEvalResult, scoreThreshold?: number): TurnEvaluation[];
190
+ /**
191
+ * Calculate conversation quality trend by comparing the mean score of the first half of `turnResults` with the mean of the second half (split at floor(length / 2), so the second half gets the extra turn when the count is odd)
192
+ *
193
+ * @param result - Conversation evaluation result; only `turnResults` is read
194
+ * @returns Trend analysis: 'improving' when the second-half mean exceeds the first by more than 5, 'declining' when it is lower by more than 5, otherwise 'stable'; with fewer than 2 turns the result is 'stable' with both averages reported as 0
195
+ */
196
+ export declare function analyzeConversationTrend(result: ConversationEvalResult): {
197
+ direction: 'improving' | 'declining' | 'stable';
198
+ firstHalfAvg: number;
199
+ secondHalfAvg: number;
200
+ };
@@ -0,0 +1,235 @@
1
+ "use strict";
2
+ /**
3
+ * ThinkHive SDK v3.0 - Conversation Evaluation API
4
+ *
5
+ * API for multi-turn conversation evaluation
6
+ */
7
+ Object.defineProperty(exports, "__esModule", { value: true });
8
+ exports.conversationEval = void 0;
9
+ exports.aggregateWorst = aggregateWorst;
10
+ exports.aggregateAverage = aggregateAverage;
11
+ exports.aggregateWeighted = aggregateWeighted;
12
+ exports.aggregateFinalTurn = aggregateFinalTurn;
13
+ exports.aggregateMajority = aggregateMajority;
14
+ exports.getAggregator = getAggregator;
15
+ exports.getProblematicTurns = getProblematicTurns;
16
+ exports.analyzeConversationTrend = analyzeConversationTrend;
17
+ const client_1 = require("../core/client");
18
+ // ============================================================================
19
+ // CONVERSATION EVAL API CLIENT
20
+ // ============================================================================
21
+ /**
22
+ * Conversation Evaluation API client for multi-turn evaluation
23
+ */
24
+ exports.conversationEval = {
25
+ /**
26
+ * Get traces for a conversation session
27
+ *
28
+ * @example
29
+ * ```typescript
30
+ * const traces = await conversationEval.getSessionTraces('session_123');
31
+ * console.log(`Conversation has ${traces.length} turns`);
32
+ * ```
33
+ */
34
+ async getSessionTraces(sessionId) {
35
+ return (0, client_1.apiRequestWithData)(`/conversation-eval/traces?sessionId=${sessionId}`, { apiVersion: 'none' });
36
+ },
37
+ /**
38
+ * Run conversation-level evaluation
39
+ *
40
+ * @example
41
+ * ```typescript
42
+ * const result = await conversationEval.evaluate({
43
+ * sessionId: 'session_123',
44
+ * criterionId: 'criterion_456',
45
+ * options: {
46
+ * aggregateMethod: 'average',
47
+ * minTurns: 2,
48
+ * },
49
+ * });
50
+ * console.log(`Conversation score: ${result.aggregateScore}`);
51
+ * ```
52
+ */
53
+ async evaluate(options) {
54
+ return (0, client_1.apiRequestWithData)('/conversation-eval/evaluate', {
55
+ method: 'POST',
56
+ body: options,
57
+ apiVersion: 'none',
58
+ });
59
+ },
60
+ /**
61
+ * Get available aggregation methods with descriptions
62
+ *
63
+ * @example
64
+ * ```typescript
65
+ * const methods = await conversationEval.getAggregationMethods();
66
+ * for (const method of methods) {
67
+ * console.log(`${method.name}: ${method.description}`);
68
+ * }
69
+ * ```
70
+ */
71
+ async getAggregationMethods() {
72
+ return (0, client_1.apiRequestWithData)('/conversation-eval/aggregation-methods', { apiVersion: 'none' });
73
+ },
74
+ };
75
+ // ============================================================================
76
+ // HELPER FUNCTIONS
77
+ // ============================================================================
78
+ /**
79
+ * Calculate worst-turn aggregation
80
+ *
81
+ * @param turnResults - Array of turn evaluation results
82
+ * @returns Aggregated result using worst turn logic
83
+ *
84
+ * @example
85
+ * ```typescript
86
+ * const result = aggregateWorst(turnResults);
87
+ * // Fails if any turn fails
88
+ * ```
89
+ */
90
+ function aggregateWorst(turnResults) {
91
+ if (turnResults.length === 0)
92
+ return { passed: false, score: 0 };
93
+ const worstScore = Math.min(...turnResults.map(t => t.score));
94
+ const passed = turnResults.every(t => t.passed);
95
+ return { passed, score: worstScore };
96
+ }
97
+ /**
98
+ * Calculate average aggregation
99
+ *
100
+ * @param turnResults - Array of turn evaluation results
101
+ * @returns Aggregated result using average logic
102
+ *
103
+ * @example
104
+ * ```typescript
105
+ * const result = aggregateAverage(turnResults);
106
+ * ```
107
+ */
108
+ function aggregateAverage(turnResults) {
109
+ if (turnResults.length === 0)
110
+ return { passed: false, score: 0 };
111
+ const avgScore = turnResults.reduce((sum, t) => sum + t.score, 0) / turnResults.length;
112
+ const passedCount = turnResults.filter(t => t.passed).length;
113
+ const passed = passedCount > turnResults.length / 2;
114
+ return { passed, score: avgScore };
115
+ }
116
+ /**
117
+ * Calculate weighted average aggregation (later turns weighted more)
118
+ *
119
+ * @param turnResults - Array of turn evaluation results
120
+ * @returns Aggregated result using weighted average logic
121
+ *
122
+ * @example
123
+ * ```typescript
124
+ * const result = aggregateWeighted(turnResults);
125
+ * // Later turns have higher weight
126
+ * ```
127
+ */
128
+ function aggregateWeighted(turnResults) {
129
+ if (turnResults.length === 0)
130
+ return { passed: false, score: 0 };
131
+ // Linear weights: 1, 2, 3, ...
132
+ let weightedSum = 0;
133
+ let weightTotal = 0;
134
+ let weightedPassSum = 0;
135
+ turnResults.forEach((turn, index) => {
136
+ const weight = index + 1;
137
+ weightedSum += turn.score * weight;
138
+ weightedPassSum += (turn.passed ? 1 : 0) * weight;
139
+ weightTotal += weight;
140
+ });
141
+ const avgScore = weightedSum / weightTotal;
142
+ const passed = (weightedPassSum / weightTotal) > 0.5;
143
+ return { passed, score: avgScore };
144
+ }
145
+ /**
146
+ * Calculate final-turn aggregation
147
+ *
148
+ * @param turnResults - Array of turn evaluation results
149
+ * @returns Aggregated result using only the final turn
150
+ *
151
+ * @example
152
+ * ```typescript
153
+ * const result = aggregateFinalTurn(turnResults);
154
+ * // Only final turn matters
155
+ * ```
156
+ */
157
+ function aggregateFinalTurn(turnResults) {
158
+ if (turnResults.length === 0)
159
+ return { passed: false, score: 0 };
160
+ const finalTurn = turnResults[turnResults.length - 1];
161
+ return { passed: finalTurn.passed, score: finalTurn.score };
162
+ }
163
+ /**
164
+ * Calculate majority vote aggregation
165
+ *
166
+ * @param turnResults - Array of turn evaluation results
167
+ * @returns Aggregated result using majority vote logic
168
+ *
169
+ * @example
170
+ * ```typescript
171
+ * const result = aggregateMajority(turnResults);
172
+ * // Passes if majority of turns pass
173
+ * ```
174
+ */
175
+ function aggregateMajority(turnResults) {
176
+ if (turnResults.length === 0)
177
+ return { passed: false, score: 0 };
178
+ const passedCount = turnResults.filter(t => t.passed).length;
179
+ const passed = passedCount > turnResults.length / 2;
180
+ const avgScore = turnResults.reduce((sum, t) => sum + t.score, 0) / turnResults.length;
181
+ return { passed, score: avgScore };
182
+ }
/**
 * Get appropriate aggregation function for a method.
 *
 * @param method - Aggregation method name
 * @returns The aggregation function registered for `method`; any value that
 *   is not one of the five known names falls back to `aggregateAverage`.
 */
function getAggregator(method) {
    const aggregatorsByMethod = new Map([
        ['worst', aggregateWorst],
        ['average', aggregateAverage],
        ['weighted', aggregateWeighted],
        ['final_turn', aggregateFinalTurn],
        ['majority', aggregateMajority],
    ]);
    const aggregator = aggregatorsByMethod.get(method);
    return aggregator === undefined ? aggregateAverage : aggregator;
}
199
+ /**
200
+ * Find problematic turns in a conversation
201
+ *
202
+ * @param result - Conversation evaluation result
203
+ * @param scoreThreshold - Minimum acceptable score (default 70)
204
+ * @returns Array of problematic turn results
205
+ */
206
+ function getProblematicTurns(result, scoreThreshold = 70) {
207
+ return result.turnResults.filter(t => !t.passed || t.score < scoreThreshold);
208
+ }
209
+ /**
210
+ * Calculate conversation quality trend
211
+ *
212
+ * @param result - Conversation evaluation result
213
+ * @returns Trend analysis
214
+ */
215
+ function analyzeConversationTrend(result) {
216
+ const turns = result.turnResults;
217
+ if (turns.length < 2) {
218
+ return { direction: 'stable', firstHalfAvg: 0, secondHalfAvg: 0 };
219
+ }
220
+ const midpoint = Math.floor(turns.length / 2);
221
+ const firstHalf = turns.slice(0, midpoint);
222
+ const secondHalf = turns.slice(midpoint);
223
+ const firstHalfAvg = firstHalf.reduce((sum, t) => sum + t.score, 0) / firstHalf.length;
224
+ const secondHalfAvg = secondHalf.reduce((sum, t) => sum + t.score, 0) / secondHalf.length;
225
+ const diff = secondHalfAvg - firstHalfAvg;
226
+ let direction;
227
+ if (diff > 5)
228
+ direction = 'improving';
229
+ else if (diff < -5)
230
+ direction = 'declining';
231
+ else
232
+ direction = 'stable';
233
+ return { direction, firstHalfAvg, secondHalfAvg };
234
+ }
235
+ //# sourceMappingURL=data:application/json;base64,{"version":3,"file":"conversation-eval.js","sourceRoot":"","sources":["../../src/api/conversation-eval.ts"],"names":[],"mappings":";AAAA;;;;GAIG;;;AA4IH,wCAOC;AAaD,4CAQC;AAcD,8CAmBC;AAcD,gDAKC;AAcD,8CAQC;AAQD,sCAWC;AASD,kDAKC;AAQD,4DAyBC;AAlTD,2CAAoD;AAuDpD,+EAA+E;AAC/E,+BAA+B;AAC/B,+EAA+E;AAE/E;;GAEG;AACU,QAAA,gBAAgB,GAAG;IAC9B;;;;;;;;OAQG;IACH,KAAK,CAAC,gBAAgB,CAAC,SAAiB;QACtC,OAAO,IAAA,2BAAkB,EACvB,uCAAuC,SAAS,EAAE,EAClD,EAAE,UAAU,EAAE,MAAM,EAAE,CACvB,CAAC;IACJ,CAAC;IAED;;;;;;;;;;;;;;;OAeG;IACH,KAAK,CAAC,QAAQ,CAAC,OAAoC;QACjD,OAAO,IAAA,2BAAkB,EAAyB,6BAA6B,EAAE;YAC/E,MAAM,EAAE,MAAM;YACd,IAAI,EAAE,OAAO;YACb,UAAU,EAAE,MAAM;SACnB,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;;;;OAUG;IACH,KAAK,CAAC,qBAAqB;QACzB,OAAO,IAAA,2BAAkB,EACvB,wCAAwC,EACxC,EAAE,UAAU,EAAE,MAAM,EAAE,CACvB,CAAC;IACJ,CAAC;CACF,CAAC;AAEF,+EAA+E;AAC/E,mBAAmB;AACnB,+EAA+E;AAE/E;;;;;;;;;;;GAWG;AACH,SAAgB,cAAc,CAAC,WAA6B;IAC1D,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;IAEjE,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC;IAC9D,MAAM,MAAM,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;IAEhD,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,UAAU,EAAE,CAAC;AACvC,CAAC;AAED;;;;;;;;;;GAUG;AACH,SAAgB,gBAAgB,CAAC,WAA6B;IAC5D,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;IAEjE,MAAM,QAAQ,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,GAAG,WAAW,CAAC,MAAM,CAAC;IACvF,MAAM,WAAW,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;IAC7D,MAAM,MAAM,GAAG,WAAW,GAAG,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC;IAEpD,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC;AACrC,CAAC;AAED;;;;;;;;;;;GAWG;AACH,SAAgB,iBAAiB,CAAC,WAA6B;IAC7D,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;IAEjE,+BAA+B;IAC/B,IAAI,WAAW,GAAG,CAAC,CAAC;IACpB,IAAI,WA
AW,GAAG,CAAC,CAAC;IACpB,IAAI,eAAe,GAAG,CAAC,CAAC;IAExB,WAAW,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE;QAClC,MAAM,MAAM,GAAG,KAAK,GAAG,CAAC,CAAC;QACzB,WAAW,IAAI,IAAI,CAAC,KAAK,GAAG,MAAM,CAAC;QACnC,eAAe,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC;QAClD,WAAW,IAAI,MAAM,CAAC;IACxB,CAAC,CAAC,CAAC;IAEH,MAAM,QAAQ,GAAG,WAAW,GAAG,WAAW,CAAC;IAC3C,MAAM,MAAM,GAAG,CAAC,eAAe,GAAG,WAAW,CAAC,GAAG,GAAG,CAAC;IAErD,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC;AACrC,CAAC;AAED;;;;;;;;;;;GAWG;AACH,SAAgB,kBAAkB,CAAC,WAA6B;IAC9D,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;IAEjE,MAAM,SAAS,GAAG,WAAW,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACtD,OAAO,EAAE,MAAM,EAAE,SAAS,CAAC,MAAM,EAAE,KAAK,EAAE,SAAS,CAAC,KAAK,EAAE,CAAC;AAC9D,CAAC;AAED;;;;;;;;;;;GAWG;AACH,SAAgB,iBAAiB,CAAC,WAA6B;IAC7D,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;IAEjE,MAAM,WAAW,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;IAC7D,MAAM,MAAM,GAAG,WAAW,GAAG,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC;IACpD,MAAM,QAAQ,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,GAAG,WAAW,CAAC,MAAM,CAAC;IAEvF,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC;AACrC,CAAC;AAED;;;;;GAKG;AACH,SAAgB,aAAa,CAC3B,MAAuB;IAEvB,QAAQ,MAAM,EAAE,CAAC;QACf,KAAK,OAAO,CAAC,CAAC,OAAO,cAAc,CAAC;QACpC,KAAK,SAAS,CAAC,CAAC,OAAO,gBAAgB,CAAC;QACxC,KAAK,UAAU,CAAC,CAAC,OAAO,iBAAiB,CAAC;QAC1C,KAAK,YAAY,CAAC,CAAC,OAAO,kBAAkB,CAAC;QAC7C,KAAK,UAAU,CAAC,CAAC,OAAO,iBAAiB,CAAC;QAC1C,OAAO,CAAC,CAAC,OAAO,gBAAgB,CAAC;IACnC,CAAC;AACH,CAAC;AAED;;;;;;GAMG;AACH,SAAgB,mBAAmB,CACjC,MAA8B,EAC9B,cAAc,GAAG,EAAE;IAEnB,OAAO,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,KAAK,GAAG,cAAc,CAAC,CAAC;AAC/E,CAAC;AAED;;;;;GAKG;AACH,SAAgB,wBAAwB,CAAC,MAA8B;IAKrE,MAAM,KAAK,GAAG,MAAM,CAAC,WAAW,CAAC;IACjC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACrB,OAAO,EAAE,SAAS,EAAE,Q
AAQ,EAAE,YAAY,EAAE,CAAC,EAAE,aAAa,EAAE,CAAC,EAAE,CAAC;IACpE,CAAC;IAED,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAC9C,MAAM,SAAS,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC;IAC3C,MAAM,UAAU,GAAG,KAAK,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;IAEzC,MAAM,YAAY,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,GAAG,SAAS,CAAC,MAAM,CAAC;IACvF,MAAM,aAAa,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,GAAG,UAAU,CAAC,MAAM,CAAC;IAE1F,MAAM,IAAI,GAAG,aAAa,GAAG,YAAY,CAAC;IAC1C,IAAI,SAA+C,CAAC;IAEpD,IAAI,IAAI,GAAG,CAAC;QAAE,SAAS,GAAG,WAAW,CAAC;SACjC,IAAI,IAAI,GAAG,CAAC,CAAC;QAAE,SAAS,GAAG,WAAW,CAAC;;QACvC,SAAS,GAAG,QAAQ,CAAC;IAE1B,OAAO,EAAE,SAAS,EAAE,YAAY,EAAE,aAAa,EAAE,CAAC;AACpD,CAAC","sourcesContent":["/**\n * ThinkHive SDK v3.0 - Conversation Evaluation API\n *\n * API for multi-turn conversation evaluation\n */\n\nimport { apiRequestWithData } from '../core/client';\n\n// ============================================================================\n// TYPES\n// ============================================================================\n\nexport type AggregateMethod = 'worst' | 'average' | 'weighted' | 'final_turn' | 'majority';\n\nexport interface SessionTrace {\n  id: string;\n  sessionId: string;\n  turnNumber: number;\n  userMessage: string;\n  agentResponse: string;\n  timestamp: string;\n  metadata?: Record<string, unknown>;\n}\n\nexport interface TurnEvaluation {\n  traceId: string;\n  turnNumber: number;\n  passed: boolean;\n  score: number;\n  reasoning: string;\n}\n\nexport interface ConversationEvalResult {\n  sessionId: string;\n  criterionId: string;\n  turnCount: number;\n  turnResults: TurnEvaluation[];\n  aggregatePassed: boolean;\n  aggregateScore: number;\n  aggregateMethod: AggregateMethod;\n  reasoning: string;\n  metadata?: Record<string, unknown>;\n}\n\nexport interface EvaluateConversationOptions {\n  sessionId: string;\n  
criterionId: string;\n  options?: {\n    aggregateMethod?: AggregateMethod;\n    minTurns?: number;\n    maxTurns?: number;\n  };\n}\n\nexport interface AggregationMethodInfo {\n  id: AggregateMethod;\n  name: string;\n  description: string;\n  useCase: string;\n}\n\n// ============================================================================\n// CONVERSATION EVAL API CLIENT\n// ============================================================================\n\n/**\n * Conversation Evaluation API client for multi-turn evaluation\n */\nexport const conversationEval = {\n  /**\n   * Get traces for a conversation session\n   *\n   * @example\n   * ```typescript\n   * const traces = await conversationEval.getSessionTraces('session_123');\n   * console.log(`Conversation has ${traces.length} turns`);\n   * ```\n   */\n  async getSessionTraces(sessionId: string): Promise<SessionTrace[]> {\n    return apiRequestWithData<SessionTrace[]>(\n      `/conversation-eval/traces?sessionId=${sessionId}`,\n      { apiVersion: 'none' }\n    );\n  },\n\n  /**\n   * Run conversation-level evaluation\n   *\n   * @example\n   * ```typescript\n   * const result = await conversationEval.evaluate({\n   *   sessionId: 'session_123',\n   *   criterionId: 'criterion_456',\n   *   options: {\n   *     aggregateMethod: 'average',\n   *     minTurns: 2,\n   *   },\n   * });\n   * console.log(`Conversation score: ${result.aggregateScore}`);\n   * ```\n   */\n  async evaluate(options: EvaluateConversationOptions): Promise<ConversationEvalResult> {\n    return apiRequestWithData<ConversationEvalResult>('/conversation-eval/evaluate', {\n      method: 'POST',\n      body: options,\n      apiVersion: 'none',\n    });\n  },\n\n  /**\n   * Get available aggregation methods with descriptions\n   *\n   * @example\n   * ```typescript\n   * const methods = await conversationEval.getAggregationMethods();\n   * for (const method of methods) {\n   *   console.log(`${method.name}: ${method.description}`);\n   * 
}\n   * ```\n   */\n  async getAggregationMethods(): Promise<AggregationMethodInfo[]> {\n    return apiRequestWithData<AggregationMethodInfo[]>(\n      '/conversation-eval/aggregation-methods',\n      { apiVersion: 'none' }\n    );\n  },\n};\n\n// ============================================================================\n// HELPER FUNCTIONS\n// ============================================================================\n\n/**\n * Calculate worst-turn aggregation\n *\n * @param turnResults - Array of turn evaluation results\n * @returns Aggregated result using worst turn logic\n *\n * @example\n * ```typescript\n * const result = aggregateWorst(turnResults);\n * // Fails if any turn fails\n * ```\n */\nexport function aggregateWorst(turnResults: TurnEvaluation[]): { passed: boolean; score: number } {\n  if (turnResults.length === 0) return { passed: false, score: 0 };\n\n  const worstScore = Math.min(...turnResults.map(t => t.score));\n  const passed = turnResults.every(t => t.passed);\n\n  return { passed, score: worstScore };\n}\n\n/**\n * Calculate average aggregation\n *\n * @param turnResults - Array of turn evaluation results\n * @returns Aggregated result using average logic\n *\n * @example\n * ```typescript\n * const result = aggregateAverage(turnResults);\n * ```\n */\nexport function aggregateAverage(turnResults: TurnEvaluation[]): { passed: boolean; score: number } {\n  if (turnResults.length === 0) return { passed: false, score: 0 };\n\n  const avgScore = turnResults.reduce((sum, t) => sum + t.score, 0) / turnResults.length;\n  const passedCount = turnResults.filter(t => t.passed).length;\n  const passed = passedCount > turnResults.length / 2;\n\n  return { passed, score: avgScore };\n}\n\n/**\n * Calculate weighted average aggregation (later turns weighted more)\n *\n * @param turnResults - Array of turn evaluation results\n * @returns Aggregated result using weighted average logic\n *\n * @example\n * ```typescript\n * const result = 
aggregateWeighted(turnResults);\n * // Later turns have higher weight\n * ```\n */\nexport function aggregateWeighted(turnResults: TurnEvaluation[]): { passed: boolean; score: number } {\n  if (turnResults.length === 0) return { passed: false, score: 0 };\n\n  // Linear weights: 1, 2, 3, ...\n  let weightedSum = 0;\n  let weightTotal = 0;\n  let weightedPassSum = 0;\n\n  turnResults.forEach((turn, index) => {\n    const weight = index + 1;\n    weightedSum += turn.score * weight;\n    weightedPassSum += (turn.passed ? 1 : 0) * weight;\n    weightTotal += weight;\n  });\n\n  const avgScore = weightedSum / weightTotal;\n  const passed = (weightedPassSum / weightTotal) > 0.5;\n\n  return { passed, score: avgScore };\n}\n\n/**\n * Calculate final-turn aggregation\n *\n * @param turnResults - Array of turn evaluation results\n * @returns Aggregated result using only the final turn\n *\n * @example\n * ```typescript\n * const result = aggregateFinalTurn(turnResults);\n * // Only final turn matters\n * ```\n */\nexport function aggregateFinalTurn(turnResults: TurnEvaluation[]): { passed: boolean; score: number } {\n  if (turnResults.length === 0) return { passed: false, score: 0 };\n\n  const finalTurn = turnResults[turnResults.length - 1];\n  return { passed: finalTurn.passed, score: finalTurn.score };\n}\n\n/**\n * Calculate majority vote aggregation\n *\n * @param turnResults - Array of turn evaluation results\n * @returns Aggregated result using majority vote logic\n *\n * @example\n * ```typescript\n * const result = aggregateMajority(turnResults);\n * // Passes if majority of turns pass\n * ```\n */\nexport function aggregateMajority(turnResults: TurnEvaluation[]): { passed: boolean; score: number } {\n  if (turnResults.length === 0) return { passed: false, score: 0 };\n\n  const passedCount = turnResults.filter(t => t.passed).length;\n  const passed = passedCount > turnResults.length / 2;\n  const avgScore = turnResults.reduce((sum, t) => sum + t.score, 0) / 
turnResults.length;\n\n  return { passed, score: avgScore };\n}\n\n/**\n * Get appropriate aggregation function for a method\n *\n * @param method - Aggregation method name\n * @returns Aggregation function\n */\nexport function getAggregator(\n  method: AggregateMethod\n): (turnResults: TurnEvaluation[]) => { passed: boolean; score: number } {\n  switch (method) {\n    case 'worst': return aggregateWorst;\n    case 'average': return aggregateAverage;\n    case 'weighted': return aggregateWeighted;\n    case 'final_turn': return aggregateFinalTurn;\n    case 'majority': return aggregateMajority;\n    default: return aggregateAverage;\n  }\n}\n\n/**\n * Find problematic turns in a conversation\n *\n * @param result - Conversation evaluation result\n * @param scoreThreshold - Minimum acceptable score (default 70)\n * @returns Array of problematic turn results\n */\nexport function getProblematicTurns(\n  result: ConversationEvalResult,\n  scoreThreshold = 70\n): TurnEvaluation[] {\n  return result.turnResults.filter(t => !t.passed || t.score < scoreThreshold);\n}\n\n/**\n * Calculate conversation quality trend\n *\n * @param result - Conversation evaluation result\n * @returns Trend analysis\n */\nexport function analyzeConversationTrend(result: ConversationEvalResult): {\n  direction: 'improving' | 'declining' | 'stable';\n  firstHalfAvg: number;\n  secondHalfAvg: number;\n} {\n  const turns = result.turnResults;\n  if (turns.length < 2) {\n    return { direction: 'stable', firstHalfAvg: 0, secondHalfAvg: 0 };\n  }\n\n  const midpoint = Math.floor(turns.length / 2);\n  const firstHalf = turns.slice(0, midpoint);\n  const secondHalf = turns.slice(midpoint);\n\n  const firstHalfAvg = firstHalf.reduce((sum, t) => sum + t.score, 0) / firstHalf.length;\n  const secondHalfAvg = secondHalf.reduce((sum, t) => sum + t.score, 0) / secondHalf.length;\n\n  const diff = secondHalfAvg - firstHalfAvg;\n  let direction: 'improving' | 'declining' | 'stable';\n\n  if (diff > 5) 
direction = 'improving';\n  else if (diff < -5) direction = 'declining';\n  else direction = 'stable';\n\n  return { direction, firstHalfAvg, secondHalfAvg };\n}\n"]}
@@ -0,0 +1,205 @@
1
+ /**
2
+ * ThinkHive SDK v3.0 - Deterministic Graders API
3
+ *
4
+ * API for running deterministic (code-based) evaluations
5
+ */
6
+ export type RuleType = 'regex' | 'contains' | 'not_contains' | 'json_valid' | 'json_schema' | 'length' | 'pii_check' | 'sentiment' | 'latency' | 'token_count'; // Kinds of deterministic rule. NOTE(review): the create*Rule helpers presumably build configs for 'regex', 'contains'/'not_contains', 'length' and 'json_schema' - confirm.
7
+ export interface DeterministicEvalResult { // Resolved value of deterministicGraders.evaluate.
8
+ passed: boolean;
9
+ score: number; // NOTE(review): presumably 0-100, matching the range documented on calculateAverageScore - confirm.
10
+ reasoning: string;
11
+ ruleResults?: RuleResult[]; // Optional per-rule breakdown; when present it has the input type of allRulesPassed, getFailedRules and calculateAverageScore.
12
+ metadata?: Record<string, unknown>;
13
+ }
14
+ export interface RuleResult { // Outcome of one rule; element type accepted by allRulesPassed, getFailedRules and calculateAverageScore.
15
+ ruleId: string;
16
+ ruleName: string;
17
+ ruleType: RuleType;
18
+ passed: boolean;
19
+ score: number; // NOTE(review): presumably 0-100, per calculateAverageScore's documented return range - confirm.
20
+ details?: string;
21
+ }
22
+ export interface EvaluateOptions { // Argument of deterministicGraders.evaluate: one trace paired with one criterion.
23
+ traceId: string;
24
+ criterionId: string;
25
+ }
26
+ export interface BulkEvaluateOptions { // Argument of deterministicGraders.bulkEvaluate.
27
+ evaluations: Array<{ // Each entry has the same two fields as EvaluateOptions.
28
+ traceId: string;
29
+ criterionId: string;
30
+ }>;
31
+ }
32
+ export interface BulkEvaluateResult { // Resolved value of deterministicGraders.bulkEvaluate.
33
+ results: Array<{
34
+ traceId: string;
35
+ criterionId: string;
36
+ passed: boolean;
37
+ score: number;
38
+ error?: string; // NOTE(review): presumably set when this evaluation could not be run - confirm.
39
+ }>;
40
+ summary: {
41
+ total: number;
42
+ passed: number;
43
+ failed: number;
44
+ passRate: number; // NOTE(review): the bulkEvaluate example prints `passRate * 100` as a percentage, so presumably a 0-1 fraction - confirm.
45
+ };
46
+ }
47
+ export interface RuleTypeInfo { // Element type of the array resolved by deterministicGraders.getRuleTypes.
48
+ id: RuleType;
49
+ name: string;
50
+ description: string;
51
+ configFields: string[]; // NOTE(review): presumably the names of the config keys this rule type accepts - confirm.
52
+ }
53
+ export interface RuleTemplate { // Element type of the array resolved by deterministicGraders.getTemplates.
54
+ id: string; // The getTemplates example looks a template up by this id (e.g. 'no_pii').
55
+ name: string;
56
+ description: string;
57
+ ruleType: RuleType;
58
+ config: Record<string, unknown>;
59
+ }
60
+ /**
61
+ * Deterministic Graders API client for code-based evaluations
62
+ */
63
+ export declare const deterministicGraders: {
64
+ /**
65
+ * Run deterministic evaluation on a single trace
66
+ * @param options - The trace to grade and the criterion to grade it against
67
+ * @example
68
+ * ```typescript
69
+ * const result = await deterministicGraders.evaluate({
70
+ * traceId: 'trace_123',
71
+ * criterionId: 'criterion_456',
72
+ * });
73
+ * console.log(`Passed: ${result.passed}, Score: ${result.score}`);
74
+ * ```
75
+ */
76
+ evaluate(options: EvaluateOptions): Promise<DeterministicEvalResult>;
77
+ /**
78
+ * Run deterministic evaluations on multiple traces
79
+ * @param options - List of trace/criterion pairs; resolves to per-pair results plus a summary
80
+ * @example
81
+ * ```typescript
82
+ * const { results, summary } = await deterministicGraders.bulkEvaluate({
83
+ * evaluations: [
84
+ * { traceId: 'trace_1', criterionId: 'criterion_456' },
85
+ * { traceId: 'trace_2', criterionId: 'criterion_456' },
86
+ * { traceId: 'trace_3', criterionId: 'criterion_456' },
87
+ * ],
88
+ * });
89
+ * console.log(`Pass rate: ${summary.passRate * 100}%`);
90
+ * ```
91
+ */
92
+ bulkEvaluate(options: BulkEvaluateOptions): Promise<BulkEvaluateResult>;
93
+ /**
94
+ * Get available rule types with descriptions
95
+ * @returns Promise resolving to one RuleTypeInfo descriptor per entry
96
+ * @example
97
+ * ```typescript
98
+ * const ruleTypes = await deterministicGraders.getRuleTypes();
99
+ * for (const type of ruleTypes) {
100
+ * console.log(`${type.name}: ${type.description}`);
101
+ * }
102
+ * ```
103
+ */
104
+ getRuleTypes(): Promise<RuleTypeInfo[]>;
105
+ /**
106
+ * Get rule templates
107
+ * @returns Promise resolving to the list of RuleTemplate entries
108
+ * @example
109
+ * ```typescript
110
+ * const templates = await deterministicGraders.getTemplates();
111
+ * const noPiiTemplate = templates.find(t => t.id === 'no_pii');
112
+ * ```
113
+ */
114
+ getTemplates(): Promise<RuleTemplate[]>;
115
+ };
116
+ /**
117
+ * Create a regex rule configuration
118
+ *
119
+ * @param pattern - Regular expression pattern, given as a string (backslashes must be escaped in a string literal, as in the example)
120
+ * @param flags - Regex flags (default: 'gi')
121
+ * @returns Rule configuration object carrying `pattern` and a `flags` string
122
+ *
123
+ * @example
124
+ * ```typescript
125
+ * const config = createRegexRule('\\b(error|fail)\\b', 'gi');
126
+ * ```
127
+ */
128
+ export declare function createRegexRule(pattern: string, flags?: string): {
129
+ pattern: string;
130
+ flags: string;
131
+ };
132
+ /**
133
+ * Create a contains rule configuration
134
+ *
135
+ * @param values - Strings to check for
136
+ * @param caseSensitive - Whether comparison is case-sensitive (optional; NOTE(review): the default is not visible in this declaration - confirm)
137
+ * @returns Rule configuration object carrying `values` and a boolean `caseSensitive`
138
+ *
139
+ * @example
140
+ * ```typescript
141
+ * const config = createContainsRule(['hello', 'hi', 'hey'], false);
142
+ * ```
143
+ */
144
+ export declare function createContainsRule(values: string[], caseSensitive?: boolean): {
145
+ values: string[];
146
+ caseSensitive: boolean;
147
+ };
148
+ /**
149
+ * Create a length rule configuration
150
+ *
151
+ * @param min - Minimum length (optional)
152
+ * @param max - Maximum length (optional)
153
+ * @returns Rule configuration object; `min` and `max` are each optional in the result (NOTE(review): the unit of length is not visible here - confirm)
154
+ *
155
+ * @example
156
+ * ```typescript
157
+ * const config = createLengthRule(50, 1000);
158
+ * ```
159
+ */
160
+ export declare function createLengthRule(min?: number, max?: number): {
161
+ min?: number;
162
+ max?: number;
163
+ };
164
+ /**
165
+ * Create a JSON schema rule configuration
166
+ *
167
+ * @param schema - JSON Schema object
168
+ * @returns Rule configuration object holding the schema under the `schema` key
169
+ *
170
+ * @example
171
+ * ```typescript
172
+ * const config = createJsonSchemaRule({
173
+ * type: 'object',
174
+ * required: ['name', 'email'],
175
+ * properties: {
176
+ * name: { type: 'string' },
177
+ * email: { type: 'string', format: 'email' },
178
+ * },
179
+ * });
180
+ * ```
181
+ */
182
+ export declare function createJsonSchemaRule(schema: Record<string, unknown>): {
183
+ schema: Record<string, unknown>;
184
+ };
185
+ /**
186
+ * Check if all rule results passed
187
+ *
188
+ * @param results - Array of rule results
189
+ * @returns Whether all rules passed (NOTE(review): the result for an empty array is not visible in this declaration - confirm)
190
+ */
191
+ export declare function allRulesPassed(results: RuleResult[]): boolean;
192
+ /**
193
+ * Get failed rules from results
194
+ *
195
+ * @param results - Array of rule results
196
+ * @returns Array of failed rule results (NOTE(review): presumably the entries whose `passed` is false - confirm)
197
+ */
198
+ export declare function getFailedRules(results: RuleResult[]): RuleResult[];
199
+ /**
200
+ * Calculate average score from rule results
201
+ *
202
+ * @param results - Array of rule results
203
+ * @returns Average score (0-100) (NOTE(review): the result for an empty array is not visible in this declaration - confirm)
204
+ */
205
+ export declare function calculateAverageScore(results: RuleResult[]): number;