@thinkhive/sdk 3.1.1 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/MIGRATION.md +83 -12
- package/README.md +279 -128
- package/dist/api/agents.d.ts +169 -0
- package/dist/api/agents.js +185 -0
- package/dist/api/apiKeys.d.ts +252 -0
- package/dist/api/apiKeys.js +298 -0
- package/dist/api/business-metrics.d.ts +188 -0
- package/dist/api/business-metrics.js +213 -0
- package/dist/api/calibration.d.ts +0 -62
- package/dist/api/calibration.js +5 -48
- package/dist/api/claims.js +10 -7
- package/dist/api/conversation-eval.d.ts +200 -0
- package/dist/api/conversation-eval.js +235 -0
- package/dist/api/deterministic-graders.d.ts +205 -0
- package/dist/api/deterministic-graders.js +191 -0
- package/dist/api/eval-health.d.ts +250 -0
- package/dist/api/eval-health.js +224 -0
- package/dist/api/human-review.d.ts +275 -0
- package/dist/api/human-review.js +236 -0
- package/dist/api/nondeterminism.d.ts +300 -0
- package/dist/api/nondeterminism.js +250 -0
- package/dist/api/quality-metrics.d.ts +303 -0
- package/dist/api/quality-metrics.js +198 -0
- package/dist/api/roi-analytics.d.ts +263 -0
- package/dist/api/roi-analytics.js +204 -0
- package/dist/api/runs.js +12 -6
- package/dist/api/transcript-patterns.d.ts +204 -0
- package/dist/api/transcript-patterns.js +227 -0
- package/dist/core/client.d.ts +83 -9
- package/dist/core/client.js +229 -34
- package/dist/core/config.d.ts +2 -3
- package/dist/core/config.js +3 -4
- package/dist/core/types.d.ts +57 -4
- package/dist/core/types.js +1 -1
- package/dist/index.d.ts +429 -76
- package/dist/index.js +262 -42
- package/package.json +2 -2
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ThinkHive SDK v3.0 - Conversation Evaluation API
|
|
3
|
+
*
|
|
4
|
+
* API for multi-turn conversation evaluation
|
|
5
|
+
*/
|
|
6
|
+
/**
 * Identifier of a strategy for combining per-turn evaluation results
 * into a single conversation-level result.
 */
export type AggregateMethod =
    | 'worst'
    | 'average'
    | 'weighted'
    | 'final_turn'
    | 'majority';
|
|
7
|
+
/**
 * One recorded turn of a conversation session.
 */
export interface SessionTrace {
    /** Identifier of this trace. */
    id: string;
    /** Identifier of the session the trace belongs to. */
    sessionId: string;
    /** Position of the turn within the session (numbering base not specified here). */
    turnNumber: number;
    /** Message sent by the user in this turn. */
    userMessage: string;
    /** Response produced by the agent in this turn. */
    agentResponse: string;
    /** Time of the turn as a string (exact format not specified here). */
    timestamp: string;
    /** Optional free-form data attached to the trace. */
    metadata?: Record<string, unknown>;
}
|
|
16
|
+
/**
 * Evaluation outcome for a single conversation turn.
 */
export interface TurnEvaluation {
    /** Identifier of the trace that was evaluated. */
    traceId: string;
    /** Position of the turn within the session. */
    turnNumber: number;
    /** Whether the turn passed the criterion. */
    passed: boolean;
    /** Numeric score for the turn (scale not specified here). */
    score: number;
    /** Explanation accompanying the verdict. */
    reasoning: string;
}
|
|
23
|
+
/**
 * Result of evaluating a whole conversation against one criterion.
 */
export interface ConversationEvalResult {
    /** Session that was evaluated. */
    sessionId: string;
    /** Criterion the session was evaluated against. */
    criterionId: string;
    /** Number of turns reported for the conversation. */
    turnCount: number;
    /** Per-turn evaluation results. */
    turnResults: TurnEvaluation[];
    /** Conversation-level pass/fail verdict. */
    aggregatePassed: boolean;
    /** Conversation-level score. */
    aggregateScore: number;
    /** Aggregation strategy reported with the aggregate values. */
    aggregateMethod: AggregateMethod;
    /** Explanation accompanying the aggregate verdict. */
    reasoning: string;
    /** Optional free-form data attached to the result. */
    metadata?: Record<string, unknown>;
}
|
|
34
|
+
/**
 * Request payload accepted by `conversationEval.evaluate`.
 */
export interface EvaluateConversationOptions {
    /** Session to evaluate. */
    sessionId: string;
    /** Criterion to evaluate the session against. */
    criterionId: string;
    /** Optional evaluation settings. */
    options?: {
        /** Aggregation strategy to apply. */
        aggregateMethod?: AggregateMethod;
        /** Lower turn-count setting (exact server-side semantics not shown here). */
        minTurns?: number;
        /** Upper turn-count setting (exact server-side semantics not shown here). */
        maxTurns?: number;
    };
}
|
|
43
|
+
/**
 * Descriptive entry for one aggregation method.
 */
export interface AggregationMethodInfo {
    /** Method identifier. */
    id: AggregateMethod;
    /** Display name of the method. */
    name: string;
    /** Description of the method. */
    description: string;
    /** Text describing when the method is useful. */
    useCase: string;
}
|
|
49
|
+
/**
 * Client object exposing the multi-turn Conversation Evaluation API.
 */
export declare const conversationEval: {
    /**
     * Fetch the traces recorded for one conversation session.
     *
     * @param sessionId - Identifier of the session to look up
     * @returns Promise resolving to the session's traces
     *
     * @example
     * ```typescript
     * const traces = await conversationEval.getSessionTraces('session_123');
     * console.log(`Conversation has ${traces.length} turns`);
     * ```
     */
    getSessionTraces(sessionId: string): Promise<SessionTrace[]>;
    /**
     * Run a conversation-level evaluation.
     *
     * @param options - Session, criterion and optional evaluation settings
     * @returns Promise resolving to the conversation evaluation result
     *
     * @example
     * ```typescript
     * const result = await conversationEval.evaluate({
     *   sessionId: 'session_123',
     *   criterionId: 'criterion_456',
     *   options: {
     *     aggregateMethod: 'average',
     *     minTurns: 2,
     *   },
     * });
     * console.log(`Conversation score: ${result.aggregateScore}`);
     * ```
     */
    evaluate(options: EvaluateConversationOptions): Promise<ConversationEvalResult>;
    /**
     * List the available aggregation methods together with their descriptions.
     *
     * @returns Promise resolving to one entry per aggregation method
     *
     * @example
     * ```typescript
     * const methods = await conversationEval.getAggregationMethods();
     * for (const method of methods) {
     *   console.log(`${method.name}: ${method.description}`);
     * }
     * ```
     */
    getAggregationMethods(): Promise<AggregationMethodInfo[]>;
};
|
|
93
|
+
/**
 * Aggregate turn results with worst-turn logic.
 *
 * @param turnResults - Array of turn evaluation results
 * @returns Aggregate `passed` flag and `score` derived from the worst turn
 *
 * @example
 * ```typescript
 * const result = aggregateWorst(turnResults);
 * // Fails if any turn fails
 * ```
 */
export declare function aggregateWorst(turnResults: TurnEvaluation[]): { passed: boolean; score: number };
|
|
109
|
+
/**
 * Aggregate turn results with average logic.
 *
 * @param turnResults - Array of turn evaluation results
 * @returns Aggregate `passed` flag and averaged `score`
 *
 * @example
 * ```typescript
 * const result = aggregateAverage(turnResults);
 * ```
 */
export declare function aggregateAverage(turnResults: TurnEvaluation[]): { passed: boolean; score: number };
|
|
124
|
+
/**
 * Aggregate turn results with a weighted average in which later turns
 * carry more weight.
 *
 * @param turnResults - Array of turn evaluation results
 * @returns Aggregate `passed` flag and weighted `score`
 *
 * @example
 * ```typescript
 * const result = aggregateWeighted(turnResults);
 * // Later turns have higher weight
 * ```
 */
export declare function aggregateWeighted(turnResults: TurnEvaluation[]): { passed: boolean; score: number };
|
|
140
|
+
/**
 * Aggregate turn results by looking at the final turn only.
 *
 * @param turnResults - Array of turn evaluation results
 * @returns Aggregate `passed` flag and `score` taken from the final turn
 *
 * @example
 * ```typescript
 * const result = aggregateFinalTurn(turnResults);
 * // Only final turn matters
 * ```
 */
export declare function aggregateFinalTurn(turnResults: TurnEvaluation[]): { passed: boolean; score: number };
|
|
156
|
+
/**
 * Aggregate turn results by majority vote.
 *
 * @param turnResults - Array of turn evaluation results
 * @returns Aggregate `passed` flag (majority vote) and `score`
 *
 * @example
 * ```typescript
 * const result = aggregateMajority(turnResults);
 * // Passes if majority of turns pass
 * ```
 */
export declare function aggregateMajority(turnResults: TurnEvaluation[]): { passed: boolean; score: number };
|
|
172
|
+
/**
 * Look up the aggregation function that corresponds to a method name.
 *
 * @param method - Aggregation method name
 * @returns Function that aggregates an array of turn results
 */
export declare function getAggregator(
    method: AggregateMethod
): (turnResults: TurnEvaluation[]) => { passed: boolean; score: number };
|
|
182
|
+
/**
 * Select the problematic turns of an evaluated conversation.
 *
 * @param result - Conversation evaluation result
 * @param scoreThreshold - Minimum acceptable score (default 70)
 * @returns Array of problematic turn results
 */
export declare function getProblematicTurns(
    result: ConversationEvalResult,
    scoreThreshold?: number
): TurnEvaluation[];
|
|
190
|
+
/**
 * Analyze how conversation quality changes between the first and
 * second half of the turns.
 *
 * @param result - Conversation evaluation result
 * @returns Trend direction together with the average score of each half
 */
export declare function analyzeConversationTrend(
    result: ConversationEvalResult
): {
    direction: 'improving' | 'declining' | 'stable';
    firstHalfAvg: number;
    secondHalfAvg: number;
};
|
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* ThinkHive SDK v3.0 - Conversation Evaluation API
|
|
4
|
+
*
|
|
5
|
+
* API for multi-turn conversation evaluation
|
|
6
|
+
*/
|
|
7
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
8
|
+
exports.conversationEval = void 0;
|
|
9
|
+
exports.aggregateWorst = aggregateWorst;
|
|
10
|
+
exports.aggregateAverage = aggregateAverage;
|
|
11
|
+
exports.aggregateWeighted = aggregateWeighted;
|
|
12
|
+
exports.aggregateFinalTurn = aggregateFinalTurn;
|
|
13
|
+
exports.aggregateMajority = aggregateMajority;
|
|
14
|
+
exports.getAggregator = getAggregator;
|
|
15
|
+
exports.getProblematicTurns = getProblematicTurns;
|
|
16
|
+
exports.analyzeConversationTrend = analyzeConversationTrend;
|
|
17
|
+
const client_1 = require("../core/client");
|
|
18
|
+
// ============================================================================
|
|
19
|
+
// CONVERSATION EVAL API CLIENT
|
|
20
|
+
// ============================================================================
|
|
21
|
+
/**
|
|
22
|
+
* Conversation Evaluation API client for multi-turn evaluation
|
|
23
|
+
*/
|
|
24
|
+
exports.conversationEval = {
|
|
25
|
+
/**
|
|
26
|
+
* Get traces for a conversation session
|
|
27
|
+
*
|
|
28
|
+
* @example
|
|
29
|
+
* ```typescript
|
|
30
|
+
* const traces = await conversationEval.getSessionTraces('session_123');
|
|
31
|
+
* console.log(`Conversation has ${traces.length} turns`);
|
|
32
|
+
* ```
|
|
33
|
+
*/
|
|
34
|
+
async getSessionTraces(sessionId) {
|
|
35
|
+
return (0, client_1.apiRequestWithData)(`/conversation-eval/traces?sessionId=${sessionId}`, { apiVersion: 'none' });
|
|
36
|
+
},
|
|
37
|
+
/**
|
|
38
|
+
* Run conversation-level evaluation
|
|
39
|
+
*
|
|
40
|
+
* @example
|
|
41
|
+
* ```typescript
|
|
42
|
+
* const result = await conversationEval.evaluate({
|
|
43
|
+
* sessionId: 'session_123',
|
|
44
|
+
* criterionId: 'criterion_456',
|
|
45
|
+
* options: {
|
|
46
|
+
* aggregateMethod: 'average',
|
|
47
|
+
* minTurns: 2,
|
|
48
|
+
* },
|
|
49
|
+
* });
|
|
50
|
+
* console.log(`Conversation score: ${result.aggregateScore}`);
|
|
51
|
+
* ```
|
|
52
|
+
*/
|
|
53
|
+
async evaluate(options) {
|
|
54
|
+
return (0, client_1.apiRequestWithData)('/conversation-eval/evaluate', {
|
|
55
|
+
method: 'POST',
|
|
56
|
+
body: options,
|
|
57
|
+
apiVersion: 'none',
|
|
58
|
+
});
|
|
59
|
+
},
|
|
60
|
+
/**
|
|
61
|
+
* Get available aggregation methods with descriptions
|
|
62
|
+
*
|
|
63
|
+
* @example
|
|
64
|
+
* ```typescript
|
|
65
|
+
* const methods = await conversationEval.getAggregationMethods();
|
|
66
|
+
* for (const method of methods) {
|
|
67
|
+
* console.log(`${method.name}: ${method.description}`);
|
|
68
|
+
* }
|
|
69
|
+
* ```
|
|
70
|
+
*/
|
|
71
|
+
async getAggregationMethods() {
|
|
72
|
+
return (0, client_1.apiRequestWithData)('/conversation-eval/aggregation-methods', { apiVersion: 'none' });
|
|
73
|
+
},
|
|
74
|
+
};
|
|
75
|
+
// ============================================================================
|
|
76
|
+
// HELPER FUNCTIONS
|
|
77
|
+
// ============================================================================
|
|
78
|
+
/**
 * Calculate worst-turn aggregation.
 *
 * The aggregate score is the lowest per-turn score, and the aggregate
 * passes only when every turn passed. An empty input yields
 * `{ passed: false, score: 0 }`.
 *
 * @param turnResults - Array of turn evaluation results
 * @returns Aggregated result using worst turn logic
 *
 * @example
 * ```typescript
 * const result = aggregateWorst(turnResults);
 * // Fails if any turn fails
 * ```
 */
function aggregateWorst(turnResults) {
    if (turnResults.length === 0)
        return { passed: false, score: 0 };
    // Fold rather than Math.min(...scores): spreading a very large array into
    // call arguments can exceed the engine's argument limit and throw RangeError.
    const worstScore = turnResults.reduce((lowest, t) => Math.min(lowest, t.score), Infinity);
    const passed = turnResults.every(t => t.passed);
    return { passed, score: worstScore };
}
|
|
97
|
+
/**
 * Calculate average aggregation.
 *
 * The score is the arithmetic mean of the turn scores; the aggregate passes
 * when more than half of the turns passed. An empty input yields
 * `{ passed: false, score: 0 }`.
 *
 * @param turnResults - Array of turn evaluation results
 * @returns Aggregated result using average logic
 *
 * @example
 * ```typescript
 * const result = aggregateAverage(turnResults);
 * ```
 */
function aggregateAverage(turnResults) {
    const turnCount = turnResults.length;
    if (turnCount === 0) {
        return { passed: false, score: 0 };
    }
    // Single pass collecting both the score total and the number of passes.
    const totals = turnResults.reduce((acc, turn) => {
        acc.scoreSum = acc.scoreSum + turn.score;
        if (turn.passed) {
            acc.passes += 1;
        }
        return acc;
    }, { scoreSum: 0, passes: 0 });
    return {
        passed: totals.passes > turnCount / 2,
        score: totals.scoreSum / turnCount,
    };
}
|
|
116
|
+
/**
 * Calculate weighted average aggregation (later turns weighted more).
 *
 * Turn at array index `i` gets weight `i + 1`. The score is the weighted
 * mean of the turn scores; the aggregate passes when the weighted share of
 * passing turns is above 0.5. An empty input yields
 * `{ passed: false, score: 0 }`.
 *
 * @param turnResults - Array of turn evaluation results
 * @returns Aggregated result using weighted average logic
 *
 * @example
 * ```typescript
 * const result = aggregateWeighted(turnResults);
 * // Later turns have higher weight
 * ```
 */
function aggregateWeighted(turnResults) {
    if (turnResults.length === 0) {
        return { passed: false, score: 0 };
    }
    const totals = turnResults.reduce((acc, turn, position) => {
        // Linear weights by array position: 1, 2, 3, ...
        const weight = position + 1;
        acc.score += turn.score * weight;
        acc.pass += (turn.passed ? 1 : 0) * weight;
        acc.weight += weight;
        return acc;
    }, { score: 0, pass: 0, weight: 0 });
    const passed = (totals.pass / totals.weight) > 0.5;
    return { passed, score: totals.score / totals.weight };
}
|
|
145
|
+
/**
 * Calculate final-turn aggregation.
 *
 * Returns the `passed` flag and `score` of the last element of the array.
 * An empty input yields `{ passed: false, score: 0 }`.
 *
 * @param turnResults - Array of turn evaluation results
 * @returns Aggregated result using only the final turn
 *
 * @example
 * ```typescript
 * const result = aggregateFinalTurn(turnResults);
 * // Only final turn matters
 * ```
 */
function aggregateFinalTurn(turnResults) {
    const lastIndex = turnResults.length - 1;
    if (lastIndex === -1) {
        return { passed: false, score: 0 };
    }
    const { passed, score } = turnResults[lastIndex];
    return { passed, score };
}
|
|
163
|
+
/**
 * Calculate majority vote aggregation.
 *
 * The aggregate passes when strictly more than half of the turns passed
 * (a tie does not pass); the score is the arithmetic mean of the turn
 * scores. An empty input yields `{ passed: false, score: 0 }`.
 *
 * @param turnResults - Array of turn evaluation results
 * @returns Aggregated result using majority vote logic
 *
 * @example
 * ```typescript
 * const result = aggregateMajority(turnResults);
 * // Passes if majority of turns pass
 * ```
 */
function aggregateMajority(turnResults) {
    const total = turnResults.length;
    if (total === 0) {
        return { passed: false, score: 0 };
    }
    let passing = 0;
    let scoreSum = 0;
    turnResults.forEach((turn) => {
        if (turn.passed) {
            passing += 1;
        }
        scoreSum = scoreSum + turn.score;
    });
    return { passed: passing > total / 2, score: scoreSum / total };
}
|
|
183
|
+
/**
 * Get appropriate aggregation function for a method.
 *
 * Any value that is not one of the known method names falls back to
 * `aggregateAverage`.
 *
 * @param method - Aggregation method name
 * @returns Aggregation function
 */
function getAggregator(method) {
    // A Map compares keys without type coercion, like the strict equality of a switch.
    const aggregators = new Map([
        ['worst', aggregateWorst],
        ['average', aggregateAverage],
        ['weighted', aggregateWeighted],
        ['final_turn', aggregateFinalTurn],
        ['majority', aggregateMajority],
    ]);
    return aggregators.get(method) || aggregateAverage;
}
|
|
199
|
+
/**
 * Find problematic turns in a conversation.
 *
 * A turn is problematic when it did not pass or when its score is below
 * the threshold.
 *
 * @param result - Conversation evaluation result
 * @param scoreThreshold - Minimum acceptable score (default 70)
 * @returns Array of problematic turn results
 */
function getProblematicTurns(result, scoreThreshold = 70) {
    return result.turnResults.filter((turn) => {
        if (!turn.passed) {
            return true;
        }
        return turn.score < scoreThreshold;
    });
}
|
|
209
|
+
/**
 * Calculate conversation quality trend.
 *
 * The turns are split at `floor(length / 2)`; the mean score of each half
 * is compared. A difference above 5 is 'improving', below -5 is
 * 'declining', anything else is 'stable'. With fewer than two turns the
 * result is 'stable' with both averages set to 0.
 *
 * @param result - Conversation evaluation result
 * @returns Trend analysis
 */
function analyzeConversationTrend(result) {
    const { turnResults } = result;
    const total = turnResults.length;
    if (total < 2) {
        return { direction: 'stable', firstHalfAvg: 0, secondHalfAvg: 0 };
    }
    // Mean score of a non-empty slice of turns.
    const meanScore = (turns) => turns.reduce((sum, t) => sum + t.score, 0) / turns.length;
    const split = Math.floor(total / 2);
    const firstHalfAvg = meanScore(turnResults.slice(0, split));
    const secondHalfAvg = meanScore(turnResults.slice(split));
    const delta = secondHalfAvg - firstHalfAvg;
    const direction = delta > 5 ? 'improving' : delta < -5 ? 'declining' : 'stable';
    return { direction, firstHalfAvg, secondHalfAvg };
}
|
|
235
|
+
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"conversation-eval.js","sourceRoot":"","sources":["../../src/api/conversation-eval.ts"],"names":[],"mappings":";AAAA;;;;GAIG;;;AA4IH,wCAOC;AAaD,4CAQC;AAcD,8CAmBC;AAcD,gDAKC;AAcD,8CAQC;AAQD,sCAWC;AASD,kDAKC;AAQD,4DAyBC;AAlTD,2CAAoD;AAuDpD,+EAA+E;AAC/E,+BAA+B;AAC/B,+EAA+E;AAE/E;;GAEG;AACU,QAAA,gBAAgB,GAAG;IAC9B;;;;;;;;OAQG;IACH,KAAK,CAAC,gBAAgB,CAAC,SAAiB;QACtC,OAAO,IAAA,2BAAkB,EACvB,uCAAuC,SAAS,EAAE,EAClD,EAAE,UAAU,EAAE,MAAM,EAAE,CACvB,CAAC;IACJ,CAAC;IAED;;;;;;;;;;;;;;;OAeG;IACH,KAAK,CAAC,QAAQ,CAAC,OAAoC;QACjD,OAAO,IAAA,2BAAkB,EAAyB,6BAA6B,EAAE;YAC/E,MAAM,EAAE,MAAM;YACd,IAAI,EAAE,OAAO;YACb,UAAU,EAAE,MAAM;SACnB,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;;;;OAUG;IACH,KAAK,CAAC,qBAAqB;QACzB,OAAO,IAAA,2BAAkB,EACvB,wCAAwC,EACxC,EAAE,UAAU,EAAE,MAAM,EAAE,CACvB,CAAC;IACJ,CAAC;CACF,CAAC;AAEF,+EAA+E;AAC/E,mBAAmB;AACnB,+EAA+E;AAE/E;;;;;;;;;;;GAWG;AACH,SAAgB,cAAc,CAAC,WAA6B;IAC1D,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;IAEjE,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC;IAC9D,MAAM,MAAM,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;IAEhD,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,UAAU,EAAE,CAAC;AACvC,CAAC;AAED;;;;;;;;;;GAUG;AACH,SAAgB,gBAAgB,CAAC,WAA6B;IAC5D,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;IAEjE,MAAM,QAAQ,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,GAAG,WAAW,CAAC,MAAM,CAAC;IACvF,MAAM,WAAW,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;IAC7D,MAAM,MAAM,GAAG,WAAW,GAAG,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC;IAEpD,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC;AACrC,CAAC;AAED;;;;;;;;;;;GAWG;AACH,SAAgB,iBAAiB,CAAC,WAA6B;IAC7D,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;IAEjE,+BAA+B;IAC/B,IAAI,WAAW,GAAG,CAAC,CAAC;IACpB,IAAI,WAAW
,GAAG,CAAC,CAAC;IACpB,IAAI,eAAe,GAAG,CAAC,CAAC;IAExB,WAAW,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE;QAClC,MAAM,MAAM,GAAG,KAAK,GAAG,CAAC,CAAC;QACzB,WAAW,IAAI,IAAI,CAAC,KAAK,GAAG,MAAM,CAAC;QACnC,eAAe,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC;QAClD,WAAW,IAAI,MAAM,CAAC;IACxB,CAAC,CAAC,CAAC;IAEH,MAAM,QAAQ,GAAG,WAAW,GAAG,WAAW,CAAC;IAC3C,MAAM,MAAM,GAAG,CAAC,eAAe,GAAG,WAAW,CAAC,GAAG,GAAG,CAAC;IAErD,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC;AACrC,CAAC;AAED;;;;;;;;;;;GAWG;AACH,SAAgB,kBAAkB,CAAC,WAA6B;IAC9D,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;IAEjE,MAAM,SAAS,GAAG,WAAW,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACtD,OAAO,EAAE,MAAM,EAAE,SAAS,CAAC,MAAM,EAAE,KAAK,EAAE,SAAS,CAAC,KAAK,EAAE,CAAC;AAC9D,CAAC;AAED;;;;;;;;;;;GAWG;AACH,SAAgB,iBAAiB,CAAC,WAA6B;IAC7D,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;IAEjE,MAAM,WAAW,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;IAC7D,MAAM,MAAM,GAAG,WAAW,GAAG,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC;IACpD,MAAM,QAAQ,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,GAAG,WAAW,CAAC,MAAM,CAAC;IAEvF,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC;AACrC,CAAC;AAED;;;;;GAKG;AACH,SAAgB,aAAa,CAC3B,MAAuB;IAEvB,QAAQ,MAAM,EAAE,CAAC;QACf,KAAK,OAAO,CAAC,CAAC,OAAO,cAAc,CAAC;QACpC,KAAK,SAAS,CAAC,CAAC,OAAO,gBAAgB,CAAC;QACxC,KAAK,UAAU,CAAC,CAAC,OAAO,iBAAiB,CAAC;QAC1C,KAAK,YAAY,CAAC,CAAC,OAAO,kBAAkB,CAAC;QAC7C,KAAK,UAAU,CAAC,CAAC,OAAO,iBAAiB,CAAC;QAC1C,OAAO,CAAC,CAAC,OAAO,gBAAgB,CAAC;IACnC,CAAC;AACH,CAAC;AAED;;;;;;GAMG;AACH,SAAgB,mBAAmB,CACjC,MAA8B,EAC9B,cAAc,GAAG,EAAE;IAEnB,OAAO,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,KAAK,GAAG,cAAc,CAAC,CAAC;AAC/E,CAAC;AAED;;;;;GAKG;AACH,SAAgB,wBAAwB,CAAC,MAA8B;IAKrE,MAAM,KAAK,GAAG,MAAM,CAAC,WAAW,CAAC;IACjC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACrB,OAAO,EAAE,SAAS,EAAE,QAA
Q,EAAE,YAAY,EAAE,CAAC,EAAE,aAAa,EAAE,CAAC,EAAE,CAAC;IACpE,CAAC;IAED,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAC9C,MAAM,SAAS,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC;IAC3C,MAAM,UAAU,GAAG,KAAK,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;IAEzC,MAAM,YAAY,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,GAAG,SAAS,CAAC,MAAM,CAAC;IACvF,MAAM,aAAa,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,GAAG,UAAU,CAAC,MAAM,CAAC;IAE1F,MAAM,IAAI,GAAG,aAAa,GAAG,YAAY,CAAC;IAC1C,IAAI,SAA+C,CAAC;IAEpD,IAAI,IAAI,GAAG,CAAC;QAAE,SAAS,GAAG,WAAW,CAAC;SACjC,IAAI,IAAI,GAAG,CAAC,CAAC;QAAE,SAAS,GAAG,WAAW,CAAC;;QACvC,SAAS,GAAG,QAAQ,CAAC;IAE1B,OAAO,EAAE,SAAS,EAAE,YAAY,EAAE,aAAa,EAAE,CAAC;AACpD,CAAC","sourcesContent":["/**\n * ThinkHive SDK v3.0 - Conversation Evaluation API\n *\n * API for multi-turn conversation evaluation\n */\n\nimport { apiRequestWithData } from '../core/client';\n\n// ============================================================================\n// TYPES\n// ============================================================================\n\nexport type AggregateMethod = 'worst' | 'average' | 'weighted' | 'final_turn' | 'majority';\n\nexport interface SessionTrace {\n  id: string;\n  sessionId: string;\n  turnNumber: number;\n  userMessage: string;\n  agentResponse: string;\n  timestamp: string;\n  metadata?: Record<string, unknown>;\n}\n\nexport interface TurnEvaluation {\n  traceId: string;\n  turnNumber: number;\n  passed: boolean;\n  score: number;\n  reasoning: string;\n}\n\nexport interface ConversationEvalResult {\n  sessionId: string;\n  criterionId: string;\n  turnCount: number;\n  turnResults: TurnEvaluation[];\n  aggregatePassed: boolean;\n  aggregateScore: number;\n  aggregateMethod: AggregateMethod;\n  reasoning: string;\n  metadata?: Record<string, unknown>;\n}\n\nexport interface EvaluateConversationOptions {\n  sessionId: string;\n  
criterionId: string;\n  options?: {\n    aggregateMethod?: AggregateMethod;\n    minTurns?: number;\n    maxTurns?: number;\n  };\n}\n\nexport interface AggregationMethodInfo {\n  id: AggregateMethod;\n  name: string;\n  description: string;\n  useCase: string;\n}\n\n// ============================================================================\n// CONVERSATION EVAL API CLIENT\n// ============================================================================\n\n/**\n * Conversation Evaluation API client for multi-turn evaluation\n */\nexport const conversationEval = {\n  /**\n   * Get traces for a conversation session\n   *\n   * @example\n   * ```typescript\n   * const traces = await conversationEval.getSessionTraces('session_123');\n   * console.log(`Conversation has ${traces.length} turns`);\n   * ```\n   */\n  async getSessionTraces(sessionId: string): Promise<SessionTrace[]> {\n    return apiRequestWithData<SessionTrace[]>(\n      `/conversation-eval/traces?sessionId=${sessionId}`,\n      { apiVersion: 'none' }\n    );\n  },\n\n  /**\n   * Run conversation-level evaluation\n   *\n   * @example\n   * ```typescript\n   * const result = await conversationEval.evaluate({\n   *   sessionId: 'session_123',\n   *   criterionId: 'criterion_456',\n   *   options: {\n   *     aggregateMethod: 'average',\n   *     minTurns: 2,\n   *   },\n   * });\n   * console.log(`Conversation score: ${result.aggregateScore}`);\n   * ```\n   */\n  async evaluate(options: EvaluateConversationOptions): Promise<ConversationEvalResult> {\n    return apiRequestWithData<ConversationEvalResult>('/conversation-eval/evaluate', {\n      method: 'POST',\n      body: options,\n      apiVersion: 'none',\n    });\n  },\n\n  /**\n   * Get available aggregation methods with descriptions\n   *\n   * @example\n   * ```typescript\n   * const methods = await conversationEval.getAggregationMethods();\n   * for (const method of methods) {\n   *   console.log(`${method.name}: ${method.description}`);\n   * 
}\n   * ```\n   */\n  async getAggregationMethods(): Promise<AggregationMethodInfo[]> {\n    return apiRequestWithData<AggregationMethodInfo[]>(\n      '/conversation-eval/aggregation-methods',\n      { apiVersion: 'none' }\n    );\n  },\n};\n\n// ============================================================================\n// HELPER FUNCTIONS\n// ============================================================================\n\n/**\n * Calculate worst-turn aggregation\n *\n * @param turnResults - Array of turn evaluation results\n * @returns Aggregated result using worst turn logic\n *\n * @example\n * ```typescript\n * const result = aggregateWorst(turnResults);\n * // Fails if any turn fails\n * ```\n */\nexport function aggregateWorst(turnResults: TurnEvaluation[]): { passed: boolean; score: number } {\n  if (turnResults.length === 0) return { passed: false, score: 0 };\n\n  const worstScore = Math.min(...turnResults.map(t => t.score));\n  const passed = turnResults.every(t => t.passed);\n\n  return { passed, score: worstScore };\n}\n\n/**\n * Calculate average aggregation\n *\n * @param turnResults - Array of turn evaluation results\n * @returns Aggregated result using average logic\n *\n * @example\n * ```typescript\n * const result = aggregateAverage(turnResults);\n * ```\n */\nexport function aggregateAverage(turnResults: TurnEvaluation[]): { passed: boolean; score: number } {\n  if (turnResults.length === 0) return { passed: false, score: 0 };\n\n  const avgScore = turnResults.reduce((sum, t) => sum + t.score, 0) / turnResults.length;\n  const passedCount = turnResults.filter(t => t.passed).length;\n  const passed = passedCount > turnResults.length / 2;\n\n  return { passed, score: avgScore };\n}\n\n/**\n * Calculate weighted average aggregation (later turns weighted more)\n *\n * @param turnResults - Array of turn evaluation results\n * @returns Aggregated result using weighted average logic\n *\n * @example\n * ```typescript\n * const result = 
aggregateWeighted(turnResults);\n * // Later turns have higher weight\n * ```\n */\nexport function aggregateWeighted(turnResults: TurnEvaluation[]): { passed: boolean; score: number } {\n  if (turnResults.length === 0) return { passed: false, score: 0 };\n\n  // Linear weights: 1, 2, 3, ...\n  let weightedSum = 0;\n  let weightTotal = 0;\n  let weightedPassSum = 0;\n\n  turnResults.forEach((turn, index) => {\n    const weight = index + 1;\n    weightedSum += turn.score * weight;\n    weightedPassSum += (turn.passed ? 1 : 0) * weight;\n    weightTotal += weight;\n  });\n\n  const avgScore = weightedSum / weightTotal;\n  const passed = (weightedPassSum / weightTotal) > 0.5;\n\n  return { passed, score: avgScore };\n}\n\n/**\n * Calculate final-turn aggregation\n *\n * @param turnResults - Array of turn evaluation results\n * @returns Aggregated result using only the final turn\n *\n * @example\n * ```typescript\n * const result = aggregateFinalTurn(turnResults);\n * // Only final turn matters\n * ```\n */\nexport function aggregateFinalTurn(turnResults: TurnEvaluation[]): { passed: boolean; score: number } {\n  if (turnResults.length === 0) return { passed: false, score: 0 };\n\n  const finalTurn = turnResults[turnResults.length - 1];\n  return { passed: finalTurn.passed, score: finalTurn.score };\n}\n\n/**\n * Calculate majority vote aggregation\n *\n * @param turnResults - Array of turn evaluation results\n * @returns Aggregated result using majority vote logic\n *\n * @example\n * ```typescript\n * const result = aggregateMajority(turnResults);\n * // Passes if majority of turns pass\n * ```\n */\nexport function aggregateMajority(turnResults: TurnEvaluation[]): { passed: boolean; score: number } {\n  if (turnResults.length === 0) return { passed: false, score: 0 };\n\n  const passedCount = turnResults.filter(t => t.passed).length;\n  const passed = passedCount > turnResults.length / 2;\n  const avgScore = turnResults.reduce((sum, t) => sum + t.score, 0) / 
turnResults.length;\n\n  return { passed, score: avgScore };\n}\n\n/**\n * Get appropriate aggregation function for a method\n *\n * @param method - Aggregation method name\n * @returns Aggregation function\n */\nexport function getAggregator(\n  method: AggregateMethod\n): (turnResults: TurnEvaluation[]) => { passed: boolean; score: number } {\n  switch (method) {\n    case 'worst': return aggregateWorst;\n    case 'average': return aggregateAverage;\n    case 'weighted': return aggregateWeighted;\n    case 'final_turn': return aggregateFinalTurn;\n    case 'majority': return aggregateMajority;\n    default: return aggregateAverage;\n  }\n}\n\n/**\n * Find problematic turns in a conversation\n *\n * @param result - Conversation evaluation result\n * @param scoreThreshold - Minimum acceptable score (default 70)\n * @returns Array of problematic turn results\n */\nexport function getProblematicTurns(\n  result: ConversationEvalResult,\n  scoreThreshold = 70\n): TurnEvaluation[] {\n  return result.turnResults.filter(t => !t.passed || t.score < scoreThreshold);\n}\n\n/**\n * Calculate conversation quality trend\n *\n * @param result - Conversation evaluation result\n * @returns Trend analysis\n */\nexport function analyzeConversationTrend(result: ConversationEvalResult): {\n  direction: 'improving' | 'declining' | 'stable';\n  firstHalfAvg: number;\n  secondHalfAvg: number;\n} {\n  const turns = result.turnResults;\n  if (turns.length < 2) {\n    return { direction: 'stable', firstHalfAvg: 0, secondHalfAvg: 0 };\n  }\n\n  const midpoint = Math.floor(turns.length / 2);\n  const firstHalf = turns.slice(0, midpoint);\n  const secondHalf = turns.slice(midpoint);\n\n  const firstHalfAvg = firstHalf.reduce((sum, t) => sum + t.score, 0) / firstHalf.length;\n  const secondHalfAvg = secondHalf.reduce((sum, t) => sum + t.score, 0) / secondHalf.length;\n\n  const diff = secondHalfAvg - firstHalfAvg;\n  let direction: 'improving' | 'declining' | 'stable';\n\n  if (diff > 5) 
direction = 'improving';\n  else if (diff < -5) direction = 'declining';\n  else direction = 'stable';\n\n  return { direction, firstHalfAvg, secondHalfAvg };\n}\n"]}
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ThinkHive SDK v3.0 - Deterministic Graders API
|
|
3
|
+
*
|
|
4
|
+
* API for running deterministic (code-based) evaluations
|
|
5
|
+
*/
|
|
6
|
+
export type RuleType = 'regex' | 'contains' | 'not_contains' | 'json_valid' | 'json_schema' | 'length' | 'pii_check' | 'sentiment' | 'latency' | 'token_count';
|
|
7
|
+
export interface DeterministicEvalResult {
|
|
8
|
+
passed: boolean;
|
|
9
|
+
score: number;
|
|
10
|
+
reasoning: string;
|
|
11
|
+
ruleResults?: RuleResult[];
|
|
12
|
+
metadata?: Record<string, unknown>;
|
|
13
|
+
}
|
|
14
|
+
export interface RuleResult {
|
|
15
|
+
ruleId: string;
|
|
16
|
+
ruleName: string;
|
|
17
|
+
ruleType: RuleType;
|
|
18
|
+
passed: boolean;
|
|
19
|
+
score: number;
|
|
20
|
+
details?: string;
|
|
21
|
+
}
|
|
22
|
+
export interface EvaluateOptions {
|
|
23
|
+
traceId: string;
|
|
24
|
+
criterionId: string;
|
|
25
|
+
}
|
|
26
|
+
export interface BulkEvaluateOptions {
|
|
27
|
+
evaluations: Array<{
|
|
28
|
+
traceId: string;
|
|
29
|
+
criterionId: string;
|
|
30
|
+
}>;
|
|
31
|
+
}
|
|
32
|
+
export interface BulkEvaluateResult {
|
|
33
|
+
results: Array<{
|
|
34
|
+
traceId: string;
|
|
35
|
+
criterionId: string;
|
|
36
|
+
passed: boolean;
|
|
37
|
+
score: number;
|
|
38
|
+
error?: string;
|
|
39
|
+
}>;
|
|
40
|
+
summary: {
|
|
41
|
+
total: number;
|
|
42
|
+
passed: number;
|
|
43
|
+
failed: number;
|
|
44
|
+
passRate: number;
|
|
45
|
+
};
|
|
46
|
+
}
|
|
47
|
+
export interface RuleTypeInfo {
|
|
48
|
+
id: RuleType;
|
|
49
|
+
name: string;
|
|
50
|
+
description: string;
|
|
51
|
+
configFields: string[];
|
|
52
|
+
}
|
|
53
|
+
export interface RuleTemplate {
|
|
54
|
+
id: string;
|
|
55
|
+
name: string;
|
|
56
|
+
description: string;
|
|
57
|
+
ruleType: RuleType;
|
|
58
|
+
config: Record<string, unknown>;
|
|
59
|
+
}
|
|
60
|
+
/**
|
|
61
|
+
* Deterministic Graders API client for code-based evaluations
|
|
62
|
+
*/
|
|
63
|
+
export declare const deterministicGraders: {
|
|
64
|
+
/**
|
|
65
|
+
* Run deterministic evaluation on a single trace
|
|
66
|
+
*
|
|
67
|
+
* @example
|
|
68
|
+
* ```typescript
|
|
69
|
+
* const result = await deterministicGraders.evaluate({
|
|
70
|
+
* traceId: 'trace_123',
|
|
71
|
+
* criterionId: 'criterion_456',
|
|
72
|
+
* });
|
|
73
|
+
* console.log(`Passed: ${result.passed}, Score: ${result.score}`);
|
|
74
|
+
* ```
|
|
75
|
+
*/
|
|
76
|
+
evaluate(options: EvaluateOptions): Promise<DeterministicEvalResult>;
|
|
77
|
+
/**
|
|
78
|
+
* Run deterministic evaluations on multiple traces
|
|
79
|
+
*
|
|
80
|
+
* @example
|
|
81
|
+
* ```typescript
|
|
82
|
+
* const { results, summary } = await deterministicGraders.bulkEvaluate({
|
|
83
|
+
* evaluations: [
|
|
84
|
+
* { traceId: 'trace_1', criterionId: 'criterion_456' },
|
|
85
|
+
* { traceId: 'trace_2', criterionId: 'criterion_456' },
|
|
86
|
+
* { traceId: 'trace_3', criterionId: 'criterion_456' },
|
|
87
|
+
* ],
|
|
88
|
+
* });
|
|
89
|
+
* console.log(`Pass rate: ${summary.passRate * 100}%`);
|
|
90
|
+
* ```
|
|
91
|
+
*/
|
|
92
|
+
bulkEvaluate(options: BulkEvaluateOptions): Promise<BulkEvaluateResult>;
|
|
93
|
+
/**
|
|
94
|
+
* Get available rule types with descriptions
|
|
95
|
+
*
|
|
96
|
+
* @example
|
|
97
|
+
* ```typescript
|
|
98
|
+
* const ruleTypes = await deterministicGraders.getRuleTypes();
|
|
99
|
+
* for (const type of ruleTypes) {
|
|
100
|
+
* console.log(`${type.name}: ${type.description}`);
|
|
101
|
+
* }
|
|
102
|
+
* ```
|
|
103
|
+
*/
|
|
104
|
+
getRuleTypes(): Promise<RuleTypeInfo[]>;
|
|
105
|
+
/**
|
|
106
|
+
* Get rule templates
|
|
107
|
+
*
|
|
108
|
+
* @example
|
|
109
|
+
* ```typescript
|
|
110
|
+
* const templates = await deterministicGraders.getTemplates();
|
|
111
|
+
* const noPiiTemplate = templates.find(t => t.id === 'no_pii');
|
|
112
|
+
* ```
|
|
113
|
+
*/
|
|
114
|
+
getTemplates(): Promise<RuleTemplate[]>;
|
|
115
|
+
};
|
|
116
|
+
/**
|
|
117
|
+
* Create a regex rule configuration
|
|
118
|
+
*
|
|
119
|
+
* @param pattern - Regular expression pattern
|
|
120
|
+
* @param flags - Regex flags (default: 'gi')
|
|
121
|
+
* @returns Rule configuration object
|
|
122
|
+
*
|
|
123
|
+
* @example
|
|
124
|
+
* ```typescript
|
|
125
|
+
* const config = createRegexRule('\\b(error|fail)\\b', 'gi');
|
|
126
|
+
* ```
|
|
127
|
+
*/
|
|
128
|
+
export declare function createRegexRule(pattern: string, flags?: string): {
|
|
129
|
+
pattern: string;
|
|
130
|
+
flags: string;
|
|
131
|
+
};
|
|
132
|
+
/**
|
|
133
|
+
* Create a contains rule configuration
|
|
134
|
+
*
|
|
135
|
+
* @param values - Strings to check for
|
|
136
|
+
* @param caseSensitive - Whether comparison is case-sensitive
|
|
137
|
+
* @returns Rule configuration object
|
|
138
|
+
*
|
|
139
|
+
* @example
|
|
140
|
+
* ```typescript
|
|
141
|
+
* const config = createContainsRule(['hello', 'hi', 'hey'], false);
|
|
142
|
+
* ```
|
|
143
|
+
*/
|
|
144
|
+
export declare function createContainsRule(values: string[], caseSensitive?: boolean): {
|
|
145
|
+
values: string[];
|
|
146
|
+
caseSensitive: boolean;
|
|
147
|
+
};
|
|
148
|
+
/**
|
|
149
|
+
* Create a length rule configuration
|
|
150
|
+
*
|
|
151
|
+
* @param min - Minimum length (optional)
|
|
152
|
+
* @param max - Maximum length (optional)
|
|
153
|
+
* @returns Rule configuration object
|
|
154
|
+
*
|
|
155
|
+
* @example
|
|
156
|
+
* ```typescript
|
|
157
|
+
* const config = createLengthRule(50, 1000);
|
|
158
|
+
* ```
|
|
159
|
+
*/
|
|
160
|
+
export declare function createLengthRule(min?: number, max?: number): {
|
|
161
|
+
min?: number;
|
|
162
|
+
max?: number;
|
|
163
|
+
};
|
|
164
|
+
/**
|
|
165
|
+
* Create a JSON schema rule configuration
|
|
166
|
+
*
|
|
167
|
+
* @param schema - JSON Schema object
|
|
168
|
+
* @returns Rule configuration object
|
|
169
|
+
*
|
|
170
|
+
* @example
|
|
171
|
+
* ```typescript
|
|
172
|
+
* const config = createJsonSchemaRule({
|
|
173
|
+
* type: 'object',
|
|
174
|
+
* required: ['name', 'email'],
|
|
175
|
+
* properties: {
|
|
176
|
+
* name: { type: 'string' },
|
|
177
|
+
* email: { type: 'string', format: 'email' },
|
|
178
|
+
* },
|
|
179
|
+
* });
|
|
180
|
+
* ```
|
|
181
|
+
*/
|
|
182
|
+
export declare function createJsonSchemaRule(schema: Record<string, unknown>): {
|
|
183
|
+
schema: Record<string, unknown>;
|
|
184
|
+
};
|
|
185
|
+
/**
|
|
186
|
+
* Check if all rule results passed
|
|
187
|
+
*
|
|
188
|
+
* @param results - Array of rule results
|
|
189
|
+
* @returns Whether all rules passed
|
|
190
|
+
*/
|
|
191
|
+
export declare function allRulesPassed(results: RuleResult[]): boolean;
|
|
192
|
+
/**
|
|
193
|
+
* Get failed rules from results
|
|
194
|
+
*
|
|
195
|
+
* @param results - Array of rule results
|
|
196
|
+
* @returns Array of failed rule results
|
|
197
|
+
*/
|
|
198
|
+
export declare function getFailedRules(results: RuleResult[]): RuleResult[];
|
|
199
|
+
/**
|
|
200
|
+
* Calculate average score from rule results
|
|
201
|
+
*
|
|
202
|
+
* @param results - Array of rule results
|
|
203
|
+
* @returns Average score (0-100)
|
|
204
|
+
*/
|
|
205
|
+
export declare function calculateAverageScore(results: RuleResult[]): number;
|