@thinkhive/sdk 3.1.1 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/MIGRATION.md +83 -12
- package/README.md +279 -128
- package/dist/api/agents.d.ts +169 -0
- package/dist/api/agents.js +185 -0
- package/dist/api/apiKeys.d.ts +252 -0
- package/dist/api/apiKeys.js +298 -0
- package/dist/api/business-metrics.d.ts +188 -0
- package/dist/api/business-metrics.js +213 -0
- package/dist/api/calibration.d.ts +0 -62
- package/dist/api/calibration.js +5 -48
- package/dist/api/claims.js +10 -7
- package/dist/api/conversation-eval.d.ts +200 -0
- package/dist/api/conversation-eval.js +235 -0
- package/dist/api/deterministic-graders.d.ts +205 -0
- package/dist/api/deterministic-graders.js +191 -0
- package/dist/api/eval-health.d.ts +250 -0
- package/dist/api/eval-health.js +224 -0
- package/dist/api/human-review.d.ts +275 -0
- package/dist/api/human-review.js +236 -0
- package/dist/api/nondeterminism.d.ts +300 -0
- package/dist/api/nondeterminism.js +250 -0
- package/dist/api/quality-metrics.d.ts +303 -0
- package/dist/api/quality-metrics.js +198 -0
- package/dist/api/roi-analytics.d.ts +263 -0
- package/dist/api/roi-analytics.js +204 -0
- package/dist/api/runs.js +12 -6
- package/dist/api/transcript-patterns.d.ts +204 -0
- package/dist/api/transcript-patterns.js +227 -0
- package/dist/core/client.d.ts +83 -9
- package/dist/core/client.js +229 -34
- package/dist/core/config.d.ts +2 -3
- package/dist/core/config.js +3 -4
- package/dist/core/types.d.ts +57 -4
- package/dist/core/types.js +1 -1
- package/dist/index.d.ts +429 -76
- package/dist/index.js +262 -42
- package/package.json +2 -2
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* ThinkHive SDK v3.0 - Human Review API
|
|
4
|
+
*
|
|
5
|
+
* API for managing human review queue, calibration sets, and reviewer management
|
|
6
|
+
*/
|
|
7
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
8
|
+
exports.humanReview = void 0;
|
|
9
|
+
const client_1 = require("../core/client");
|
|
10
|
+
// ============================================================================
|
|
11
|
+
// HUMAN REVIEW API CLIENT
|
|
12
|
+
// ============================================================================
|
|
13
|
+
/**
|
|
14
|
+
* Human Review API client for managing human review queue and calibration
|
|
15
|
+
*/
|
|
16
|
+
exports.humanReview = {
|
|
17
|
+
/**
|
|
18
|
+
* Get pending review queue items
|
|
19
|
+
*
|
|
20
|
+
* @example
|
|
21
|
+
* ```typescript
|
|
22
|
+
* const items = await humanReview.getQueue({
|
|
23
|
+
* agentId: 'agent_123',
|
|
24
|
+
* status: 'pending',
|
|
25
|
+
* limit: 20,
|
|
26
|
+
* });
|
|
27
|
+
* ```
|
|
28
|
+
*/
|
|
29
|
+
async getQueue(options = {}) {
|
|
30
|
+
const params = new URLSearchParams();
|
|
31
|
+
if (options.agentId)
|
|
32
|
+
params.set('agentId', options.agentId);
|
|
33
|
+
if (options.status)
|
|
34
|
+
params.set('status', options.status);
|
|
35
|
+
if (options.reviewType)
|
|
36
|
+
params.set('reviewType', options.reviewType);
|
|
37
|
+
if (options.isCalibration !== undefined)
|
|
38
|
+
params.set('isCalibration', String(options.isCalibration));
|
|
39
|
+
if (options.minPriority !== undefined)
|
|
40
|
+
params.set('minPriority', String(options.minPriority));
|
|
41
|
+
if (options.limit)
|
|
42
|
+
params.set('limit', String(options.limit));
|
|
43
|
+
if (options.offset)
|
|
44
|
+
params.set('offset', String(options.offset));
|
|
45
|
+
return (0, client_1.apiRequestWithData)(`/human-review/queue?${params.toString()}`, { apiVersion: 'none' });
|
|
46
|
+
},
|
|
47
|
+
/**
|
|
48
|
+
* Add an item to the review queue
|
|
49
|
+
*
|
|
50
|
+
* @example
|
|
51
|
+
* ```typescript
|
|
52
|
+
* const item = await humanReview.addToQueue({
|
|
53
|
+
* traceId: 'trace_123',
|
|
54
|
+
* agentId: 'agent_123',
|
|
55
|
+
* reviewType: 'disagreement',
|
|
56
|
+
* priority: 80,
|
|
57
|
+
* llmScore: 65,
|
|
58
|
+
* llmPassed: true,
|
|
59
|
+
* });
|
|
60
|
+
* ```
|
|
61
|
+
*/
|
|
62
|
+
async addToQueue(options) {
|
|
63
|
+
return (0, client_1.apiRequestWithData)('/human-review/queue', {
|
|
64
|
+
method: 'POST',
|
|
65
|
+
body: options,
|
|
66
|
+
apiVersion: 'none',
|
|
67
|
+
});
|
|
68
|
+
},
|
|
69
|
+
/**
|
|
70
|
+
* Get a specific review item
|
|
71
|
+
*
|
|
72
|
+
* @example
|
|
73
|
+
* ```typescript
|
|
74
|
+
* const item = await humanReview.getItem('item_123');
|
|
75
|
+
* ```
|
|
76
|
+
*/
|
|
77
|
+
async getItem(itemId) {
|
|
78
|
+
return (0, client_1.apiRequestWithData)(`/human-review/queue/${itemId}`, { apiVersion: 'none' });
|
|
79
|
+
},
|
|
80
|
+
/**
|
|
81
|
+
* Claim a review item for processing
|
|
82
|
+
*
|
|
83
|
+
* @example
|
|
84
|
+
* ```typescript
|
|
85
|
+
* const item = await humanReview.claim('item_123');
|
|
86
|
+
* ```
|
|
87
|
+
*/
|
|
88
|
+
async claim(itemId) {
|
|
89
|
+
return (0, client_1.apiRequestWithData)(`/human-review/queue/${itemId}/claim`, { method: 'POST', apiVersion: 'none' });
|
|
90
|
+
},
|
|
91
|
+
/**
|
|
92
|
+
* Release a claimed review item
|
|
93
|
+
*
|
|
94
|
+
* @example
|
|
95
|
+
* ```typescript
|
|
96
|
+
* await humanReview.release('item_123');
|
|
97
|
+
* ```
|
|
98
|
+
*/
|
|
99
|
+
async release(itemId) {
|
|
100
|
+
return (0, client_1.apiRequestWithData)(`/human-review/queue/${itemId}/release`, { method: 'POST', apiVersion: 'none' });
|
|
101
|
+
},
|
|
102
|
+
/**
|
|
103
|
+
* Skip a review item
|
|
104
|
+
*
|
|
105
|
+
* @example
|
|
106
|
+
* ```typescript
|
|
107
|
+
* await humanReview.skip('item_123');
|
|
108
|
+
* ```
|
|
109
|
+
*/
|
|
110
|
+
async skip(itemId) {
|
|
111
|
+
return (0, client_1.apiRequestWithData)(`/human-review/queue/${itemId}/skip`, { method: 'POST', apiVersion: 'none' });
|
|
112
|
+
},
|
|
113
|
+
/**
|
|
114
|
+
* Submit a review
|
|
115
|
+
*
|
|
116
|
+
* @example
|
|
117
|
+
* ```typescript
|
|
118
|
+
* const result = await humanReview.submit('item_123', {
|
|
119
|
+
* passed: true,
|
|
120
|
+
* score: 85,
|
|
121
|
+
* reasoning: 'Response accurately addressed the query',
|
|
122
|
+
* durationMs: 45000,
|
|
123
|
+
* });
|
|
124
|
+
* ```
|
|
125
|
+
*/
|
|
126
|
+
async submit(itemId, review) {
|
|
127
|
+
return (0, client_1.apiRequestWithData)(`/human-review/queue/${itemId}/submit`, { method: 'POST', body: review, apiVersion: 'none' });
|
|
128
|
+
},
|
|
129
|
+
/**
|
|
130
|
+
* Get queue statistics
|
|
131
|
+
*
|
|
132
|
+
* @example
|
|
133
|
+
* ```typescript
|
|
134
|
+
* const stats = await humanReview.getStats('agent_123');
|
|
135
|
+
* ```
|
|
136
|
+
*/
|
|
137
|
+
async getStats(agentId) {
|
|
138
|
+
const params = agentId ? `?agentId=${agentId}` : '';
|
|
139
|
+
return (0, client_1.apiRequestWithData)(`/human-review/stats${params}`, { apiVersion: 'none' });
|
|
140
|
+
},
|
|
141
|
+
/**
|
|
142
|
+
* Get next item for a reviewer
|
|
143
|
+
*
|
|
144
|
+
* @example
|
|
145
|
+
* ```typescript
|
|
146
|
+
* const nextItem = await humanReview.getNextItem('agent_123');
|
|
147
|
+
* ```
|
|
148
|
+
*/
|
|
149
|
+
async getNextItem(agentId) {
|
|
150
|
+
const params = agentId ? `?agentId=${agentId}` : '';
|
|
151
|
+
return (0, client_1.apiRequestWithData)(`/human-review/next-item${params}`, { apiVersion: 'none' });
|
|
152
|
+
},
|
|
153
|
+
/**
|
|
154
|
+
* Get available review types
|
|
155
|
+
*
|
|
156
|
+
* @example
|
|
157
|
+
* ```typescript
|
|
158
|
+
* const types = await humanReview.getReviewTypes();
|
|
159
|
+
* ```
|
|
160
|
+
*/
|
|
161
|
+
async getReviewTypes() {
|
|
162
|
+
return (0, client_1.apiRequestWithData)('/human-review/review-types', { apiVersion: 'none' });
|
|
163
|
+
},
|
|
164
|
+
// ---------------------------------------------------------------------------
|
|
165
|
+
// CALIBRATION SETS
|
|
166
|
+
// ---------------------------------------------------------------------------
|
|
167
|
+
/**
|
|
168
|
+
* Get calibration sets
|
|
169
|
+
*
|
|
170
|
+
* @example
|
|
171
|
+
* ```typescript
|
|
172
|
+
* const sets = await humanReview.getCalibrationSets('agent_123');
|
|
173
|
+
* ```
|
|
174
|
+
*/
|
|
175
|
+
async getCalibrationSets(agentId) {
|
|
176
|
+
const params = agentId ? `?agentId=${agentId}` : '';
|
|
177
|
+
return (0, client_1.apiRequestWithData)(`/human-review/calibration-sets${params}`, { apiVersion: 'none' });
|
|
178
|
+
},
|
|
179
|
+
/**
|
|
180
|
+
* Create a calibration set
|
|
181
|
+
*
|
|
182
|
+
* @example
|
|
183
|
+
* ```typescript
|
|
184
|
+
* const set = await humanReview.createCalibrationSet({
|
|
185
|
+
* name: 'Quality Calibration Q1',
|
|
186
|
+
* agentId: 'agent_123',
|
|
187
|
+
* targetAgreement: 0.85,
|
|
188
|
+
* minSamples: 50,
|
|
189
|
+
* });
|
|
190
|
+
* ```
|
|
191
|
+
*/
|
|
192
|
+
async createCalibrationSet(options) {
|
|
193
|
+
return (0, client_1.apiRequestWithData)('/human-review/calibration-sets', {
|
|
194
|
+
method: 'POST',
|
|
195
|
+
body: options,
|
|
196
|
+
apiVersion: 'none',
|
|
197
|
+
});
|
|
198
|
+
},
|
|
199
|
+
/**
|
|
200
|
+
* Get a calibration set by ID
|
|
201
|
+
*
|
|
202
|
+
* @example
|
|
203
|
+
* ```typescript
|
|
204
|
+
* const set = await humanReview.getCalibrationSet('set_123');
|
|
205
|
+
* ```
|
|
206
|
+
*/
|
|
207
|
+
async getCalibrationSet(setId) {
|
|
208
|
+
return (0, client_1.apiRequestWithData)(`/human-review/calibration-sets/${setId}`, { apiVersion: 'none' });
|
|
209
|
+
},
|
|
210
|
+
// ---------------------------------------------------------------------------
|
|
211
|
+
// REVIEWER CALIBRATION
|
|
212
|
+
// ---------------------------------------------------------------------------
|
|
213
|
+
/**
|
|
214
|
+
* Get certified reviewers for a calibration set
|
|
215
|
+
*
|
|
216
|
+
* @example
|
|
217
|
+
* ```typescript
|
|
218
|
+
* const reviewers = await humanReview.getCertifiedReviewers('set_123');
|
|
219
|
+
* ```
|
|
220
|
+
*/
|
|
221
|
+
async getCertifiedReviewers(calibrationSetId) {
|
|
222
|
+
return (0, client_1.apiRequestWithData)(`/human-review/reviewers?calibrationSetId=${calibrationSetId}`, { apiVersion: 'none' });
|
|
223
|
+
},
|
|
224
|
+
/**
|
|
225
|
+
* Get reviewer calibration status
|
|
226
|
+
*
|
|
227
|
+
* @example
|
|
228
|
+
* ```typescript
|
|
229
|
+
* const calibrations = await humanReview.getReviewerCalibrations('user_123');
|
|
230
|
+
* ```
|
|
231
|
+
*/
|
|
232
|
+
async getReviewerCalibrations(userId) {
|
|
233
|
+
return (0, client_1.apiRequestWithData)(`/human-review/calibration/${userId}`, { apiVersion: 'none' });
|
|
234
|
+
},
|
|
235
|
+
};
|
|
236
|
+
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"human-review.js","sourceRoot":"","sources":["../../src/api/human-review.ts"],"names":[],"mappings":";AAAA;;;;GAIG;;;AAEH,2CAAgE;AAuHhE,+EAA+E;AAC/E,0BAA0B;AAC1B,+EAA+E;AAE/E;;GAEG;AACU,QAAA,WAAW,GAAG;IACzB;;;;;;;;;;;OAWG;IACH,KAAK,CAAC,QAAQ,CAAC,UAA4B,EAAE;QAC3C,MAAM,MAAM,GAAG,IAAI,eAAe,EAAE,CAAC;QACrC,IAAI,OAAO,CAAC,OAAO;YAAE,MAAM,CAAC,GAAG,CAAC,SAAS,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC;QAC5D,IAAI,OAAO,CAAC,MAAM;YAAE,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC;QACzD,IAAI,OAAO,CAAC,UAAU;YAAE,MAAM,CAAC,GAAG,CAAC,YAAY,EAAE,OAAO,CAAC,UAAU,CAAC,CAAC;QACrE,IAAI,OAAO,CAAC,aAAa,KAAK,SAAS;YAAE,MAAM,CAAC,GAAG,CAAC,eAAe,EAAE,MAAM,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC,CAAC;QACpG,IAAI,OAAO,CAAC,WAAW,KAAK,SAAS;YAAE,MAAM,CAAC,GAAG,CAAC,aAAa,EAAE,MAAM,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC,CAAC;QAC9F,IAAI,OAAO,CAAC,KAAK;YAAE,MAAM,CAAC,GAAG,CAAC,OAAO,EAAE,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC;QAC9D,IAAI,OAAO,CAAC,MAAM;YAAE,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC;QAEjE,OAAO,IAAA,2BAAkB,EACvB,uBAAuB,MAAM,CAAC,QAAQ,EAAE,EAAE,EAC1C,EAAE,UAAU,EAAE,MAAM,EAAE,CACvB,CAAC;IACJ,CAAC;IAED;;;;;;;;;;;;;;OAcG;IACH,KAAK,CAAC,UAAU,CAAC,OAA0B;QACzC,OAAO,IAAA,2BAAkB,EAAuB,qBAAqB,EAAE;YACrE,MAAM,EAAE,MAAM;YACd,IAAI,EAAE,OAAO;YACb,UAAU,EAAE,MAAM;SACnB,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,OAAO,CAAC,MAAc;QAC1B,OAAO,IAAA,2BAAkB,EACvB,uBAAuB,MAAM,EAAE,EAC/B,EAAE,UAAU,EAAE,MAAM,EAAE,CACvB,CAAC;IACJ,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,KAAK,CAAC,MAAc;QACxB,OAAO,IAAA,2BAAkB,EACvB,uBAAuB,MAAM,QAAQ,EACrC,EAAE,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,CACvC,CAAC;IACJ,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,OAAO,CAAC,MAAc;QAC1B,OAAO,IAAA,2BAAkB,EACvB,uBAAuB,MAAM,UAAU,EACvC,EAAE,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,CACvC,CAAC;IACJ,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,IAAI,CAAC,MAAc;QACvB,OAAO,IAAA,2BAAkB,EACvB,uBAAuB,MAAM,OAAO,EACpC,EAAE,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,CACvC,CAAC;IACJ,CAAC;IAED;;;;;;;;;;;;OA
YG;IACH,KAAK,CAAC,MAAM,CAAC,MAAc,EAAE,MAA2B;QACtD,OAAO,IAAA,2BAAkB,EACvB,uBAAuB,MAAM,SAAS,EACtC,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,CACrD,CAAC;IACJ,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,QAAQ,CAAC,OAAgB;QAC7B,MAAM,MAAM,GAAG,OAAO,CAAC,CAAC,CAAC,YAAY,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QACpD,OAAO,IAAA,2BAAkB,EACvB,sBAAsB,MAAM,EAAE,EAC9B,EAAE,UAAU,EAAE,MAAM,EAAE,CACvB,CAAC;IACJ,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,WAAW,CAAC,OAAgB;QAChC,MAAM,MAAM,GAAG,OAAO,CAAC,CAAC,CAAC,YAAY,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QACpD,OAAO,IAAA,2BAAkB,EACvB,0BAA0B,MAAM,EAAE,EAClC,EAAE,UAAU,EAAE,MAAM,EAAE,CACvB,CAAC;IACJ,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,cAAc;QAMlB,OAAO,IAAA,2BAAkB,EAAC,4BAA4B,EAAE,EAAE,UAAU,EAAE,MAAM,EAAE,CAAC,CAAC;IAClF,CAAC;IAED,8EAA8E;IAC9E,mBAAmB;IACnB,8EAA8E;IAE9E;;;;;;;OAOG;IACH,KAAK,CAAC,kBAAkB,CAAC,OAAgB;QACvC,MAAM,MAAM,GAAG,OAAO,CAAC,CAAC,CAAC,YAAY,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QACpD,OAAO,IAAA,2BAAkB,EACvB,iCAAiC,MAAM,EAAE,EACzC,EAAE,UAAU,EAAE,MAAM,EAAE,CACvB,CAAC;IACJ,CAAC;IAED;;;;;;;;;;;;OAYG;IACH,KAAK,CAAC,oBAAoB,CAAC,OAAoC;QAC7D,OAAO,IAAA,2BAAkB,EAAiB,gCAAgC,EAAE;YAC1E,MAAM,EAAE,MAAM;YACd,IAAI,EAAE,OAAO;YACb,UAAU,EAAE,MAAM;SACnB,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,iBAAiB,CAAC,KAAa;QACnC,OAAO,IAAA,2BAAkB,EACvB,kCAAkC,KAAK,EAAE,EACzC,EAAE,UAAU,EAAE,MAAM,EAAE,CACvB,CAAC;IACJ,CAAC;IAED,8EAA8E;IAC9E,uBAAuB;IACvB,8EAA8E;IAE9E;;;;;;;OAOG;IACH,KAAK,CAAC,qBAAqB,CAAC,gBAAwB;QAClD,OAAO,IAAA,2BAAkB,EACvB,4CAA4C,gBAAgB,EAAE,EAC9D,EAAE,UAAU,EAAE,MAAM,EAAE,CACvB,CAAC;IACJ,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,uBAAuB,CAAC,MAAc;QAC1C,OAAO,IAAA,2BAAkB,EACvB,6BAA6B,MAAM,EAAE,EACrC,EAAE,UAAU,EAAE,MAAM,EAAE,CACvB,CAAC;IACJ,CAAC;CACF,CAAC","sourcesContent":["/**\n * ThinkHive SDK v3.0 - Human Review API\n *\n * API for managing human review queue, calibration sets, and reviewer management\n */\n\nimport { apiRequest, apiRequestWithData } from '../core/client';\n\n// 
============================================================================\n// TYPES\n// ============================================================================\n\nexport type HumanReviewStatus = 'pending' | 'in_progress' | 'completed' | 'skipped' | 'expired';\nexport type HumanReviewType = 'disagreement' | 'low_confidence' | 'calibration' | 'random_sample' | 'flagged';\n\nexport interface HumanReviewQueueItem {\n  id: string;\n  companyId: string;\n  agentId: string;\n  traceId: string;\n  criterionId?: string;\n  reviewType: HumanReviewType;\n  priority: number;\n  status: HumanReviewStatus;\n  llmScore?: number;\n  llmPassed?: boolean;\n  llmReasoning?: string;\n  llmConfidence?: number;\n  reviewerId?: string;\n  humanScore?: number;\n  humanPassed?: boolean;\n  humanReasoning?: string;\n  reviewDurationMs?: number;\n  isCalibrationSample?: boolean;\n  calibrationSetId?: string;\n  expectedScore?: number;\n  expectedPassed?: boolean;\n  expiresAt?: string;\n  claimedAt?: string;\n  completedAt?: string;\n  createdAt: string;\n}\n\nexport interface AddToQueueOptions {\n  traceId: string;\n  criterionId?: string;\n  agentId: string;\n  reviewType: HumanReviewType;\n  priority?: number;\n  llmScore?: number;\n  llmPassed?: boolean;\n  llmReasoning?: string;\n  llmConfidence?: number;\n  isCalibrationSample?: boolean;\n  calibrationSetId?: string;\n  expectedScore?: number;\n  expectedPassed?: boolean;\n  expiresInMs?: number;\n  metadata?: Record<string, unknown>;\n}\n\nexport interface SubmitReviewOptions {\n  passed: boolean;\n  score: number;\n  reasoning: string;\n  durationMs: number;\n}\n\nexport interface CalibrationSet {\n  id: string;\n  companyId: string;\n  agentId?: string;\n  name: string;\n  description?: string;\n  targetSampleCount: number;\n  currentSampleCount: number;\n  minAgreementRate: number;\n  passingScoreThreshold: number;\n  criteriaIds: string[];\n  isActive: boolean;\n  createdBy?: string;\n  createdAt: string;\n}\n\nexport 
interface CreateCalibrationSetOptions {\n  name: string;\n  description?: string;\n  agentId: string;\n  criterionId?: string;\n  targetAgreement?: number;\n  minSamples?: number;\n}\n\nexport interface ReviewerCalibration {\n  id: string;\n  userId: string;\n  calibrationSetId: string;\n  totalReviews: number;\n  agreementRate: number;\n  meanAbsoluteError: number;\n  isCertified: boolean;\n  certifiedAt?: string;\n  lastReviewAt?: string;\n}\n\nexport interface QueueStats {\n  pending: number;\n  inProgress: number;\n  completed: number;\n  skipped: number;\n  expired: number;\n  avgReviewDurationMs?: number;\n  avgAgreementRate?: number;\n}\n\nexport interface ListQueueOptions {\n  agentId?: string;\n  status?: HumanReviewStatus;\n  reviewType?: HumanReviewType;\n  isCalibration?: boolean;\n  minPriority?: number;\n  limit?: number;\n  offset?: number;\n}\n\n// ============================================================================\n// HUMAN REVIEW API CLIENT\n// ============================================================================\n\n/**\n * Human Review API client for managing human review queue and calibration\n */\nexport const humanReview = {\n  /**\n   * Get pending review queue items\n   *\n   * @example\n   * ```typescript\n   * const items = await humanReview.getQueue({\n   *   agentId: 'agent_123',\n   *   status: 'pending',\n   *   limit: 20,\n   * });\n   * ```\n   */\n  async getQueue(options: ListQueueOptions = {}): Promise<HumanReviewQueueItem[]> {\n    const params = new URLSearchParams();\n    if (options.agentId) params.set('agentId', options.agentId);\n    if (options.status) params.set('status', options.status);\n    if (options.reviewType) params.set('reviewType', options.reviewType);\n    if (options.isCalibration !== undefined) params.set('isCalibration', String(options.isCalibration));\n    if (options.minPriority !== undefined) params.set('minPriority', String(options.minPriority));\n    if (options.limit) params.set('limit', 
String(options.limit));\n    if (options.offset) params.set('offset', String(options.offset));\n\n    return apiRequestWithData<HumanReviewQueueItem[]>(\n      `/human-review/queue?${params.toString()}`,\n      { apiVersion: 'none' }\n    );\n  },\n\n  /**\n   * Add an item to the review queue\n   *\n   * @example\n   * ```typescript\n   * const item = await humanReview.addToQueue({\n   *   traceId: 'trace_123',\n   *   agentId: 'agent_123',\n   *   reviewType: 'disagreement',\n   *   priority: 80,\n   *   llmScore: 65,\n   *   llmPassed: true,\n   * });\n   * ```\n   */\n  async addToQueue(options: AddToQueueOptions): Promise<HumanReviewQueueItem> {\n    return apiRequestWithData<HumanReviewQueueItem>('/human-review/queue', {\n      method: 'POST',\n      body: options,\n      apiVersion: 'none',\n    });\n  },\n\n  /**\n   * Get a specific review item\n   *\n   * @example\n   * ```typescript\n   * const item = await humanReview.getItem('item_123');\n   * ```\n   */\n  async getItem(itemId: string): Promise<HumanReviewQueueItem> {\n    return apiRequestWithData<HumanReviewQueueItem>(\n      `/human-review/queue/${itemId}`,\n      { apiVersion: 'none' }\n    );\n  },\n\n  /**\n   * Claim a review item for processing\n   *\n   * @example\n   * ```typescript\n   * const item = await humanReview.claim('item_123');\n   * ```\n   */\n  async claim(itemId: string): Promise<HumanReviewQueueItem> {\n    return apiRequestWithData<HumanReviewQueueItem>(\n      `/human-review/queue/${itemId}/claim`,\n      { method: 'POST', apiVersion: 'none' }\n    );\n  },\n\n  /**\n   * Release a claimed review item\n   *\n   * @example\n   * ```typescript\n   * await humanReview.release('item_123');\n   * ```\n   */\n  async release(itemId: string): Promise<HumanReviewQueueItem> {\n    return apiRequestWithData<HumanReviewQueueItem>(\n      `/human-review/queue/${itemId}/release`,\n      { method: 'POST', apiVersion: 'none' }\n    );\n  },\n\n  /**\n   * Skip a review item\n   *\n   * 
@example\n   * ```typescript\n   * await humanReview.skip('item_123');\n   * ```\n   */\n  async skip(itemId: string): Promise<HumanReviewQueueItem> {\n    return apiRequestWithData<HumanReviewQueueItem>(\n      `/human-review/queue/${itemId}/skip`,\n      { method: 'POST', apiVersion: 'none' }\n    );\n  },\n\n  /**\n   * Submit a review\n   *\n   * @example\n   * ```typescript\n   * const result = await humanReview.submit('item_123', {\n   *   passed: true,\n   *   score: 85,\n   *   reasoning: 'Response accurately addressed the query',\n   *   durationMs: 45000,\n   * });\n   * ```\n   */\n  async submit(itemId: string, review: SubmitReviewOptions): Promise<HumanReviewQueueItem> {\n    return apiRequestWithData<HumanReviewQueueItem>(\n      `/human-review/queue/${itemId}/submit`,\n      { method: 'POST', body: review, apiVersion: 'none' }\n    );\n  },\n\n  /**\n   * Get queue statistics\n   *\n   * @example\n   * ```typescript\n   * const stats = await humanReview.getStats('agent_123');\n   * ```\n   */\n  async getStats(agentId?: string): Promise<QueueStats> {\n    const params = agentId ? `?agentId=${agentId}` : '';\n    return apiRequestWithData<QueueStats>(\n      `/human-review/stats${params}`,\n      { apiVersion: 'none' }\n    );\n  },\n\n  /**\n   * Get next item for a reviewer\n   *\n   * @example\n   * ```typescript\n   * const nextItem = await humanReview.getNextItem('agent_123');\n   * ```\n   */\n  async getNextItem(agentId?: string): Promise<HumanReviewQueueItem | null> {\n    const params = agentId ? 
`?agentId=${agentId}` : '';\n    return apiRequestWithData<HumanReviewQueueItem | null>(\n      `/human-review/next-item${params}`,\n      { apiVersion: 'none' }\n    );\n  },\n\n  /**\n   * Get available review types\n   *\n   * @example\n   * ```typescript\n   * const types = await humanReview.getReviewTypes();\n   * ```\n   */\n  async getReviewTypes(): Promise<Array<{\n    id: string;\n    name: string;\n    description: string;\n    autoTrigger: boolean;\n  }>> {\n    return apiRequestWithData('/human-review/review-types', { apiVersion: 'none' });\n  },\n\n  // ---------------------------------------------------------------------------\n  // CALIBRATION SETS\n  // ---------------------------------------------------------------------------\n\n  /**\n   * Get calibration sets\n   *\n   * @example\n   * ```typescript\n   * const sets = await humanReview.getCalibrationSets('agent_123');\n   * ```\n   */\n  async getCalibrationSets(agentId?: string): Promise<CalibrationSet[]> {\n    const params = agentId ? 
`?agentId=${agentId}` : '';\n    return apiRequestWithData<CalibrationSet[]>(\n      `/human-review/calibration-sets${params}`,\n      { apiVersion: 'none' }\n    );\n  },\n\n  /**\n   * Create a calibration set\n   *\n   * @example\n   * ```typescript\n   * const set = await humanReview.createCalibrationSet({\n   *   name: 'Quality Calibration Q1',\n   *   agentId: 'agent_123',\n   *   targetAgreement: 0.85,\n   *   minSamples: 50,\n   * });\n   * ```\n   */\n  async createCalibrationSet(options: CreateCalibrationSetOptions): Promise<CalibrationSet> {\n    return apiRequestWithData<CalibrationSet>('/human-review/calibration-sets', {\n      method: 'POST',\n      body: options,\n      apiVersion: 'none',\n    });\n  },\n\n  /**\n   * Get a calibration set by ID\n   *\n   * @example\n   * ```typescript\n   * const set = await humanReview.getCalibrationSet('set_123');\n   * ```\n   */\n  async getCalibrationSet(setId: string): Promise<CalibrationSet> {\n    return apiRequestWithData<CalibrationSet>(\n      `/human-review/calibration-sets/${setId}`,\n      { apiVersion: 'none' }\n    );\n  },\n\n  // ---------------------------------------------------------------------------\n  // REVIEWER CALIBRATION\n  // ---------------------------------------------------------------------------\n\n  /**\n   * Get certified reviewers for a calibration set\n   *\n   * @example\n   * ```typescript\n   * const reviewers = await humanReview.getCertifiedReviewers('set_123');\n   * ```\n   */\n  async getCertifiedReviewers(calibrationSetId: string): Promise<ReviewerCalibration[]> {\n    return apiRequestWithData<ReviewerCalibration[]>(\n      `/human-review/reviewers?calibrationSetId=${calibrationSetId}`,\n      { apiVersion: 'none' }\n    );\n  },\n\n  /**\n   * Get reviewer calibration status\n   *\n   * @example\n   * ```typescript\n   * const calibrations = await humanReview.getReviewerCalibrations('user_123');\n   * ```\n   */\n  async getReviewerCalibrations(userId: string): 
Promise<ReviewerCalibration[]> {\n    return apiRequestWithData<ReviewerCalibration[]>(\n      `/human-review/calibration/${userId}`,\n      { apiVersion: 'none' }\n    );\n  },\n};\n"]}
|
|
@@ -0,0 +1,300 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ThinkHive SDK v3.0 - Non-Determinism API
|
|
3
|
+
*
|
|
4
|
+
* API for pass@k / pass^k analysis to measure LLM evaluation reliability
|
|
5
|
+
*/
|
|
6
|
+
/** Kind of analysis a non-determinism run performs. */
export type NondeterminismRunType =
    | 'pass_at_k'
    | 'pass_to_k'
    | 'variance'
    | 'reliability';
/** Lifecycle state of a non-determinism run. */
export type NondeterminismRunStatus =
    | 'pending'
    | 'running'
    | 'completed'
    | 'failed'
    | 'cancelled';
|
|
8
|
+
/**
 * A non-determinism analysis run as returned by the API.
 *
 * Several numeric-looking fields (`temperature`, the rate and score fields)
 * are typed `string` here, whereas `CreateRunOptions.temperature` is a
 * `number`. NOTE(review): presumably decimals serialized as strings; confirm
 * against the API before parsing.
 */
export interface NondeterminismRun {
    // Identity
    id: string;
    companyId: string;
    agentId: string;

    // Configuration
    runType: NondeterminismRunType;
    /** The `k` of the pass@k / pass^k analysis. */
    kValue: number;
    status: NondeterminismRunStatus;
    traceCount: number;
    criterionId?: string;
    criteriaIds: string[];
    temperature?: string;
    model?: string;

    // Progress and results
    progressPercent: number;
    passAtKRate?: string;
    passToKRate?: string;
    avgVariance?: string;
    reliabilityScore?: string;

    // Audit fields
    startedAt?: string;
    completedAt?: string;
    createdBy?: string;
    createdAt: string;
}
|
|
30
|
+
/**
 * One recorded sample of a run, as returned by the API.
 *
 * `score`, `confidence`, `costUsd` and `temperature` are typed `string` here,
 * while `RecordSampleOptions` takes them as `number`.
 * NOTE(review): presumably serialized decimals; confirm before parsing.
 */
export interface NondeterminismSample {
    // Identity and position
    id: string;
    runId: string;
    traceId: string;
    criterionId: string;
    sampleIndex: number;

    // Evaluation outcome
    score: string;
    passed: boolean;
    reasoning?: string;
    confidence?: string;

    // Execution details
    tokensUsed?: number;
    costUsd?: string;
    model?: string;
    temperature?: string;
    latencyMs?: number;
    error?: string;

    createdAt: string;
}
|
|
48
|
+
/** Input accepted by `nondeterminism.createRun`. */
export interface CreateRunOptions {
    // Required
    agentId: string;
    /** The `k` of the pass@k / pass^k analysis. */
    kValue: number;
    traceIds: string[];

    // Optional
    criterionId?: string;
    criteriaIds?: string[];
    runType?: NondeterminismRunType;
    temperature?: number;
    model?: string;
}
|
|
58
|
+
/**
 * Input accepted by `nondeterminism.recordSample`.
 *
 * Field names match `NondeterminismSample`, but `score`, `confidence`,
 * `costUsd` and `temperature` are supplied as numbers here.
 */
export interface RecordSampleOptions {
    // Which sample this is
    runId: string;
    traceId: string;
    criterionId: string;
    sampleIndex: number;

    // Evaluation outcome
    score: number;
    passed: boolean;
    reasoning?: string;
    confidence?: number;

    // Execution details
    tokensUsed?: number;
    costUsd?: number;
    model?: string;
    temperature?: number;
    latencyMs?: number;
    error?: string;
}
|
|
74
|
+
/**
 * Aggregated result for one trace across its recorded samples.
 *
 * NOTE(review): `passRate` is presumably `passCount / totalCount`, and
 * `isConsistent` presumably derives from the variance; the computation is not
 * part of this declaration file, so confirm before relying on either.
 */
export interface TraceAnalysis {
    traceId: string;
    samples: NondeterminismSample[];

    // Pass statistics
    passCount: number;
    totalCount: number;
    passRate: number;

    // Score statistics
    scoreVariance: number;
    meanScore: number;

    isConsistent: boolean;
}
|
|
84
|
+
/**
 * Aggregated result for one criterion across all analysed traces.
 *
 * The rates and score are plain numbers here, unlike the string-typed
 * counterparts on `NondeterminismRun`.
 */
export interface CriterionAnalysis {
    criterionId: string;
    traceAnalyses: TraceAnalysis[];

    // Aggregate metrics
    passAtKRate: number;
    passToKRate: number;
    reliabilityScore: number;

    // Verdict
    isReliable: boolean;
    recommendation: string;
}
|
|
93
|
+
/**
 * A run together with its per-trace and per-criterion analyses. This is the
 * shape returned by `nondeterminism.getRunSummary` and
 * `nondeterminism.analyzeRun`.
 */
export interface RunSummary {
    /** The run the analyses belong to. */
    run: NondeterminismRun;
    /** One entry per analysed trace. */
    traceAnalyses: TraceAnalysis[];
    /** One entry per analysed criterion. */
    criterionAnalyses: CriterionAnalysis[];
}
|
|
98
|
+
/** Optional filters and paging accepted by `nondeterminism.getRuns`. */
export interface ListRunsOptions {
    // Filters
    agentId?: string;
    status?: NondeterminismRunStatus;

    // Paging
    limit?: number;
    offset?: number;
}
|
|
104
|
+
/**
 * Descriptive information about the analysis concepts, as returned by
 * `nondeterminism.getInfo`.
 *
 * Only `passAtK` and `passToK` carry a `formula`; `variance` and
 * `reliability` do not.
 */
export interface PassAtKInfo {
    concepts: {
        passAtK: { name: string; description: string; formula: string; useCase: string };
        passToK: { name: string; description: string; formula: string; useCase: string };
        variance: { name: string; description: string; useCase: string };
        reliability: { name: string; description: string; useCase: string };
    };
    /** Recommendation texts keyed by string; the key set is not declared here. */
    recommendations: Record<string, string>;
    defaults: {
        kValue: number;
        reliabilityThreshold: number;
        varianceThreshold: number;
    };
}
|
|
136
|
+
/**
 * Client for the non-determinism API: create and drive analysis runs, record
 * samples, and fetch pass@k / pass^k reliability summaries.
 */
export declare const nondeterminism: {
    /**
     * Create a non-determinism analysis run.
     *
     * @param options - Agent, `k`, the traces to analyse and optional settings.
     * @returns The created run.
     *
     * @example
     * ```typescript
     * const run = await nondeterminism.createRun({
     *   agentId: 'agent_123',
     *   criterionId: 'criterion_456',
     *   kValue: 5,
     *   traceIds: ['trace_1', 'trace_2', 'trace_3'],
     *   runType: 'pass_at_k',
     * });
     * ```
     */
    createRun(options: CreateRunOptions): Promise<NondeterminismRun>;

    /**
     * List non-determinism runs.
     *
     * @param options - Optional filters and paging.
     * @returns The matching runs.
     *
     * @example
     * ```typescript
     * const runs = await nondeterminism.getRuns({ agentId: 'agent_123' });
     * ```
     */
    getRuns(options?: ListRunsOptions): Promise<NondeterminismRun[]>;

    /**
     * Fetch a single run.
     *
     * @param runId - Identifier of the run.
     * @returns The run.
     *
     * @example
     * ```typescript
     * const run = await nondeterminism.getRun('run_123');
     * ```
     */
    getRun(runId: string): Promise<NondeterminismRun>;

    /**
     * Start a run. Resolves with no value.
     *
     * @param runId - Identifier of the run to start.
     *
     * @example
     * ```typescript
     * await nondeterminism.startRun('run_123');
     * ```
     */
    startRun(runId: string): Promise<void>;

    /**
     * Complete a run. Resolves with no value.
     *
     * @param runId - Identifier of the run to complete.
     *
     * @example
     * ```typescript
     * await nondeterminism.completeRun('run_123');
     * ```
     */
    completeRun(runId: string): Promise<void>;

    /**
     * Record one sample result for a run.
     *
     * @param options - The sample to record, including the run, trace and
     *   criterion it belongs to.
     * @returns The stored sample.
     *
     * @example
     * ```typescript
     * const sample = await nondeterminism.recordSample({
     *   runId: 'run_123',
     *   traceId: 'trace_456',
     *   criterionId: 'criterion_789',
     *   sampleIndex: 0,
     *   score: 85,
     *   passed: true,
     *   reasoning: 'Response meets quality criteria',
     * });
     * ```
     */
    recordSample(options: RecordSampleOptions): Promise<NondeterminismSample>;

    /**
     * List the samples recorded for a run.
     *
     * @param runId - Identifier of the run.
     * @returns The run's samples.
     *
     * @example
     * ```typescript
     * const samples = await nondeterminism.getSamples('run_123');
     * ```
     */
    getSamples(runId: string): Promise<NondeterminismSample[]>;

    /**
     * Fetch a run's summary, including its analyses.
     *
     * @param runId - Identifier of the run.
     * @returns The run with per-trace and per-criterion analyses.
     *
     * @example
     * ```typescript
     * const summary = await nondeterminism.getRunSummary('run_123');
     * console.log(`Pass@k rate: ${summary.criterionAnalyses[0].passAtKRate}`);
     * ```
     */
    getRunSummary(runId: string): Promise<RunSummary>;

    /**
     * Trigger analysis of a completed run.
     *
     * @param runId - Identifier of the run to analyse.
     * @returns The resulting summary.
     *
     * @example
     * ```typescript
     * const summary = await nondeterminism.analyzeRun('run_123');
     * ```
     */
    analyzeRun(runId: string): Promise<RunSummary>;

    /**
     * Fetch descriptive information about pass@k analysis: concepts,
     * recommendations and defaults.
     *
     * @returns The info document.
     *
     * @example
     * ```typescript
     * const info = await nondeterminism.getInfo();
     * console.log(info.concepts.passAtK.description);
     * ```
     */
    getInfo(): Promise<PassAtKInfo>;
};
|
|
247
|
+
/**
 * Compute the pass@k probability: the chance that at least one of `k` runs
 * passes, given the single-run pass rate.
 *
 * NOTE(review): the example below is consistent with `1 - (1 - passRate)^k`
 * (independent runs); the implementation is not part of this declaration
 * file, so confirm there.
 *
 * @param passRate - Single-run pass rate (0-1)
 * @param k - Number of runs
 * @returns Probability that at least 1 of k runs passes
 *
 * @example
 * ```typescript
 * const passAtK = calculatePassAtK(0.7, 3); // ~0.973
 * ```
 */
export declare function calculatePassAtK(passRate: number, k: number): number;
|
|
260
|
+
/**
 * Compute the pass^k probability: the chance that all `k` runs pass, given
 * the single-run pass rate.
 *
 * NOTE(review): the example below is consistent with `passRate^k`
 * (independent runs); the implementation is not part of this declaration
 * file, so confirm there.
 *
 * @param passRate - Single-run pass rate (0-1)
 * @param k - Number of runs
 * @returns Probability that all k runs pass
 *
 * @example
 * ```typescript
 * const passToK = calculatePassToK(0.7, 3); // ~0.343
 * ```
 */
export declare function calculatePassToK(passRate: number, k: number): number;
|
|
273
|
+
/**
 * Compute the single-run pass rate needed to reach a target pass@k
 * probability over `k` runs; the inverse of `calculatePassAtK`.
 *
 * NOTE(review): the example below is consistent with
 * `1 - (1 - targetPassAtK)^(1/k)`; the implementation is not part of this
 * declaration file, so confirm there.
 *
 * @param targetPassAtK - Desired pass@k probability
 * @param k - Number of runs
 * @returns Required single-run pass rate
 *
 * @example
 * ```typescript
 * const requiredRate = requiredPassRateForPassAtK(0.95, 3); // ~0.632
 * ```
 */
export declare function requiredPassRateForPassAtK(targetPassAtK: number, k: number): number;
|
|
286
|
+
/**
 * Decide whether an evaluation counts as reliable, based on a criterion
 * analysis.
 *
 * NOTE(review): which fields of `analysis` are compared against the threshold
 * is not visible in this declaration file.
 *
 * @param analysis - Criterion analysis result
 * @param reliabilityThreshold - Minimum reliability score (default 0.8)
 * @returns Whether the evaluation is considered reliable
 */
export declare function isReliableEvaluation(analysis: CriterionAnalysis, reliabilityThreshold?: number): boolean;
|
|
294
|
+
/**
 * Produce a recommendation from a reliability analysis.
 *
 * @param analysis - Criterion analysis result
 * @returns Actionable recommendation string
 */
export declare function getReliabilityRecommendation(analysis: CriterionAnalysis): string;
|