@thinkhive/sdk 3.1.1 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/MIGRATION.md +83 -12
- package/README.md +279 -128
- package/dist/api/agents.d.ts +169 -0
- package/dist/api/agents.js +185 -0
- package/dist/api/apiKeys.d.ts +252 -0
- package/dist/api/apiKeys.js +298 -0
- package/dist/api/business-metrics.d.ts +188 -0
- package/dist/api/business-metrics.js +213 -0
- package/dist/api/calibration.d.ts +0 -62
- package/dist/api/calibration.js +5 -48
- package/dist/api/claims.js +10 -7
- package/dist/api/conversation-eval.d.ts +200 -0
- package/dist/api/conversation-eval.js +235 -0
- package/dist/api/deterministic-graders.d.ts +205 -0
- package/dist/api/deterministic-graders.js +191 -0
- package/dist/api/eval-health.d.ts +250 -0
- package/dist/api/eval-health.js +224 -0
- package/dist/api/human-review.d.ts +275 -0
- package/dist/api/human-review.js +236 -0
- package/dist/api/nondeterminism.d.ts +300 -0
- package/dist/api/nondeterminism.js +250 -0
- package/dist/api/quality-metrics.d.ts +303 -0
- package/dist/api/quality-metrics.js +198 -0
- package/dist/api/roi-analytics.d.ts +263 -0
- package/dist/api/roi-analytics.js +204 -0
- package/dist/api/runs.js +12 -6
- package/dist/api/transcript-patterns.d.ts +204 -0
- package/dist/api/transcript-patterns.js +227 -0
- package/dist/core/client.d.ts +83 -9
- package/dist/core/client.js +229 -34
- package/dist/core/config.d.ts +2 -3
- package/dist/core/config.js +3 -4
- package/dist/core/types.d.ts +57 -4
- package/dist/core/types.js +1 -1
- package/dist/index.d.ts +429 -76
- package/dist/index.js +262 -42
- package/package.json +2 -2
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* ThinkHive SDK v3.0 - Evaluation Health API
|
|
4
|
+
*
|
|
5
|
+
* API for eval saturation monitoring, regression detection, and health reports
|
|
6
|
+
*/
|
|
7
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
8
|
+
exports.evalHealth = void 0;
|
|
9
|
+
exports.hasHealthIssue = hasHealthIssue;
|
|
10
|
+
exports.getSeverityLevel = getSeverityLevel;
|
|
11
|
+
exports.isSaturated = isSaturated;
|
|
12
|
+
exports.getSaturationRecommendation = getSaturationRecommendation;
|
|
13
|
+
const client_1 = require("../core/client");
|
|
14
|
+
// ============================================================================
|
|
15
|
+
// EVAL HEALTH API CLIENT
|
|
16
|
+
// ============================================================================
|
|
17
|
+
/**
|
|
18
|
+
* Evaluation Health API client for monitoring eval quality and detecting regressions
|
|
19
|
+
*/
|
|
20
|
+
exports.evalHealth = {
|
|
21
|
+
/**
|
|
22
|
+
* Get comprehensive health report for an agent
|
|
23
|
+
*
|
|
24
|
+
* @example
|
|
25
|
+
* ```typescript
|
|
26
|
+
* const report = await evalHealth.getReport('agent_123');
|
|
27
|
+
* console.log(`Overall health: ${report.overallHealth}`);
|
|
28
|
+
* console.log(`Active regressions: ${report.regressionCount}`);
|
|
29
|
+
* ```
|
|
30
|
+
*/
|
|
31
|
+
async getReport(agentId) {
|
|
32
|
+
return (0, client_1.apiRequestWithData)(`/eval-health/report?agentId=${agentId}`, { apiVersion: 'none' });
|
|
33
|
+
},
|
|
34
|
+
// ---------------------------------------------------------------------------
|
|
35
|
+
// SNAPSHOTS
|
|
36
|
+
// ---------------------------------------------------------------------------
|
|
37
|
+
/**
|
|
38
|
+
* Get historical health snapshots
|
|
39
|
+
*
|
|
40
|
+
* @example
|
|
41
|
+
* ```typescript
|
|
42
|
+
* const snapshots = await evalHealth.getSnapshots({
|
|
43
|
+
* agentId: 'agent_123',
|
|
44
|
+
* startDate: '2024-01-01T00:00:00Z',
|
|
45
|
+
* endDate: '2024-01-31T23:59:59Z',
|
|
46
|
+
* });
|
|
47
|
+
* ```
|
|
48
|
+
*/
|
|
49
|
+
async getSnapshots(options) {
|
|
50
|
+
const params = new URLSearchParams();
|
|
51
|
+
params.set('agentId', options.agentId);
|
|
52
|
+
if (options.criterionId)
|
|
53
|
+
params.set('criterionId', options.criterionId);
|
|
54
|
+
if (options.startDate)
|
|
55
|
+
params.set('startDate', options.startDate);
|
|
56
|
+
if (options.endDate)
|
|
57
|
+
params.set('endDate', options.endDate);
|
|
58
|
+
return (0, client_1.apiRequestWithData)(`/eval-health/snapshots?${params.toString()}`, { apiVersion: 'none' });
|
|
59
|
+
},
|
|
60
|
+
/**
|
|
61
|
+
* Get latest health snapshot
|
|
62
|
+
*
|
|
63
|
+
* @example
|
|
64
|
+
* ```typescript
|
|
65
|
+
* const snapshot = await evalHealth.getLatestSnapshot('agent_123');
|
|
66
|
+
* ```
|
|
67
|
+
*/
|
|
68
|
+
async getLatestSnapshot(agentId, criterionId) {
|
|
69
|
+
const params = new URLSearchParams();
|
|
70
|
+
params.set('agentId', agentId);
|
|
71
|
+
if (criterionId)
|
|
72
|
+
params.set('criterionId', criterionId);
|
|
73
|
+
return (0, client_1.apiRequestWithData)(`/eval-health/snapshots/latest?${params.toString()}`, { apiVersion: 'none' });
|
|
74
|
+
},
|
|
75
|
+
/**
|
|
76
|
+
* Record a health snapshot
|
|
77
|
+
*
|
|
78
|
+
* @example
|
|
79
|
+
* ```typescript
|
|
80
|
+
* const snapshot = await evalHealth.recordSnapshot({
|
|
81
|
+
* agentId: 'agent_123',
|
|
82
|
+
* snapshotDate: new Date().toISOString(),
|
|
83
|
+
* passRate: '0.85',
|
|
84
|
+
* evalCount: 150,
|
|
85
|
+
* healthStatus: 'healthy',
|
|
86
|
+
* });
|
|
87
|
+
* ```
|
|
88
|
+
*/
|
|
89
|
+
async recordSnapshot(options) {
|
|
90
|
+
return (0, client_1.apiRequestWithData)('/eval-health/snapshots', {
|
|
91
|
+
method: 'POST',
|
|
92
|
+
body: options,
|
|
93
|
+
apiVersion: 'none',
|
|
94
|
+
});
|
|
95
|
+
},
|
|
96
|
+
// ---------------------------------------------------------------------------
|
|
97
|
+
// REGRESSIONS
|
|
98
|
+
// ---------------------------------------------------------------------------
|
|
99
|
+
/**
|
|
100
|
+
* Get unresolved regressions for an agent
|
|
101
|
+
*
|
|
102
|
+
* @example
|
|
103
|
+
* ```typescript
|
|
104
|
+
* const regressions = await evalHealth.getRegressions('agent_123');
|
|
105
|
+
* for (const regression of regressions) {
|
|
106
|
+
* console.log(`${regression.severity}: ${regression.delta}% drop`);
|
|
107
|
+
* }
|
|
108
|
+
* ```
|
|
109
|
+
*/
|
|
110
|
+
async getRegressions(agentId) {
|
|
111
|
+
return (0, client_1.apiRequestWithData)(`/eval-health/regressions?agentId=${agentId}`, { apiVersion: 'none' });
|
|
112
|
+
},
|
|
113
|
+
/**
|
|
114
|
+
* Record a new regression
|
|
115
|
+
*
|
|
116
|
+
* @example
|
|
117
|
+
* ```typescript
|
|
118
|
+
* const regression = await evalHealth.recordRegression({
|
|
119
|
+
* agentId: 'agent_123',
|
|
120
|
+
* severity: 'moderate',
|
|
121
|
+
* baselinePassRate: '0.92',
|
|
122
|
+
* currentPassRate: '0.78',
|
|
123
|
+
* delta: '-0.14',
|
|
124
|
+
* baselinePeriodStart: '2024-01-01T00:00:00Z',
|
|
125
|
+
* baselinePeriodEnd: '2024-01-15T23:59:59Z',
|
|
126
|
+
* currentPeriodStart: '2024-01-16T00:00:00Z',
|
|
127
|
+
* currentPeriodEnd: '2024-01-31T23:59:59Z',
|
|
128
|
+
* });
|
|
129
|
+
* ```
|
|
130
|
+
*/
|
|
131
|
+
async recordRegression(options) {
|
|
132
|
+
return (0, client_1.apiRequestWithData)('/eval-health/regressions', {
|
|
133
|
+
method: 'POST',
|
|
134
|
+
body: options,
|
|
135
|
+
apiVersion: 'none',
|
|
136
|
+
});
|
|
137
|
+
},
|
|
138
|
+
/**
|
|
139
|
+
* Resolve a regression
|
|
140
|
+
*
|
|
141
|
+
* @example
|
|
142
|
+
* ```typescript
|
|
143
|
+
* await evalHealth.resolveRegression('regression_123', {
|
|
144
|
+
* resolutionType: 'fixed',
|
|
145
|
+
* notes: 'Updated prompt template to address quality issues',
|
|
146
|
+
* });
|
|
147
|
+
* ```
|
|
148
|
+
*/
|
|
149
|
+
async resolveRegression(regressionId, options) {
|
|
150
|
+
await (0, client_1.apiRequest)(`/eval-health/regressions/${regressionId}/resolve`, {
|
|
151
|
+
method: 'POST',
|
|
152
|
+
body: options,
|
|
153
|
+
apiVersion: 'none',
|
|
154
|
+
});
|
|
155
|
+
},
|
|
156
|
+
/**
|
|
157
|
+
* Acknowledge a regression
|
|
158
|
+
*
|
|
159
|
+
* @example
|
|
160
|
+
* ```typescript
|
|
161
|
+
* await evalHealth.acknowledgeRegression('regression_123');
|
|
162
|
+
* ```
|
|
163
|
+
*/
|
|
164
|
+
async acknowledgeRegression(regressionId) {
|
|
165
|
+
await (0, client_1.apiRequest)(`/eval-health/regressions/${regressionId}/acknowledge`, {
|
|
166
|
+
method: 'POST',
|
|
167
|
+
apiVersion: 'none',
|
|
168
|
+
});
|
|
169
|
+
},
|
|
170
|
+
};
|
|
171
|
+
// ============================================================================
|
|
172
|
+
// HELPER FUNCTIONS
|
|
173
|
+
// ============================================================================
|
|
174
|
+
/**
 * Report whether a health status signals a problem.
 *
 * @param status - Health status to inspect
 * @returns `true` when the status is exactly 'warning' or 'critical';
 *   `false` for every other value
 */
function hasHealthIssue(status) {
    if (status === 'critical') {
        return true;
    }
    return status === 'warning';
}
|
|
183
|
+
/**
 * Map a regression severity to a number suitable for sorting.
 *
 * @param severity - Regression severity
 * @returns 1 for 'minor', 2 for 'moderate', 3 for 'severe' (higher is worse);
 *   0 for any value that is not one of those three
 */
function getSeverityLevel(severity) {
    if (severity === 'severe') {
        return 3;
    }
    if (severity === 'moderate') {
        return 2;
    }
    if (severity === 'minor') {
        return 1;
    }
    // Unrecognised severities rank below every known one.
    return 0;
}
|
|
197
|
+
/**
 * Tell whether a snapshot reports a saturated evaluation.
 *
 * @param snapshot - Health snapshot; only its `saturationType` is read
 * @returns `true` when `saturationType` is 'ceiling' or 'floor', otherwise `false`
 */
function isSaturated(snapshot) {
    const kind = snapshot.saturationType;
    if (kind === 'ceiling') {
        return true;
    }
    return kind === 'floor';
}
|
|
206
|
+
/**
 * Produce a human-readable recommendation for a saturation type.
 *
 * @param saturationType - Type of saturation ('ceiling', 'floor' or 'healthy')
 * @returns The recommendation text for that type, or a fallback message
 *   when the value is none of the three
 */
function getSaturationRecommendation(saturationType) {
    if (saturationType === 'ceiling') {
        return 'Evaluation criteria may be too lenient. Consider adding stricter checks or more challenging test cases.';
    }
    if (saturationType === 'floor') {
        return 'Evaluation criteria may be too strict. Consider relaxing thresholds or reviewing criteria for accuracy.';
    }
    if (saturationType === 'healthy') {
        return 'Evaluation is operating within healthy parameters.';
    }
    return 'Unable to determine saturation status.';
}
|
|
224
|
+
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"eval-health.js","sourceRoot":"","sources":["../../src/api/eval-health.ts"],"names":[],"mappings":";AAAA;;;;GAIG;;;AA4TH,wCAEC;AAQD,4CAOC;AAQD,kCAEC;AAQD,kEAWC;AAxWD,2CAAgE;AA8HhE,+EAA+E;AAC/E,yBAAyB;AACzB,+EAA+E;AAE/E;;GAEG;AACU,QAAA,UAAU,GAAG;IACxB;;;;;;;;;OASG;IACH,KAAK,CAAC,SAAS,CAAC,OAAe;QAC7B,OAAO,IAAA,2BAAkB,EACvB,+BAA+B,OAAO,EAAE,EACxC,EAAE,UAAU,EAAE,MAAM,EAAE,CACvB,CAAC;IACJ,CAAC;IAED,8EAA8E;IAC9E,YAAY;IACZ,8EAA8E;IAE9E;;;;;;;;;;;OAWG;IACH,KAAK,CAAC,YAAY,CAAC,OAA4B;QAC7C,MAAM,MAAM,GAAG,IAAI,eAAe,EAAE,CAAC;QACrC,MAAM,CAAC,GAAG,CAAC,SAAS,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC;QACvC,IAAI,OAAO,CAAC,WAAW;YAAE,MAAM,CAAC,GAAG,CAAC,aAAa,EAAE,OAAO,CAAC,WAAW,CAAC,CAAC;QACxE,IAAI,OAAO,CAAC,SAAS;YAAE,MAAM,CAAC,GAAG,CAAC,WAAW,EAAE,OAAO,CAAC,SAAS,CAAC,CAAC;QAClE,IAAI,OAAO,CAAC,OAAO;YAAE,MAAM,CAAC,GAAG,CAAC,SAAS,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC;QAE5D,OAAO,IAAA,2BAAkB,EACvB,0BAA0B,MAAM,CAAC,QAAQ,EAAE,EAAE,EAC7C,EAAE,UAAU,EAAE,MAAM,EAAE,CACvB,CAAC;IACJ,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,iBAAiB,CAAC,OAAe,EAAE,WAAoB;QAC3D,MAAM,MAAM,GAAG,IAAI,eAAe,EAAE,CAAC;QACrC,MAAM,CAAC,GAAG,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;QAC/B,IAAI,WAAW;YAAE,MAAM,CAAC,GAAG,CAAC,aAAa,EAAE,WAAW,CAAC,CAAC;QAExD,OAAO,IAAA,2BAAkB,EACvB,iCAAiC,MAAM,CAAC,QAAQ,EAAE,EAAE,EACpD,EAAE,UAAU,EAAE,MAAM,EAAE,CACvB,CAAC;IACJ,CAAC;IAED;;;;;;;;;;;;;OAaG;IACH,KAAK,CAAC,cAAc,CAAC,OAA8B;QACjD,OAAO,IAAA,2BAAkB,EAAqB,wBAAwB,EAAE;YACtE,MAAM,EAAE,MAAM;YACd,IAAI,EAAE,OAAO;YACb,UAAU,EAAE,MAAM;SACnB,CAAC,CAAC;IACL,CAAC;IAED,8EAA8E;IAC9E,cAAc;IACd,8EAA8E;IAE9E;;;;;;;;;;OAUG;IACH,KAAK,CAAC,cAAc,CAAC,OAAe;QAClC,OAAO,IAAA,2BAAkB,EACvB,oCAAoC,OAAO,EAAE,EAC7C,EAAE,UAAU,EAAE,MAAM,EAAE,CACvB,CAAC;IACJ,CAAC;IAED;;;;;;;;;;;;;;;;;OAiBG;IACH,KAAK,CAAC,gBAAgB,CAAC,OAAgC;QACrD,OAAO,IAAA,2BAAkB,EAAiB,0BAA0B,EAAE;YACpE,MAAM,EAAE,MAAM;YACd,IAAI,EAAE,OAAO;YACb,UAAU,EAAE,MAAM;SACnB,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;;;;OAUG;IACH,KAAK,CAAC,iBAAiB,CAAC,YAAoB,EAAE,OAAiC;QAC7E,MAAM,IAAA,mBAAU,EAAC,4BAA4B
,YAAY,UAAU,EAAE;YACnE,MAAM,EAAE,MAAM;YACd,IAAI,EAAE,OAAO;YACb,UAAU,EAAE,MAAM;SACnB,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,qBAAqB,CAAC,YAAoB;QAC9C,MAAM,IAAA,mBAAU,EAAC,4BAA4B,YAAY,cAAc,EAAE;YACvE,MAAM,EAAE,MAAM;YACd,UAAU,EAAE,MAAM;SACnB,CAAC,CAAC;IACL,CAAC;CACF,CAAC;AAEF,+EAA+E;AAC/E,mBAAmB;AACnB,+EAA+E;AAE/E;;;;;GAKG;AACH,SAAgB,cAAc,CAAC,MAAoB;IACjD,OAAO,MAAM,KAAK,SAAS,IAAI,MAAM,KAAK,UAAU,CAAC;AACvD,CAAC;AAED;;;;;GAKG;AACH,SAAgB,gBAAgB,CAAC,QAA4B;IAC3D,QAAQ,QAAQ,EAAE,CAAC;QACjB,KAAK,OAAO,CAAC,CAAC,OAAO,CAAC,CAAC;QACvB,KAAK,UAAU,CAAC,CAAC,OAAO,CAAC,CAAC;QAC1B,KAAK,QAAQ,CAAC,CAAC,OAAO,CAAC,CAAC;QACxB,OAAO,CAAC,CAAC,OAAO,CAAC,CAAC;IACpB,CAAC;AACH,CAAC;AAED;;;;;GAKG;AACH,SAAgB,WAAW,CAAC,QAA4B;IACtD,OAAO,QAAQ,CAAC,cAAc,KAAK,SAAS,IAAI,QAAQ,CAAC,cAAc,KAAK,OAAO,CAAC;AACtF,CAAC;AAED;;;;;GAKG;AACH,SAAgB,2BAA2B,CAAC,cAA8B;IACxE,QAAQ,cAAc,EAAE,CAAC;QACvB,KAAK,SAAS;YACZ,OAAO,yGAAyG,CAAC;QACnH,KAAK,OAAO;YACV,OAAO,yGAAyG,CAAC;QACnH,KAAK,SAAS;YACZ,OAAO,oDAAoD,CAAC;QAC9D;YACE,OAAO,wCAAwC,CAAC;IACpD,CAAC;AACH,CAAC","sourcesContent":["/**\n * ThinkHive SDK v3.0 - Evaluation Health API\n *\n * API for eval saturation monitoring, regression detection, and health reports\n */\n\nimport { apiRequest, apiRequestWithData } from '../core/client';\n\n// ============================================================================\n// TYPES\n// ============================================================================\n\nexport type SaturationType = 'ceiling' | 'floor' | 'healthy';\nexport type HealthStatus = 'healthy' | 'warning' | 'critical';\nexport type RegressionSeverity = 'minor' | 'moderate' | 'severe';\n\nexport interface EvalHealthSnapshot {\n  id: string;\n  companyId: string;\n  agentId: string;\n  criterionId?: string;\n  snapshotDate: string;\n  passRate?: string;\n  evalCount?: number;\n  meanScore?: string;\n  saturationType?: SaturationType;\n  daysAtSaturation?: number;\n  trendDirection?: string;\n  trendStrength?: string;\n  healthStatus?: HealthStatus;\n  
healthScore?: string;\n  createdAt: string;\n}\n\nexport interface EvalRegression {\n  id: string;\n  companyId: string;\n  agentId: string;\n  criterionId?: string;\n  severity: RegressionSeverity;\n  baselinePassRate: string;\n  currentPassRate: string;\n  delta: string;\n  deltaPercent?: string;\n  baselinePeriodStart: string;\n  baselinePeriodEnd: string;\n  currentPeriodStart: string;\n  currentPeriodEnd: string;\n  baselineEvalCount?: number;\n  currentEvalCount?: number;\n  suspectedCauses?: unknown[];\n  isSignificant?: boolean;\n  isResolved: boolean;\n  isAcknowledged: boolean;\n  resolvedAt?: string;\n  resolvedBy?: string;\n  resolutionType?: string;\n  resolutionNotes?: string;\n  acknowledgedAt?: string;\n  acknowledgedBy?: string;\n  detectedAt: string;\n  createdAt: string;\n}\n\nexport interface HealthReport {\n  agentId: string;\n  generatedAt: string;\n  overallHealth: HealthStatus;\n  overallScore: number;\n  passRate: number;\n  evalCount: number;\n  saturationStatus: {\n    type: SaturationType;\n    daysAtSaturation: number;\n    recommendation: string;\n  };\n  regressionCount: number;\n  activeRegressions: EvalRegression[];\n  trend: {\n    direction: 'improving' | 'stable' | 'declining';\n    strength: number;\n    description: string;\n  };\n  recommendations: string[];\n}\n\nexport interface CreateSnapshotOptions {\n  agentId: string;\n  criterionId?: string;\n  snapshotDate: string;\n  passRate?: string;\n  evalCount?: number;\n  meanScore?: string;\n  saturationType?: SaturationType;\n  daysAtSaturation?: number;\n  trendDirection?: string;\n  trendStrength?: string;\n  healthStatus?: HealthStatus;\n  healthScore?: string;\n}\n\nexport interface CreateRegressionOptions {\n  agentId: string;\n  criterionId?: string;\n  severity: RegressionSeverity;\n  baselinePassRate: string;\n  currentPassRate: string;\n  delta: string;\n  deltaPercent?: string;\n  baselinePeriodStart: string;\n  baselinePeriodEnd: string;\n  currentPeriodStart: 
string;\n  currentPeriodEnd: string;\n  baselineEvalCount?: number;\n  currentEvalCount?: number;\n  suspectedCauses?: unknown[];\n  isSignificant?: boolean;\n  detectedAt?: string;\n}\n\nexport interface ResolveRegressionOptions {\n  resolutionType: string;\n  notes?: string;\n}\n\nexport interface GetSnapshotsOptions {\n  agentId: string;\n  criterionId?: string;\n  startDate?: string;\n  endDate?: string;\n}\n\n// ============================================================================\n// EVAL HEALTH API CLIENT\n// ============================================================================\n\n/**\n * Evaluation Health API client for monitoring eval quality and detecting regressions\n */\nexport const evalHealth = {\n  /**\n   * Get comprehensive health report for an agent\n   *\n   * @example\n   * ```typescript\n   * const report = await evalHealth.getReport('agent_123');\n   * console.log(`Overall health: ${report.overallHealth}`);\n   * console.log(`Active regressions: ${report.regressionCount}`);\n   * ```\n   */\n  async getReport(agentId: string): Promise<HealthReport> {\n    return apiRequestWithData<HealthReport>(\n      `/eval-health/report?agentId=${agentId}`,\n      { apiVersion: 'none' }\n    );\n  },\n\n  // ---------------------------------------------------------------------------\n  // SNAPSHOTS\n  // ---------------------------------------------------------------------------\n\n  /**\n   * Get historical health snapshots\n   *\n   * @example\n   * ```typescript\n   * const snapshots = await evalHealth.getSnapshots({\n   *   agentId: 'agent_123',\n   *   startDate: '2024-01-01T00:00:00Z',\n   *   endDate: '2024-01-31T23:59:59Z',\n   * });\n   * ```\n   */\n  async getSnapshots(options: GetSnapshotsOptions): Promise<EvalHealthSnapshot[]> {\n    const params = new URLSearchParams();\n    params.set('agentId', options.agentId);\n    if (options.criterionId) params.set('criterionId', options.criterionId);\n    if (options.startDate) 
params.set('startDate', options.startDate);\n    if (options.endDate) params.set('endDate', options.endDate);\n\n    return apiRequestWithData<EvalHealthSnapshot[]>(\n      `/eval-health/snapshots?${params.toString()}`,\n      { apiVersion: 'none' }\n    );\n  },\n\n  /**\n   * Get latest health snapshot\n   *\n   * @example\n   * ```typescript\n   * const snapshot = await evalHealth.getLatestSnapshot('agent_123');\n   * ```\n   */\n  async getLatestSnapshot(agentId: string, criterionId?: string): Promise<EvalHealthSnapshot | null> {\n    const params = new URLSearchParams();\n    params.set('agentId', agentId);\n    if (criterionId) params.set('criterionId', criterionId);\n\n    return apiRequestWithData<EvalHealthSnapshot | null>(\n      `/eval-health/snapshots/latest?${params.toString()}`,\n      { apiVersion: 'none' }\n    );\n  },\n\n  /**\n   * Record a health snapshot\n   *\n   * @example\n   * ```typescript\n   * const snapshot = await evalHealth.recordSnapshot({\n   *   agentId: 'agent_123',\n   *   snapshotDate: new Date().toISOString(),\n   *   passRate: '0.85',\n   *   evalCount: 150,\n   *   healthStatus: 'healthy',\n   * });\n   * ```\n   */\n  async recordSnapshot(options: CreateSnapshotOptions): Promise<EvalHealthSnapshot> {\n    return apiRequestWithData<EvalHealthSnapshot>('/eval-health/snapshots', {\n      method: 'POST',\n      body: options,\n      apiVersion: 'none',\n    });\n  },\n\n  // ---------------------------------------------------------------------------\n  // REGRESSIONS\n  // ---------------------------------------------------------------------------\n\n  /**\n   * Get unresolved regressions for an agent\n   *\n   * @example\n   * ```typescript\n   * const regressions = await evalHealth.getRegressions('agent_123');\n   * for (const regression of regressions) {\n   *   console.log(`${regression.severity}: ${regression.delta}% drop`);\n   * }\n   * ```\n   */\n  async getRegressions(agentId: string): Promise<EvalRegression[]> {\n    
return apiRequestWithData<EvalRegression[]>(\n      `/eval-health/regressions?agentId=${agentId}`,\n      { apiVersion: 'none' }\n    );\n  },\n\n  /**\n   * Record a new regression\n   *\n   * @example\n   * ```typescript\n   * const regression = await evalHealth.recordRegression({\n   *   agentId: 'agent_123',\n   *   severity: 'moderate',\n   *   baselinePassRate: '0.92',\n   *   currentPassRate: '0.78',\n   *   delta: '-0.14',\n   *   baselinePeriodStart: '2024-01-01T00:00:00Z',\n   *   baselinePeriodEnd: '2024-01-15T23:59:59Z',\n   *   currentPeriodStart: '2024-01-16T00:00:00Z',\n   *   currentPeriodEnd: '2024-01-31T23:59:59Z',\n   * });\n   * ```\n   */\n  async recordRegression(options: CreateRegressionOptions): Promise<EvalRegression> {\n    return apiRequestWithData<EvalRegression>('/eval-health/regressions', {\n      method: 'POST',\n      body: options,\n      apiVersion: 'none',\n    });\n  },\n\n  /**\n   * Resolve a regression\n   *\n   * @example\n   * ```typescript\n   * await evalHealth.resolveRegression('regression_123', {\n   *   resolutionType: 'fixed',\n   *   notes: 'Updated prompt template to address quality issues',\n   * });\n   * ```\n   */\n  async resolveRegression(regressionId: string, options: ResolveRegressionOptions): Promise<void> {\n    await apiRequest(`/eval-health/regressions/${regressionId}/resolve`, {\n      method: 'POST',\n      body: options,\n      apiVersion: 'none',\n    });\n  },\n\n  /**\n   * Acknowledge a regression\n   *\n   * @example\n   * ```typescript\n   * await evalHealth.acknowledgeRegression('regression_123');\n   * ```\n   */\n  async acknowledgeRegression(regressionId: string): Promise<void> {\n    await apiRequest(`/eval-health/regressions/${regressionId}/acknowledge`, {\n      method: 'POST',\n      apiVersion: 'none',\n    });\n  },\n};\n\n// ============================================================================\n// HELPER FUNCTIONS\n// 
============================================================================\n\n/**\n * Check if health status indicates an issue\n *\n * @param status - Health status to check\n * @returns Whether the status indicates a problem\n */\nexport function hasHealthIssue(status: HealthStatus): boolean {\n  return status === 'warning' || status === 'critical';\n}\n\n/**\n * Get severity level as numeric value for sorting\n *\n * @param severity - Regression severity\n * @returns Numeric severity (1-3, higher is worse)\n */\nexport function getSeverityLevel(severity: RegressionSeverity): number {\n  switch (severity) {\n    case 'minor': return 1;\n    case 'moderate': return 2;\n    case 'severe': return 3;\n    default: return 0;\n  }\n}\n\n/**\n * Check if evaluation is saturated\n *\n * @param snapshot - Health snapshot to check\n * @returns Whether evaluation is at ceiling or floor\n */\nexport function isSaturated(snapshot: EvalHealthSnapshot): boolean {\n  return snapshot.saturationType === 'ceiling' || snapshot.saturationType === 'floor';\n}\n\n/**\n * Get recommendation for saturation type\n *\n * @param saturationType - Type of saturation\n * @returns Recommendation string\n */\nexport function getSaturationRecommendation(saturationType: SaturationType): string {\n  switch (saturationType) {\n    case 'ceiling':\n      return 'Evaluation criteria may be too lenient. Consider adding stricter checks or more challenging test cases.';\n    case 'floor':\n      return 'Evaluation criteria may be too strict. Consider relaxing thresholds or reviewing criteria for accuracy.';\n    case 'healthy':\n      return 'Evaluation is operating within healthy parameters.';\n    default:\n      return 'Unable to determine saturation status.';\n  }\n}\n"]}
|
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ThinkHive SDK v3.0 - Human Review API
|
|
3
|
+
*
|
|
4
|
+
* API for managing human review queue, calibration sets, and reviewer management
|
|
5
|
+
*/
|
|
6
|
+
/** Status values a review queue item can carry (type of `HumanReviewQueueItem.status`). */
export type HumanReviewStatus =
    | 'pending'
    | 'in_progress'
    | 'completed'
    | 'skipped'
    | 'expired';
/** Category of a review (type of the `reviewType` field on queue items and queue options). */
export type HumanReviewType =
    | 'disagreement'
    | 'low_confidence'
    | 'calibration'
    | 'random_sample'
    | 'flagged';
|
|
8
|
+
/**
 * One entry of the human review queue.
 *
 * This is the shape resolved by the queue methods of `humanReview`
 * (`getQueue`, `addToQueue`, `getItem`, `claim`, `release`, `skip`,
 * `submit`, `getNextItem`).
 */
export interface HumanReviewQueueItem {
    // --- Identity and scoping (always present) ---
    id: string;
    companyId: string;
    agentId: string;
    traceId: string;
    criterionId?: string;

    // --- Classification ---
    reviewType: HumanReviewType;
    priority: number;
    status: HumanReviewStatus;

    // --- `llm*` fields: the automated verdict, all optional ---
    llmScore?: number;
    llmPassed?: boolean;
    llmReasoning?: string;
    llmConfidence?: number;

    // --- Reviewer and `human*` fields: the human verdict, all optional ---
    reviewerId?: string;
    humanScore?: number;
    humanPassed?: boolean;
    humanReasoning?: string;
    reviewDurationMs?: number;

    // --- Calibration data, all optional ---
    isCalibrationSample?: boolean;
    calibrationSetId?: string;
    expectedScore?: number;
    expectedPassed?: boolean;

    // --- Timestamps (strings; presumably ISO 8601 — confirm against the API) ---
    expiresAt?: string;
    claimedAt?: string;
    completedAt?: string;
    createdAt: string;
}
|
|
35
|
+
/**
 * Input accepted by `humanReview.addToQueue`.
 *
 * Only `traceId`, `agentId` and `reviewType` are required; every other
 * field is optional.
 */
export interface AddToQueueOptions {
    // --- What to review ---
    traceId: string;
    criterionId?: string;
    agentId: string;
    reviewType: HumanReviewType;
    priority?: number;

    // --- `llm*` fields: the automated verdict attached to the item ---
    llmScore?: number;
    llmPassed?: boolean;
    llmReasoning?: string;
    llmConfidence?: number;

    // --- Calibration data ---
    isCalibrationSample?: boolean;
    calibrationSetId?: string;
    expectedScore?: number;
    expectedPassed?: boolean;

    // --- Miscellaneous ---
    /** Duration in milliseconds (per the `Ms` suffix); presumably relative to creation — confirm. */
    expiresInMs?: number;
    metadata?: Record<string, unknown>;
}
|
|
52
|
+
/**
 * Reviewer verdict accepted by `humanReview.submit`. All four fields are required.
 */
export interface SubmitReviewOptions {
    /** Pass/fail verdict. */
    passed: boolean;
    /** Numeric score (the `submit` example in this file uses 85). */
    score: number;
    /** Free-text justification for the verdict. */
    reasoning: string;
    /** Duration in milliseconds (the `submit` example in this file uses 45000). */
    durationMs: number;
}
|
|
58
|
+
/**
 * A calibration set, as resolved by `humanReview.getCalibrationSets`,
 * `humanReview.createCalibrationSet` and `humanReview.getCalibrationSet`.
 */
export interface CalibrationSet {
    // --- Identity ---
    id: string;
    companyId: string;
    agentId?: string;
    name: string;
    description?: string;

    // --- Sample counts ---
    targetSampleCount: number;
    currentSampleCount: number;

    // --- Thresholds (units/scale are not visible here — confirm against the API) ---
    minAgreementRate: number;
    passingScoreThreshold: number;

    // --- Scope and state ---
    criteriaIds: Array<string>;
    isActive: boolean;

    // --- Audit ---
    createdBy?: string;
    createdAt: string;
}
|
|
73
|
+
/**
 * Input accepted by `humanReview.createCalibrationSet`.
 *
 * `name` and `agentId` are required.
 *
 * NOTE(review): the optional field names here (`criterionId`,
 * `targetAgreement`, `minSamples`) differ from the names on `CalibrationSet`
 * (`criteriaIds`, `minAgreementRate`, `targetSampleCount`); how they map is
 * not visible in this file — confirm against the API.
 */
export interface CreateCalibrationSetOptions {
    name: string;
    description?: string;
    agentId: string;
    criterionId?: string;
    /** The `createCalibrationSet` example in this file uses 0.85. */
    targetAgreement?: number;
    /** The `createCalibrationSet` example in this file uses 50. */
    minSamples?: number;
}
|
|
81
|
+
/**
 * Calibration record of one reviewer against one calibration set, as
 * resolved by `humanReview.getCertifiedReviewers` and
 * `humanReview.getReviewerCalibrations`.
 */
export interface ReviewerCalibration {
    // --- Identity ---
    id: string;
    userId: string;
    calibrationSetId: string;

    // --- Aggregate metrics (scale not visible here — confirm against the API) ---
    totalReviews: number;
    agreementRate: number;
    meanAbsoluteError: number;

    // --- Certification state ---
    isCertified: boolean;
    certifiedAt?: string;
    lastReviewAt?: string;
}
|
|
92
|
+
/**
 * Queue statistics, as resolved by `humanReview.getStats`.
 *
 * The five required counters are named after the members of
 * `HumanReviewStatus` (presumably one count per status — confirm).
 */
export interface QueueStats {
    // --- Counters ---
    pending: number;
    inProgress: number;
    completed: number;
    skipped: number;
    expired: number;

    // --- Optional averages ---
    avgReviewDurationMs?: number;
    avgAgreementRate?: number;
}
|
|
101
|
+
/**
 * Optional filters and paging accepted by `humanReview.getQueue`.
 * Every field may be omitted.
 */
export interface ListQueueOptions {
    // --- Filters ---
    agentId?: string;
    status?: HumanReviewStatus;
    reviewType?: HumanReviewType;
    isCalibration?: boolean;
    minPriority?: number;

    // --- Paging ---
    limit?: number;
    offset?: number;
}
|
|
110
|
+
/**
 * Human Review API client: review queue operations, queue statistics,
 * calibration sets and reviewer calibration lookups.
 *
 * Every method is asynchronous and returns a promise.
 */
export declare const humanReview: {
    /**
     * List review queue items.
     *
     * @param options - Optional filters and paging
     * @returns The matching queue items
     *
     * @example
     * ```typescript
     * const items = await humanReview.getQueue({
     *   agentId: 'agent_123',
     *   status: 'pending',
     *   limit: 20,
     * });
     * ```
     */
    getQueue(options?: ListQueueOptions): Promise<HumanReviewQueueItem[]>;
    /**
     * Add an item to the review queue.
     *
     * @param options - Item to enqueue (`traceId`, `agentId` and `reviewType` are required)
     * @returns The created queue item
     *
     * @example
     * ```typescript
     * const item = await humanReview.addToQueue({
     *   traceId: 'trace_123',
     *   agentId: 'agent_123',
     *   reviewType: 'disagreement',
     *   priority: 80,
     *   llmScore: 65,
     *   llmPassed: true,
     * });
     * ```
     */
    addToQueue(options: AddToQueueOptions): Promise<HumanReviewQueueItem>;
    /**
     * Fetch a single review item.
     *
     * @param itemId - Identifier of the queue item
     * @returns The queue item
     *
     * @example
     * ```typescript
     * const item = await humanReview.getItem('item_123');
     * ```
     */
    getItem(itemId: string): Promise<HumanReviewQueueItem>;
    /**
     * Claim a review item for processing.
     *
     * @param itemId - Identifier of the queue item
     * @returns The queue item
     *
     * @example
     * ```typescript
     * const item = await humanReview.claim('item_123');
     * ```
     */
    claim(itemId: string): Promise<HumanReviewQueueItem>;
    /**
     * Release a previously claimed review item.
     *
     * @param itemId - Identifier of the queue item
     * @returns The queue item
     *
     * @example
     * ```typescript
     * await humanReview.release('item_123');
     * ```
     */
    release(itemId: string): Promise<HumanReviewQueueItem>;
    /**
     * Skip a review item.
     *
     * @param itemId - Identifier of the queue item
     * @returns The queue item
     *
     * @example
     * ```typescript
     * await humanReview.skip('item_123');
     * ```
     */
    skip(itemId: string): Promise<HumanReviewQueueItem>;
    /**
     * Submit a review for an item.
     *
     * @param itemId - Identifier of the queue item
     * @param review - The reviewer's verdict
     * @returns The queue item
     *
     * @example
     * ```typescript
     * const result = await humanReview.submit('item_123', {
     *   passed: true,
     *   score: 85,
     *   reasoning: 'Response accurately addressed the query',
     *   durationMs: 45000,
     * });
     * ```
     */
    submit(itemId: string, review: SubmitReviewOptions): Promise<HumanReviewQueueItem>;
    /**
     * Get queue statistics.
     *
     * @param agentId - Optional agent identifier
     * @returns Queue counters and optional averages
     *
     * @example
     * ```typescript
     * const stats = await humanReview.getStats('agent_123');
     * ```
     */
    getStats(agentId?: string): Promise<QueueStats>;
    /**
     * Get the next item for a reviewer.
     *
     * @param agentId - Optional agent identifier
     * @returns A queue item, or `null` (presumably when nothing is available — confirm)
     *
     * @example
     * ```typescript
     * const nextItem = await humanReview.getNextItem('agent_123');
     * ```
     */
    getNextItem(agentId?: string): Promise<HumanReviewQueueItem | null>;
    /**
     * Get the available review types.
     *
     * @returns One descriptor per review type: `id`, `name`, `description`
     *   and an `autoTrigger` flag
     *
     * @example
     * ```typescript
     * const types = await humanReview.getReviewTypes();
     * ```
     */
    getReviewTypes(): Promise<{
        id: string;
        name: string;
        description: string;
        autoTrigger: boolean;
    }[]>;
    /**
     * List calibration sets.
     *
     * @param agentId - Optional agent identifier
     * @returns The calibration sets
     *
     * @example
     * ```typescript
     * const sets = await humanReview.getCalibrationSets('agent_123');
     * ```
     */
    getCalibrationSets(agentId?: string): Promise<CalibrationSet[]>;
    /**
     * Create a calibration set.
     *
     * @param options - Definition of the set (`name` and `agentId` are required)
     * @returns The created calibration set
     *
     * @example
     * ```typescript
     * const set = await humanReview.createCalibrationSet({
     *   name: 'Quality Calibration Q1',
     *   agentId: 'agent_123',
     *   targetAgreement: 0.85,
     *   minSamples: 50,
     * });
     * ```
     */
    createCalibrationSet(options: CreateCalibrationSetOptions): Promise<CalibrationSet>;
    /**
     * Fetch a calibration set by ID.
     *
     * @param setId - Identifier of the calibration set
     * @returns The calibration set
     *
     * @example
     * ```typescript
     * const set = await humanReview.getCalibrationSet('set_123');
     * ```
     */
    getCalibrationSet(setId: string): Promise<CalibrationSet>;
    /**
     * List the certified reviewers of a calibration set.
     *
     * @param calibrationSetId - Identifier of the calibration set
     * @returns Reviewer calibration records
     *
     * @example
     * ```typescript
     * const reviewers = await humanReview.getCertifiedReviewers('set_123');
     * ```
     */
    getCertifiedReviewers(calibrationSetId: string): Promise<ReviewerCalibration[]>;
    /**
     * Get the calibration status of a reviewer.
     *
     * @param userId - Identifier of the reviewer
     * @returns Reviewer calibration records
     *
     * @example
     * ```typescript
     * const calibrations = await humanReview.getReviewerCalibrations('user_123');
     * ```
     */
    getReviewerCalibrations(userId: string): Promise<ReviewerCalibration[]>;
};
|