@thinkhive/sdk 2.0.1 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/MIGRATION.md +274 -0
- package/README.md +7 -22
- package/dist/api/calibration.d.ts +168 -0
- package/dist/api/calibration.js +176 -0
- package/dist/api/claims.d.ts +262 -0
- package/dist/api/claims.js +262 -0
- package/dist/api/runs.d.ts +200 -0
- package/dist/api/runs.js +262 -0
- package/dist/core/client.d.ts +29 -0
- package/dist/core/client.js +89 -0
- package/dist/core/config.d.ts +38 -0
- package/dist/core/config.js +76 -0
- package/dist/core/types.d.ts +354 -0
- package/dist/core/types.js +8 -0
- package/dist/index.d.ts +222 -512
- package/dist/index.js +169 -394
- package/dist/instrumentation/langchain.d.ts +194 -0
- package/dist/instrumentation/langchain.js +429 -0
- package/dist/instrumentation/openai.d.ts +141 -0
- package/dist/instrumentation/openai.js +279 -0
- package/dist/integrations/customer-context.d.ts +203 -0
- package/dist/integrations/customer-context.js +274 -0
- package/dist/integrations/ticket-linking.d.ts +217 -0
- package/dist/integrations/ticket-linking.js +259 -0
- package/package.json +61 -9
package/MIGRATION.md
ADDED
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
# Migration Guide: v2.x to v3.0
|
|
2
|
+
|
|
3
|
+
This guide helps you migrate from ThinkHive SDK v2.x to v3.0.
|
|
4
|
+
|
|
5
|
+
## Breaking Changes
|
|
6
|
+
|
|
7
|
+
### Package Name Change
|
|
8
|
+
|
|
9
|
+
```diff
|
|
10
|
+
- npm install thinkhive-sdk
|
|
11
|
+
+ npm install @thinkhive/sdk
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
Update your imports:
|
|
15
|
+
|
|
16
|
+
```diff
|
|
17
|
+
- import ThinkHive from 'thinkhive-sdk';
|
|
18
|
+
+ import ThinkHive from '@thinkhive/sdk';
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
### Minimum Node.js Version
|
|
22
|
+
|
|
23
|
+
- **v2.x**: Node.js 16+
|
|
24
|
+
- **v3.0**: Node.js 18+
|
|
25
|
+
|
|
26
|
+
### Core Concepts Change
|
|
27
|
+
|
|
28
|
+
v3 is **run-centric**, not trace-centric:
|
|
29
|
+
|
|
30
|
+
| v2 Concept | v3 Concept |
|
|
31
|
+
|------------|------------|
|
|
32
|
+
| `trace` | `run` (atomic unit) |
|
|
33
|
+
| `TraceOptions` | `RunOptions` |
|
|
34
|
+
| `explainer.analyze()` | `runs.create()` + `claims.getRunAnalysis()` |
|
|
35
|
+
| `businessContext` | `customerContext` (time-series snapshot) |
|
|
36
|
+
| Analysis results | Claims (facts vs inferences) |
|
|
37
|
+
|
|
38
|
+
## Migration Steps
|
|
39
|
+
|
|
40
|
+
### 1. Update Initialization
|
|
41
|
+
|
|
42
|
+
```typescript
|
|
43
|
+
// v2
|
|
44
|
+
import { init } from 'thinkhive-sdk';
|
|
45
|
+
|
|
46
|
+
init({
|
|
47
|
+
apiKey: 'th_xxx',
|
|
48
|
+
serviceName: 'my-agent',
|
|
49
|
+
});
|
|
50
|
+
|
|
51
|
+
// v3
|
|
52
|
+
import { init } from '@thinkhive/sdk';
|
|
53
|
+
|
|
54
|
+
init({
|
|
55
|
+
apiKey: 'th_xxx',
|
|
56
|
+
serviceName: 'my-agent',
|
|
57
|
+
apiVersion: 'v3', // Default is v3
|
|
58
|
+
});
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
### 2. Migrate Trace Creation to Runs
|
|
62
|
+
|
|
63
|
+
```typescript
|
|
64
|
+
// v2 - Trace-based
|
|
65
|
+
import { explainer } from 'thinkhive-sdk';
|
|
66
|
+
|
|
67
|
+
const result = await explainer.analyze({
|
|
68
|
+
userMessage: 'Help me with my order',
|
|
69
|
+
agentResponse: 'I found your order...',
|
|
70
|
+
outcome: 'success',
|
|
71
|
+
businessContext: {
|
|
72
|
+
customerId: 'cust_123',
|
|
73
|
+
transactionValue: 500,
|
|
74
|
+
},
|
|
75
|
+
});
|
|
76
|
+
|
|
77
|
+
// v3 - Run-based
|
|
78
|
+
import { runs, claims } from '@thinkhive/sdk';
|
|
79
|
+
|
|
80
|
+
// Create a run
|
|
81
|
+
const run = await runs.create({
|
|
82
|
+
agentId: 'agent_123',
|
|
83
|
+
conversationMessages: [
|
|
84
|
+
{ role: 'user', content: 'Help me with my order' },
|
|
85
|
+
{ role: 'assistant', content: 'I found your order...' },
|
|
86
|
+
],
|
|
87
|
+
outcome: 'resolved',
|
|
88
|
+
customerContext: {
|
|
89
|
+
customerId: 'cust_123',
|
|
90
|
+
arr: 50000, // Customer ARR at run time
|
|
91
|
+
healthScore: 85, // Health score at run time
|
|
92
|
+
capturedAt: new Date().toISOString(),
|
|
93
|
+
},
|
|
94
|
+
});
|
|
95
|
+
|
|
96
|
+
// Get analysis with claims (facts vs inferences)
|
|
97
|
+
const analysis = await claims.getRunAnalysis(run.id);
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### 3. Migrate Business Context to Customer Context Snapshots
|
|
101
|
+
|
|
102
|
+
v3 records customer metrics as **time-series snapshots** (the values as they were when the run happened) instead of a single set of current values:
|
|
103
|
+
|
|
104
|
+
```typescript
|
|
105
|
+
// v2 - Current values
|
|
106
|
+
const result = await explainer.analyze({
|
|
107
|
+
userMessage: '...',
|
|
108
|
+
agentResponse: '...',
|
|
109
|
+
businessContext: {
|
|
110
|
+
customerId: 'cust_123',
|
|
111
|
+
transactionValue: 500,
|
|
112
|
+
},
|
|
113
|
+
});
|
|
114
|
+
|
|
115
|
+
// v3 - Point-in-time snapshots
|
|
116
|
+
import { customerContext, runs } from '@thinkhive/sdk';
|
|
117
|
+
|
|
118
|
+
// First, capture customer metrics
|
|
119
|
+
const snapshot = await customerContext.captureSnapshot('cust_123', {
|
|
120
|
+
arr: 100000,
|
|
121
|
+
healthScore: 85,
|
|
122
|
+
segment: 'enterprise',
|
|
123
|
+
});
|
|
124
|
+
|
|
125
|
+
// Use the snapshot in your run
|
|
126
|
+
const run = await runs.create({
|
|
127
|
+
agentId: 'agent_123',
|
|
128
|
+
conversationMessages: [...],
|
|
129
|
+
customerContext: {
|
|
130
|
+
customerId: 'cust_123',
|
|
131
|
+
arr: snapshot.arr,
|
|
132
|
+
healthScore: snapshot.healthScore,
|
|
133
|
+
capturedAt: snapshot.capturedAt,
|
|
134
|
+
},
|
|
135
|
+
});
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
### 4. Migrate to Claims API (Facts vs Inferences)
|
|
139
|
+
|
|
140
|
+
v3 separates facts from inferences:
|
|
141
|
+
|
|
142
|
+
```typescript
|
|
143
|
+
// v2 - Single analysis result
|
|
144
|
+
const result = await explainer.analyze({...});
|
|
145
|
+
console.log(result.summary);
|
|
146
|
+
console.log(result.outcome.verdict);
|
|
147
|
+
|
|
148
|
+
// v3 - Claims with evidence
|
|
149
|
+
import { claims, isFact, isInference } from '@thinkhive/sdk';
|
|
150
|
+
|
|
151
|
+
const analysis = await claims.getRunAnalysis(run.id);
|
|
152
|
+
|
|
153
|
+
// Get all claims
|
|
154
|
+
for (const claim of analysis.claims) {
|
|
155
|
+
console.log(`[${claim.claimType}] ${claim.claimText}`);
|
|
156
|
+
console.log(`Confidence: ${claim.confidence}`);
|
|
157
|
+
|
|
158
|
+
if (isFact(claim)) {
|
|
159
|
+
console.log('This is an observed fact');
|
|
160
|
+
} else if (isInference(claim)) {
|
|
161
|
+
console.log('This is an LLM inference');
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
// Get facts vs inferences summary
|
|
166
|
+
const summary = await claims.summary({ runId: run.id });
|
|
167
|
+
console.log(`Facts: ${summary.observed.count}`);
|
|
168
|
+
console.log(`Inferences: ${summary.inferred.count}`);
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
### 5. Add Ticket Linking (New in v3)
|
|
172
|
+
|
|
173
|
+
```typescript
|
|
174
|
+
import { runs, generateZendeskMarker, linkRunToZendeskTicket } from '@thinkhive/sdk';
|
|
175
|
+
|
|
176
|
+
// Method 1: Embed marker in agent response
|
|
177
|
+
const run = await runs.create({
|
|
178
|
+
agentId: 'agent_123',
|
|
179
|
+
conversationMessages: [...],
|
|
180
|
+
});
|
|
181
|
+
|
|
182
|
+
const marker = generateZendeskMarker(run.id);
|
|
183
|
+
const responseWithMarker = `Your order is on the way! ${marker}`;
|
|
184
|
+
// Send responseWithMarker to Zendesk
|
|
185
|
+
|
|
186
|
+
// Method 2: Explicit linking
|
|
187
|
+
await linkRunToZendeskTicket(run.id, '12345');
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
### 6. Add Calibration Tracking (New in v3)
|
|
191
|
+
|
|
192
|
+
```typescript
|
|
193
|
+
import { calibration } from '@thinkhive/sdk';
|
|
194
|
+
|
|
195
|
+
// Check calibration status
|
|
196
|
+
const status = await calibration.status('agent_123', 'churn_risk');
|
|
197
|
+
console.log(`Brier score: ${status.brierScore}`);
|
|
198
|
+
console.log(`Is calibrated: ${status.isCalibrated}`);
|
|
199
|
+
|
|
200
|
+
// Record prediction outcomes
|
|
201
|
+
await calibration.recordOutcome({
|
|
202
|
+
runId: run.id,
|
|
203
|
+
predictionType: 'churn_risk',
|
|
204
|
+
predictedValue: 0.7, // We predicted 70% churn risk
|
|
205
|
+
actualOutcome: 1, // Customer did churn
|
|
206
|
+
});
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
## Deprecated APIs
|
|
210
|
+
|
|
211
|
+
The following v2 APIs still work but are deprecated:
|
|
212
|
+
|
|
213
|
+
```typescript
|
|
214
|
+
// Deprecated - use runs.create() + claims.getRunAnalysis()
|
|
215
|
+
import { explainer } from '@thinkhive/sdk';
|
|
216
|
+
const result = await explainer.analyze({...}); // Still works
|
|
217
|
+
|
|
218
|
+
// Deprecated types
|
|
219
|
+
import type { TraceOptions, BusinessContext } from '@thinkhive/sdk';
|
|
220
|
+
// Use RunOptions, CustomerContextSnapshot instead
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
## New Instrumentation
|
|
224
|
+
|
|
225
|
+
### OpenAI Assistants
|
|
226
|
+
|
|
227
|
+
```typescript
|
|
228
|
+
import { wrapAssistantRun } from '@thinkhive/sdk/instrumentation/openai';
|
|
229
|
+
|
|
230
|
+
const run = await wrapAssistantRun(
|
|
231
|
+
() => openai.beta.threads.runs.create(threadId, { assistant_id: assistantId }),
|
|
232
|
+
{ assistantId, threadId }
|
|
233
|
+
);
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
### LangGraph
|
|
237
|
+
|
|
238
|
+
```typescript
|
|
239
|
+
import { wrapLangGraphNode, wrapLangGraphExecution } from '@thinkhive/sdk/instrumentation/langchain';
|
|
240
|
+
|
|
241
|
+
// Wrap individual nodes
|
|
242
|
+
workflow.addNode('agent', wrapLangGraphNode('agent', agentFunction));
|
|
243
|
+
|
|
244
|
+
// Wrap entire workflow
|
|
245
|
+
const result = await wrapLangGraphExecution('support_workflow', () =>
|
|
246
|
+
compiledGraph.invoke({ messages: [...] })
|
|
247
|
+
);
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
## TypeScript Changes
|
|
251
|
+
|
|
252
|
+
```typescript
|
|
253
|
+
// v2 types
|
|
254
|
+
import type { TraceOptions, SpanData, BusinessContext } from 'thinkhive-sdk';
|
|
255
|
+
|
|
256
|
+
// v3 types
|
|
257
|
+
import type {
|
|
258
|
+
RunOptions,
|
|
259
|
+
RunOutcome,
|
|
260
|
+
ConversationMessage,
|
|
261
|
+
CustomerContextSnapshot,
|
|
262
|
+
Claim,
|
|
263
|
+
ClaimType,
|
|
264
|
+
AnalysisResult,
|
|
265
|
+
LinkMethod,
|
|
266
|
+
CalibrationStatus,
|
|
267
|
+
} from '@thinkhive/sdk';
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
## Need Help?
|
|
271
|
+
|
|
272
|
+
- Documentation: https://docs.thinkhive.ai
|
|
273
|
+
- API Reference: https://api.thinkhive.ai/docs
|
|
274
|
+
- Support: support@thinkhive.ai
|
package/README.md
CHANGED
|
@@ -4,6 +4,7 @@ The official JavaScript/TypeScript SDK for [ThinkHive](https://thinkhive.ai) - A
|
|
|
4
4
|
|
|
5
5
|
## Features
|
|
6
6
|
|
|
7
|
+
- **Support for 25 Trace Formats**: Automatic detection and normalization of traces from LangSmith, Langfuse, Helicone, CrewAI, Opik, Braintrust, HoneyHive, Datadog, MLflow, AgentOps, Portkey, TruLens, Lunary, LangWatch, OpenLIT, Maxim AI, Galileo, PostHog, Keywords AI, Agenta, and more
|
|
7
8
|
- **Trace Analysis**: Analyze AI agent traces with detailed explainability
|
|
8
9
|
- **RAG Evaluation**: 8 quality metrics for RAG systems (groundedness, faithfulness, etc.)
|
|
9
10
|
- **Hallucination Detection**: 9 types of hallucination detection
|
|
@@ -188,28 +189,12 @@ autoInstrument(client, {
|
|
|
188
189
|
|
|
189
190
|
## Analysis Tiers
|
|
190
191
|
|
|
191
|
-
| Tier | Description |
|
|
192
|
-
|
|
193
|
-
| `rule_based` | Pattern matching, keyword extraction |
|
|
194
|
-
| `fast_llm` | Quick LLM analysis
|
|
195
|
-
| `full_llm` | Complete analysis
|
|
196
|
-
| `deep` | Multi-pass with validation |
|
|
197
|
-
|
|
198
|
-
## Supported Models (December 2025)
|
|
199
|
-
|
|
200
|
-
### Azure OpenAI (via Azure API)
|
|
201
|
-
- **GPT-5 Series**: gpt-5.2, gpt-5, gpt-5-pro, gpt-5-mini
|
|
202
|
-
- **GPT-4.1 Series**: gpt-4.1, gpt-4.1-mini, gpt-4.1-nano
|
|
203
|
-
- **O-Series Reasoning**: o4-mini, o3-pro, o3, o3-mini
|
|
204
|
-
- **Embeddings**: text-embedding-3-large, text-embedding-3-small
|
|
205
|
-
|
|
206
|
-
### Anthropic Claude (via Azure)
|
|
207
|
-
- **Claude 4.5 Series**: claude-opus-4.5, claude-sonnet-4.5, claude-haiku-4.5
|
|
208
|
-
|
|
209
|
-
### Google Gemini (Direct API)
|
|
210
|
-
- **Gemini 3**: gemini-3-flash, gemini-3-flash-lite
|
|
211
|
-
- **Gemini 2.5**: gemini-2.5-pro, gemini-2.5-flash
|
|
212
|
-
- **Gemini 2.0**: gemini-2.0-flash-lite
|
|
192
|
+
| Tier | Description | Latency | Cost |
|
|
193
|
+
|------|-------------|---------|------|
|
|
194
|
+
| `rule_based` | Pattern matching, keyword extraction | ~50ms | Free |
|
|
195
|
+
| `fast_llm` | Quick LLM analysis (GPT-3.5) | ~500ms | Low |
|
|
196
|
+
| `full_llm` | Complete analysis (GPT-4o) | ~3s | Standard |
|
|
197
|
+
| `deep` | Multi-pass with validation | ~15s | Premium |
|
|
213
198
|
|
|
214
199
|
## Environment Variables
|
|
215
200
|
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ThinkHive SDK v3.0 - Calibration API
|
|
3
|
+
*
|
|
4
|
+
* Prediction accuracy tracking with Brier scores and calibration metrics
|
|
5
|
+
*/
|
|
6
|
+
import type { CalibrationStatus, CalibrationBucket, PredictionType } from '../core/types';
|
|
7
|
+
/**
|
|
8
|
+
* Input accepted by `calibration.recordOutcome()`: one prediction made for a run, paired with the outcome that was actually observed.
|
|
9
|
+
*/
|
|
10
|
+
export interface RecordOutcomeInput {
|
|
11
|
+
/** Run ID the prediction was made for */
|
|
12
|
+
runId: string;
|
|
13
|
+
/** Type of prediction being scored (a `PredictionType` from ../core/types) */
|
|
14
|
+
predictionType: PredictionType;
|
|
15
|
+
/** The predicted value: a probability in 0-1 for probabilistic predictions; the `recordOutcome` example also passes a plain quantity (minutes) for `resolution_time` */
|
|
16
|
+
predictedValue: number;
|
|
17
|
+
/** The actual outcome (0 or 1 for binary, or the actual observed value) */
|
|
18
|
+
actualOutcome: number;
|
|
19
|
+
/** When the prediction was made; when omitted, `recordOutcome` sends the current time as an ISO 8601 string */
|
|
20
|
+
predictedAt?: string;
|
|
21
|
+
/** When the outcome was observed; when omitted, `recordOutcome` sends the current time as an ISO 8601 string */
|
|
22
|
+
observedAt?: string;
|
|
23
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* Calibration metrics for one agent and one prediction type. This is the element type of `calibration.allMetrics()` and of `newMetrics` in the `calibration.retrain()` result.
|
|
26
|
+
*/
|
|
27
|
+
export interface CalibrationMetrics {
|
|
28
|
+
agentId: string;
|
|
29
|
+
|
|
30
|
+
predictionType: PredictionType;
|
|
31
|
+
/** Brier score (lower is better, <0.1 is good) */
|
|
32
|
+
brierScore: number;
|
|
33
|
+
/** Expected Calibration Error */
|
|
34
|
+
ece: number;
|
|
35
|
+
/** Maximum Calibration Error */
|
|
36
|
+
mce: number;
|
|
37
|
+
/** Sample count (presumably the number of recorded outcomes behind these metrics - TODO confirm against the API) */
|
|
38
|
+
sampleCount: number;
|
|
39
|
+
/** Whether the model is considered well-calibrated; the criterion is applied by the API and is not visible here (the local helper `isWellCalibrated` uses Brier score < 0.1) */
|
|
40
|
+
isCalibrated: boolean;
|
|
41
|
+
/** Reliability diagram data, as a list of calibration buckets */
|
|
42
|
+
reliabilityDiagram: CalibrationBucket[];
|
|
43
|
+
/** When these metrics were last updated (timestamp string; presumably ISO 8601 - confirm) */
|
|
44
|
+
lastUpdated: string;
|
|
45
|
+
}
|
|
45
|
+
/**
|
|
46
|
+
* Calibration API client for prediction accuracy tracking.
*
* Every method returns a Promise for the API response. The request paths named
* in the method docs are the ones built by the compiled client (calibration.js).
|
|
47
|
+
*/
|
|
48
|
+
export declare const calibration: {
|
|
49
|
+
/**
|
|
50
|
+
* Get calibration status for one agent and prediction type
* (requests `/calibration/status/{agentId}?predictionType=...`).
*
* @param agentId - ID of the agent to look up
* @param predictionType - Prediction type whose calibration is reported
* @returns The calibration status; the example below reads its `brierScore` and `isCalibrated` fields
|
|
51
|
+
*
|
|
52
|
+
* @example
|
|
53
|
+
* ```typescript
|
|
54
|
+
* const status = await calibration.status('agent_123', 'churn_risk');
|
|
55
|
+
* console.log(`Brier score: ${status.brierScore}`);
|
|
56
|
+
* console.log(`Is calibrated: ${status.isCalibrated}`);
|
|
57
|
+
* ```
|
|
58
|
+
*/
|
|
59
|
+
status(agentId: string, predictionType: PredictionType): Promise<CalibrationStatus>;
|
|
60
|
+
/**
|
|
61
|
+
* Get all calibration metrics for an agent (requests `/calibration/metrics/{agentId}`).
*
* @param agentId - ID of the agent to look up
* @returns One `CalibrationMetrics` entry per prediction type reported by the API
|
|
62
|
+
*
|
|
63
|
+
* @example
|
|
64
|
+
* ```typescript
|
|
65
|
+
* const metrics = await calibration.allMetrics('agent_123');
|
|
66
|
+
* for (const m of metrics) {
|
|
67
|
+
* console.log(`${m.predictionType}: Brier=${m.brierScore}`);
|
|
68
|
+
* }
|
|
69
|
+
* ```
|
|
70
|
+
*/
|
|
71
|
+
allMetrics(agentId: string): Promise<CalibrationMetrics[]>;
|
|
72
|
+
/**
|
|
73
|
+
* Record a prediction outcome for calibration tracking (POSTs to `/calibration/record`).
*
* `predictedAt` and `observedAt` default to the current time when omitted.
*
* @param input - The prediction and its observed outcome
* @returns `recorded`, `brierContribution` and `message` as reported by the API
*
* NOTE(review): Brier-based metrics assume probabilities in 0-1; confirm how the
* service scores non-probability prediction types such as the `resolution_time`
* example below.
|
|
74
|
+
*
|
|
75
|
+
* @example
|
|
76
|
+
* ```typescript
|
|
77
|
+
* // Record a churn prediction outcome
|
|
78
|
+
* await calibration.recordOutcome({
|
|
79
|
+
* runId: 'run_abc123',
|
|
80
|
+
* predictionType: 'churn_risk',
|
|
81
|
+
* predictedValue: 0.7, // We predicted 70% churn risk
|
|
82
|
+
* actualOutcome: 1, // Customer did churn
|
|
83
|
+
* });
|
|
84
|
+
*
|
|
85
|
+
* // Record a resolution time prediction
|
|
86
|
+
* await calibration.recordOutcome({
|
|
87
|
+
* runId: 'run_abc123',
|
|
88
|
+
* predictionType: 'resolution_time',
|
|
89
|
+
* predictedValue: 15, // Predicted 15 minutes
|
|
90
|
+
* actualOutcome: 22, // Actual was 22 minutes
|
|
91
|
+
* });
|
|
92
|
+
* ```
|
|
93
|
+
*/
|
|
94
|
+
recordOutcome(input: RecordOutcomeInput): Promise<{
|
|
95
|
+
recorded: boolean;
|
|
96
|
+
brierContribution: number;
|
|
97
|
+
message: string;
|
|
98
|
+
}>;
|
|
99
|
+
/**
|
|
100
|
+
* Trigger recalibration for an agent (POSTs `options` to `/calibration/retrain/{agentId}`).
*
* @param agentId - ID of the agent to recalibrate
* @param options - Optional filters; sent as `{}` when omitted. `predictionTypes`
*   names the types to retrain; `minSamples` is presumably the minimum number of
*   recorded outcomes a type needs before it is retrained - TODO confirm.
* @returns Which types were retrained, which were skipped (with a reason), and the new metrics
|
|
101
|
+
*
|
|
102
|
+
* @example
|
|
103
|
+
* ```typescript
|
|
104
|
+
* const result = await calibration.retrain('agent_123', {
|
|
105
|
+
* predictionTypes: ['churn_risk', 'escalation_risk'],
|
|
106
|
+
* });
|
|
107
|
+
* console.log(`Retrained: ${result.success}`);
|
|
108
|
+
* ```
|
|
109
|
+
*/
|
|
110
|
+
retrain(agentId: string, options?: {
|
|
111
|
+
predictionTypes?: PredictionType[];
|
|
112
|
+
minSamples?: number;
|
|
113
|
+
}): Promise<{
|
|
114
|
+
success: boolean;
|
|
115
|
+
retrainedTypes: PredictionType[];
|
|
116
|
+
skippedTypes: Array<{
|
|
117
|
+
type: PredictionType;
|
|
118
|
+
reason: string;
|
|
119
|
+
}>;
|
|
120
|
+
newMetrics: CalibrationMetrics[];
|
|
121
|
+
}>;
|
|
122
|
+
/**
|
|
123
|
+
* Get reliability diagram data for visualization
* (requests `/calibration/diagram/{agentId}?predictionType=...`).
*
* @param agentId - ID of the agent to look up
* @param predictionType - Prediction type to chart
* @returns The calibration buckets plus the points of the perfect-calibration reference line
|
|
124
|
+
*
|
|
125
|
+
* @example
|
|
126
|
+
* ```typescript
|
|
127
|
+
* const diagram = await calibration.reliabilityDiagram('agent_123', 'outcome');
|
|
128
|
+
* // Use diagram.buckets to plot predicted vs actual probabilities
|
|
129
|
+
* ```
|
|
130
|
+
*/
|
|
131
|
+
reliabilityDiagram(agentId: string, predictionType: PredictionType): Promise<{
|
|
132
|
+
agentId: string;
|
|
133
|
+
predictionType: PredictionType;
|
|
134
|
+
buckets: CalibrationBucket[];
|
|
135
|
+
perfectCalibrationLine: Array<{
|
|
136
|
+
x: number;
|
|
137
|
+
y: number;
|
|
138
|
+
}>;
|
|
139
|
+
}>;
|
|
140
|
+
};
|
|
141
|
+
/**
|
|
142
|
+
* Calculate Brier score from predictions and outcomes: the mean of (predicted - actual)^2 over all pairs.
|
|
143
|
+
* Lower is better, <0.1 is considered good. Returns 0 for an empty array.
|
|
144
|
+
*/
|
|
145
|
+
export declare function calculateBrierScore(predictions: Array<{
|
|
146
|
+
predicted: number;
|
|
147
|
+
actual: number;
|
|
148
|
+
}>): number;
|
|
149
|
+
/**
|
|
150
|
+
* Calculate Expected Calibration Error (ECE)
|
|
151
|
+
* Measures how well-calibrated predictions are across confidence buckets:
* predictions are grouped into `numBuckets` equal-width buckets over 0-1 (default 10;
* values of 1 or more land in the last bucket), and the result is the sample-weighted
* average of |mean predicted - mean actual| per non-empty bucket. Returns 0 for an empty array.
|
|
152
|
+
*/
|
|
153
|
+
export declare function calculateECE(predictions: Array<{
|
|
154
|
+
predicted: number;
|
|
155
|
+
actual: number;
|
|
156
|
+
}>, numBuckets?: number): number;
|
|
157
|
+
/**
|
|
158
|
+
* Check if a model is well-calibrated based on Brier score: true when the score is strictly below 0.1.
|
|
159
|
+
*/
|
|
160
|
+
export declare function isWellCalibrated(brierScore: number): boolean;
|
|
161
|
+
/**
|
|
162
|
+
* Get calibration quality label for a Brier score: below 0.05 'excellent', below 0.1 'good', below 0.2 'fair', otherwise 'poor'.
|
|
163
|
+
*/
|
|
164
|
+
export declare function getCalibrationQuality(brierScore: number): 'excellent' | 'good' | 'fair' | 'poor';
|
|
165
|
+
/**
|
|
166
|
+
* Format Brier score for display as a fixed-point string with 4 decimal places (e.g. 0.1235).
|
|
167
|
+
*/
|
|
168
|
+
export declare function formatBrierScore(score: number): string;
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* ThinkHive SDK v3.0 - Calibration API
|
|
4
|
+
*
|
|
5
|
+
* Prediction accuracy tracking with Brier scores and calibration metrics
|
|
6
|
+
*/
|
|
7
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
8
|
+
exports.calibration = void 0;
|
|
9
|
+
exports.calculateBrierScore = calculateBrierScore;
|
|
10
|
+
exports.calculateECE = calculateECE;
|
|
11
|
+
exports.isWellCalibrated = isWellCalibrated;
|
|
12
|
+
exports.getCalibrationQuality = getCalibrationQuality;
|
|
13
|
+
exports.formatBrierScore = formatBrierScore;
|
|
14
|
+
const client_1 = require("../core/client");
|
|
15
|
+
/**
|
|
16
|
+
* Calibration API client for prediction accuracy tracking
|
|
17
|
+
*/
|
|
18
|
+
exports.calibration = {
|
|
19
|
+
/**
|
|
20
|
+
* Get calibration status for an agent
|
|
21
|
+
*
|
|
22
|
+
* @example
|
|
23
|
+
* ```typescript
|
|
24
|
+
* const status = await calibration.status('agent_123', 'churn_risk');
|
|
25
|
+
* console.log(`Brier score: ${status.brierScore}`);
|
|
26
|
+
* console.log(`Is calibrated: ${status.isCalibrated}`);
|
|
27
|
+
* ```
|
|
28
|
+
*/
|
|
29
|
+
async status(agentId, predictionType) {
|
|
30
|
+
return (0, client_1.apiRequestWithData)(`/calibration/status/${agentId}?predictionType=${predictionType}`);
|
|
31
|
+
},
|
|
32
|
+
/**
|
|
33
|
+
* Get all calibration metrics for an agent
|
|
34
|
+
*
|
|
35
|
+
* @example
|
|
36
|
+
* ```typescript
|
|
37
|
+
* const metrics = await calibration.allMetrics('agent_123');
|
|
38
|
+
* for (const m of metrics) {
|
|
39
|
+
* console.log(`${m.predictionType}: Brier=${m.brierScore}`);
|
|
40
|
+
* }
|
|
41
|
+
* ```
|
|
42
|
+
*/
|
|
43
|
+
async allMetrics(agentId) {
|
|
44
|
+
return (0, client_1.apiRequestWithData)(`/calibration/metrics/${agentId}`);
|
|
45
|
+
},
|
|
46
|
+
/**
|
|
47
|
+
* Record a prediction outcome for calibration tracking
|
|
48
|
+
*
|
|
49
|
+
* @example
|
|
50
|
+
* ```typescript
|
|
51
|
+
* // Record a churn prediction outcome
|
|
52
|
+
* await calibration.recordOutcome({
|
|
53
|
+
* runId: 'run_abc123',
|
|
54
|
+
* predictionType: 'churn_risk',
|
|
55
|
+
* predictedValue: 0.7, // We predicted 70% churn risk
|
|
56
|
+
* actualOutcome: 1, // Customer did churn
|
|
57
|
+
* });
|
|
58
|
+
*
|
|
59
|
+
* // Record a resolution time prediction
|
|
60
|
+
* await calibration.recordOutcome({
|
|
61
|
+
* runId: 'run_abc123',
|
|
62
|
+
* predictionType: 'resolution_time',
|
|
63
|
+
* predictedValue: 15, // Predicted 15 minutes
|
|
64
|
+
* actualOutcome: 22, // Actual was 22 minutes
|
|
65
|
+
* });
|
|
66
|
+
* ```
|
|
67
|
+
*/
|
|
68
|
+
async recordOutcome(input) {
|
|
69
|
+
return (0, client_1.apiRequestWithData)('/calibration/record', {
|
|
70
|
+
method: 'POST',
|
|
71
|
+
body: {
|
|
72
|
+
...input,
|
|
73
|
+
predictedAt: input.predictedAt || new Date().toISOString(),
|
|
74
|
+
observedAt: input.observedAt || new Date().toISOString(),
|
|
75
|
+
},
|
|
76
|
+
});
|
|
77
|
+
},
|
|
78
|
+
/**
|
|
79
|
+
* Trigger recalibration for an agent
|
|
80
|
+
*
|
|
81
|
+
* @example
|
|
82
|
+
* ```typescript
|
|
83
|
+
* const result = await calibration.retrain('agent_123', {
|
|
84
|
+
* predictionTypes: ['churn_risk', 'escalation_risk'],
|
|
85
|
+
* });
|
|
86
|
+
* console.log(`Retrained: ${result.success}`);
|
|
87
|
+
* ```
|
|
88
|
+
*/
|
|
89
|
+
async retrain(agentId, options = {}) {
|
|
90
|
+
return (0, client_1.apiRequestWithData)(`/calibration/retrain/${agentId}`, {
|
|
91
|
+
method: 'POST',
|
|
92
|
+
body: options,
|
|
93
|
+
});
|
|
94
|
+
},
|
|
95
|
+
/**
|
|
96
|
+
* Get reliability diagram data for visualization
|
|
97
|
+
*
|
|
98
|
+
* @example
|
|
99
|
+
* ```typescript
|
|
100
|
+
* const diagram = await calibration.reliabilityDiagram('agent_123', 'outcome');
|
|
101
|
+
* // Use diagram.buckets to plot predicted vs actual probabilities
|
|
102
|
+
* ```
|
|
103
|
+
*/
|
|
104
|
+
async reliabilityDiagram(agentId, predictionType) {
|
|
105
|
+
return (0, client_1.apiRequestWithData)(`/calibration/diagram/${agentId}?predictionType=${predictionType}`);
|
|
106
|
+
},
|
|
107
|
+
};
|
|
108
|
+
// ============================================================================
|
|
109
|
+
// HELPER FUNCTIONS
|
|
110
|
+
// ============================================================================
|
|
111
|
+
/**
 * Brier score: the mean squared difference between each predicted value and
 * its actual outcome. Lower is better; below 0.1 is considered good.
 *
 * @param predictions - Array of `{ predicted, actual }` pairs
 * @returns The mean squared error, or 0 when `predictions` is empty
 */
function calculateBrierScore(predictions) {
    const total = predictions.length;
    if (total === 0) {
        return 0;
    }
    let squaredErrorSum = 0;
    for (const { predicted, actual } of predictions) {
        squaredErrorSum = squaredErrorSum + Math.pow(predicted - actual, 2);
    }
    return squaredErrorSum / total;
}
|
|
123
|
+
/**
 * Calculate Expected Calibration Error (ECE).
 *
 * Predictions are grouped into `numBuckets` equal-width buckets over [0, 1].
 * The result is the sum, over non-empty buckets, of
 * (bucket size / total) * |mean predicted - mean actual|.
 *
 * Out-of-range predictions are clamped into the nearest edge bucket: values
 * of 1 or more go to the last bucket and values below 0 go to the first,
 * instead of indexing outside the bucket array. A NaN `predicted` is placed
 * in the first bucket and makes the result NaN.
 *
 * @param predictions - Array of `{ predicted, actual }` pairs
 * @param numBuckets - Number of buckets; must be a positive integer (default 10)
 * @returns The ECE, or 0 when `predictions` is empty
 * @throws {RangeError} If `predictions` is non-empty and `numBuckets` is not a positive integer
 */
function calculateECE(predictions, numBuckets = 10) {
    if (predictions.length === 0)
        return 0;
    if (!Number.isInteger(numBuckets) || numBuckets < 1) {
        throw new RangeError(`numBuckets must be a positive integer, got ${numBuckets}`);
    }
    // Running sums per bucket; values are added in input order.
    const buckets = Array.from({ length: numBuckets }, () => ({
        count: 0,
        predictedSum: 0,
        actualSum: 0,
    }));
    const lastIndex = numBuckets - 1;
    // Assign predictions to buckets
    for (const { predicted, actual } of predictions) {
        const rawIndex = Math.floor(predicted * numBuckets);
        // `rawIndex > 0` is false for negatives and NaN, so both fall into bucket 0.
        const bucketIndex = rawIndex > 0 ? Math.min(rawIndex, lastIndex) : 0;
        const bucket = buckets[bucketIndex];
        bucket.count += 1;
        bucket.predictedSum += predicted;
        bucket.actualSum += actual;
    }
    // Calculate ECE
    let ece = 0;
    for (const { count, predictedSum, actualSum } of buckets) {
        if (count === 0)
            continue;
        const avgPredicted = predictedSum / count;
        const avgActual = actualSum / count;
        const weight = count / predictions.length;
        ece += weight * Math.abs(avgPredicted - avgActual);
    }
    return ece;
}
|
|
152
|
+
/**
 * Report whether a Brier score counts as well-calibrated.
 *
 * @param brierScore - Brier score to judge
 * @returns true when the score is strictly below 0.1
 */
function isWellCalibrated(brierScore) {
    const wellCalibratedBelow = 0.1;
    return wellCalibratedBelow > brierScore;
}
|
|
158
|
+
/**
 * Map a Brier score to a calibration quality label.
 *
 * @param brierScore - Brier score to classify
 * @returns 'excellent' below 0.05, 'good' below 0.1, 'fair' below 0.2, otherwise 'poor'
 */
function getCalibrationQuality(brierScore) {
    // Ordered from strictest to loosest; the first tier the score falls under wins.
    const tiers = [
        { below: 0.05, label: 'excellent' },
        { below: 0.1, label: 'good' },
        { below: 0.2, label: 'fair' },
    ];
    const matched = tiers.find((tier) => brierScore < tier.below);
    return matched ? matched.label : 'poor';
}
|
|
170
|
+
/**
 * Render a Brier score for display.
 *
 * @param score - Brier score to format
 * @returns The score as a fixed-point string with four decimal places
 */
function formatBrierScore(score) {
    const decimalPlaces = 4;
    const formatted = score.toFixed(decimalPlaces);
    return formatted;
}
|
|
176
|
+
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"calibration.js","sourceRoot":"","sources":["../../src/api/calibration.ts"],"names":[],"mappings":";AAAA;;;;GAIG;;;AAgMH,kDAUC;AAMD,oCAoCC;AAKD,4CAEC;AAKD,sDAOC;AAKD,4CAEC;AA5QD,2CAAgE;AAoDhE;;GAEG;AACU,QAAA,WAAW,GAAG;IACzB;;;;;;;;;OASG;IACH,KAAK,CAAC,MAAM,CACV,OAAe,EACf,cAA8B;QAE9B,OAAO,IAAA,2BAAkB,EACvB,uBAAuB,OAAO,mBAAmB,cAAc,EAAE,CAClE,CAAC;IACJ,CAAC;IAED;;;;;;;;;;OAUG;IACH,KAAK,CAAC,UAAU,CAAC,OAAe;QAC9B,OAAO,IAAA,2BAAkB,EACvB,wBAAwB,OAAO,EAAE,CAClC,CAAC;IACJ,CAAC;IAED;;;;;;;;;;;;;;;;;;;;;OAqBG;IACH,KAAK,CAAC,aAAa,CAAC,KAAyB;QAK3C,OAAO,IAAA,2BAAkB,EAAC,qBAAqB,EAAE;YAC/C,MAAM,EAAE,MAAM;YACd,IAAI,EAAE;gBACJ,GAAG,KAAK;gBACR,WAAW,EAAE,KAAK,CAAC,WAAW,IAAI,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;gBAC1D,UAAU,EAAE,KAAK,CAAC,UAAU,IAAI,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;aACzD;SACF,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;;;;OAUG;IACH,KAAK,CAAC,OAAO,CACX,OAAe,EACf,UAGI,EAAE;QAON,OAAO,IAAA,2BAAkB,EAAC,wBAAwB,OAAO,EAAE,EAAE;YAC3D,MAAM,EAAE,MAAM;YACd,IAAI,EAAE,OAAO;SACd,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;;OAQG;IACH,KAAK,CAAC,kBAAkB,CACtB,OAAe,EACf,cAA8B;QAO9B,OAAO,IAAA,2BAAkB,EACvB,wBAAwB,OAAO,mBAAmB,cAAc,EAAE,CACnE,CAAC;IACJ,CAAC;CACF,CAAC;AAEF,+EAA+E;AAC/E,mBAAmB;AACnB,+EAA+E;AAE/E;;;GAGG;AACH,SAAgB,mBAAmB,CACjC,WAAyD;IAEzD,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAEvC,MAAM,GAAG,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,MAAM,EAAE,EAAE,EAAE;QAC5D,OAAO,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,SAAS,GAAG,MAAM,EAAE,CAAC,CAAC,CAAC;IAC/C,CAAC,EAAE,CAAC,CAAC,CAAC;IAEN,OAAO,GAAG,GAAG,WAAW,CAAC,MAAM,CAAC;AAClC,CAAC;AAED;;;GAGG;AACH,SAAgB,YAAY,CAC1B,WAAyD,EACzD,aAAqB,EAAE;IAEvB,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAEvC,MAAM,OAAO,GAAwD,EAAE,CAAC;IACxE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;QACpC,OAAO,CAAC,IAAI,CAAC,EAAE,WAAW,EAAE,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,CAAC,CAAC;IACjD,CAAC;IAED,gCAAgC;IAChC,KAAK,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,IAAI,WAAW,EAAE,CAAC;QAChD,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CA
C1B,IAAI,CAAC,KAAK,CAAC,SAAS,GAAG,UAAU,CAAC,EAClC,UAAU,GAAG,CAAC,CACf,CAAC;QACF,OAAO,CAAC,WAAW,CAAC,CAAC,WAAW,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QACjD,OAAO,CAAC,WAAW,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAC5C,CAAC;IAED,gBAAgB;IAChB,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,IAAI,MAAM,CAAC,WAAW,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QAE9C,MAAM,YAAY,GAChB,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC;QAC5E,MAAM,SAAS,GACb,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC;QACpE,MAAM,MAAM,GAAG,MAAM,CAAC,WAAW,CAAC,MAAM,GAAG,WAAW,CAAC,MAAM,CAAC;QAE9D,GAAG,IAAI,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,YAAY,GAAG,SAAS,CAAC,CAAC;IACrD,CAAC;IAED,OAAO,GAAG,CAAC;AACb,CAAC;AAED;;GAEG;AACH,SAAgB,gBAAgB,CAAC,UAAkB;IACjD,OAAO,UAAU,GAAG,GAAG,CAAC;AAC1B,CAAC;AAED;;GAEG;AACH,SAAgB,qBAAqB,CACnC,UAAkB;IAElB,IAAI,UAAU,GAAG,IAAI;QAAE,OAAO,WAAW,CAAC;IAC1C,IAAI,UAAU,GAAG,GAAG;QAAE,OAAO,MAAM,CAAC;IACpC,IAAI,UAAU,GAAG,GAAG;QAAE,OAAO,MAAM,CAAC;IACpC,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,SAAgB,gBAAgB,CAAC,KAAa;IAC5C,OAAO,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;AAC1B,CAAC","sourcesContent":["/**\n * ThinkHive SDK v3.0 - Calibration API\n *\n * Prediction accuracy tracking with Brier scores and calibration metrics\n */\n\nimport { apiRequest, apiRequestWithData } from '../core/client';\nimport type {\n  CalibrationStatus,\n  CalibrationBucket,\n  PredictionType,\n  ApiResponse,\n} from '../core/types';\n\n// ============================================================================\n// CALIBRATION API CLIENT\n// ============================================================================\n\n/**\n * Record outcome input\n */\nexport interface RecordOutcomeInput {\n  /** Run ID the prediction was made for */\n  runId: string;\n  /** Type of prediction */\n  predictionType: PredictionType;\n  /** The predicted value (0-1 for 
probabilities) */\n  predictedValue: number;\n  /** The actual outcome (0 or 1 for binary, or actual value) */\n  actualOutcome: number;\n  /** When the prediction was made */\n  predictedAt?: string;\n  /** When the outcome was observed */\n  observedAt?: string;\n}\n\n/**\n * Calibration metrics\n */\nexport interface CalibrationMetrics {\n  agentId: string;\n  predictionType: PredictionType;\n  /** Brier score (lower is better, <0.1 is good) */\n  brierScore: number;\n  /** Expected Calibration Error */\n  ece: number;\n  /** Maximum Calibration Error */\n  mce: number;\n  /** Sample count */\n  sampleCount: number;\n  /** Is the model well-calibrated */\n  isCalibrated: boolean;\n  /** Reliability diagram data */\n  reliabilityDiagram: CalibrationBucket[];\n  /** Last updated */\n  lastUpdated: string;\n}\n\n/**\n * Calibration API client for prediction accuracy tracking\n */\nexport const calibration = {\n  /**\n   * Get calibration status for an agent\n   *\n   * @example\n   * ```typescript\n   * const status = await calibration.status('agent_123', 'churn_risk');\n   * console.log(`Brier score: ${status.brierScore}`);\n   * console.log(`Is calibrated: ${status.isCalibrated}`);\n   * ```\n   */\n  async status(\n    agentId: string,\n    predictionType: PredictionType\n  ): Promise<CalibrationStatus> {\n    return apiRequestWithData<CalibrationStatus>(\n      `/calibration/status/${agentId}?predictionType=${predictionType}`\n    );\n  },\n\n  /**\n   * Get all calibration metrics for an agent\n   *\n   * @example\n   * ```typescript\n   * const metrics = await calibration.allMetrics('agent_123');\n   * for (const m of metrics) {\n   *   console.log(`${m.predictionType}: Brier=${m.brierScore}`);\n   * }\n   * ```\n   */\n  async allMetrics(agentId: string): Promise<CalibrationMetrics[]> {\n    return apiRequestWithData<CalibrationMetrics[]>(\n      `/calibration/metrics/${agentId}`\n    );\n  },\n\n  /**\n   * Record a prediction outcome for calibration 
tracking\n   *\n   * @example\n   * ```typescript\n   * // Record a churn prediction outcome\n   * await calibration.recordOutcome({\n   *   runId: 'run_abc123',\n   *   predictionType: 'churn_risk',\n   *   predictedValue: 0.7,  // We predicted 70% churn risk\n   *   actualOutcome: 1,     // Customer did churn\n   * });\n   *\n   * // Record a resolution time prediction\n   * await calibration.recordOutcome({\n   *   runId: 'run_abc123',\n   *   predictionType: 'resolution_time',\n   *   predictedValue: 15,   // Predicted 15 minutes\n   *   actualOutcome: 22,    // Actual was 22 minutes\n   * });\n   * ```\n   */\n  async recordOutcome(input: RecordOutcomeInput): Promise<{\n    recorded: boolean;\n    brierContribution: number;\n    message: string;\n  }> {\n    return apiRequestWithData('/calibration/record', {\n      method: 'POST',\n      body: {\n        ...input,\n        predictedAt: input.predictedAt || new Date().toISOString(),\n        observedAt: input.observedAt || new Date().toISOString(),\n      },\n    });\n  },\n\n  /**\n   * Trigger recalibration for an agent\n   *\n   * @example\n   * ```typescript\n   * const result = await calibration.retrain('agent_123', {\n   *   predictionTypes: ['churn_risk', 'escalation_risk'],\n   * });\n   * console.log(`Retrained: ${result.success}`);\n   * ```\n   */\n  async retrain(\n    agentId: string,\n    options: {\n      predictionTypes?: PredictionType[];\n      minSamples?: number;\n    } = {}\n  ): Promise<{\n    success: boolean;\n    retrainedTypes: PredictionType[];\n    skippedTypes: Array<{ type: PredictionType; reason: string }>;\n    newMetrics: CalibrationMetrics[];\n  }> {\n    return apiRequestWithData(`/calibration/retrain/${agentId}`, {\n      method: 'POST',\n      body: options,\n    });\n  },\n\n  /**\n   * Get reliability diagram data for visualization\n   *\n   * @example\n   * ```typescript\n   * const diagram = await calibration.reliabilityDiagram('agent_123', 'outcome');\n   * // Use 
diagram.buckets to plot predicted vs actual probabilities\n   * ```\n   */\n  async reliabilityDiagram(\n    agentId: string,\n    predictionType: PredictionType\n  ): Promise<{\n    agentId: string;\n    predictionType: PredictionType;\n    buckets: CalibrationBucket[];\n    perfectCalibrationLine: Array<{ x: number; y: number }>;\n  }> {\n    return apiRequestWithData(\n      `/calibration/diagram/${agentId}?predictionType=${predictionType}`\n    );\n  },\n};\n\n// ============================================================================\n// HELPER FUNCTIONS\n// ============================================================================\n\n/**\n * Calculate Brier score from predictions and outcomes\n * Lower is better, <0.1 is considered good\n */\nexport function calculateBrierScore(\n  predictions: Array<{ predicted: number; actual: number }>\n): number {\n  if (predictions.length === 0) return 0;\n\n  const sum = predictions.reduce((acc, { predicted, actual }) => {\n    return acc + Math.pow(predicted - actual, 2);\n  }, 0);\n\n  return sum / predictions.length;\n}\n\n/**\n * Calculate Expected Calibration Error (ECE)\n * Measures how well-calibrated predictions are across confidence buckets\n */\nexport function calculateECE(\n  predictions: Array<{ predicted: number; actual: number }>,\n  numBuckets: number = 10\n): number {\n  if (predictions.length === 0) return 0;\n\n  const buckets: Array<{ predictions: number[]; actuals: number[] }> = [];\n  for (let i = 0; i < numBuckets; i++) {\n    buckets.push({ predictions: [], actuals: [] });\n  }\n\n  // Assign predictions to buckets\n  for (const { predicted, actual } of predictions) {\n    const bucketIndex = Math.min(\n      Math.floor(predicted * numBuckets),\n      numBuckets - 1\n    );\n    buckets[bucketIndex].predictions.push(predicted);\n    buckets[bucketIndex].actuals.push(actual);\n  }\n\n  // Calculate ECE\n  let ece = 0;\n  for (const bucket of buckets) {\n    if (bucket.predictions.length === 
0) continue;\n\n    const avgPredicted =\n      bucket.predictions.reduce((a, b) => a + b, 0) / bucket.predictions.length;\n    const avgActual =\n      bucket.actuals.reduce((a, b) => a + b, 0) / bucket.actuals.length;\n    const weight = bucket.predictions.length / predictions.length;\n\n    ece += weight * Math.abs(avgPredicted - avgActual);\n  }\n\n  return ece;\n}\n\n/**\n * Check if a model is well-calibrated based on Brier score\n */\nexport function isWellCalibrated(brierScore: number): boolean {\n  return brierScore < 0.1;\n}\n\n/**\n * Get calibration quality label\n */\nexport function getCalibrationQuality(\n  brierScore: number\n): 'excellent' | 'good' | 'fair' | 'poor' {\n  if (brierScore < 0.05) return 'excellent';\n  if (brierScore < 0.1) return 'good';\n  if (brierScore < 0.2) return 'fair';\n  return 'poor';\n}\n\n/**\n * Format Brier score for display\n */\nexport function formatBrierScore(score: number): string {\n  return score.toFixed(4);\n}\n"]}
|