@thinkhive/sdk 2.0.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/MIGRATION.md ADDED
@@ -0,0 +1,274 @@
1
+ # Migration Guide: v2.x to v3.0
2
+
3
+ This guide helps you migrate from ThinkHive SDK v2.x to v3.0.
4
+
5
+ ## Breaking Changes
6
+
7
+ ### Package Name Change
8
+
9
+ ```diff
10
+ - npm install thinkhive-sdk
11
+ + npm install @thinkhive/sdk
12
+ ```
13
+
14
+ Update your imports:
15
+
16
+ ```diff
17
+ - import ThinkHive from 'thinkhive-sdk';
18
+ + import ThinkHive from '@thinkhive/sdk';
19
+ ```
20
+
21
+ ### Minimum Node.js Version
22
+
23
+ - **v2.x**: Node.js 16+
24
+ - **v3.0**: Node.js 18+
25
+
26
+ ### Core Concepts Change
27
+
28
+ v3 is **run-centric**, not trace-centric. Each v2 concept maps to a v3 replacement:
29
+
30
+ | v2 Concept | v3 Concept |
31
+ |------------|------------|
32
+ | `trace` | `run` (atomic unit) |
33
+ | `TraceOptions` | `RunOptions` |
34
+ | `explainer.analyze()` | `runs.create()` + `claims.getRunAnalysis()` |
35
+ | `businessContext` | `customerContext` (time-series snapshot) |
36
+ | Analysis results | Claims (facts vs inferences) |
37
+
38
+ ## Migration Steps
39
+
40
+ ### 1. Update Initialization
41
+
42
+ ```typescript
43
+ // v2
44
+ import { init } from 'thinkhive-sdk';
45
+
46
+ init({
47
+ apiKey: 'th_xxx',
48
+ serviceName: 'my-agent',
49
+ });
50
+
51
+ // v3
52
+ import { init } from '@thinkhive/sdk';
53
+
54
+ init({
55
+ apiKey: 'th_xxx',
56
+ serviceName: 'my-agent',
57
+ apiVersion: 'v3', // Default is v3
58
+ });
59
+ ```
60
+
61
+ ### 2. Migrate Trace Creation to Runs
62
+
63
+ ```typescript
64
+ // v2 - Trace-based
65
+ import { explainer } from 'thinkhive-sdk';
66
+
67
+ const result = await explainer.analyze({
68
+ userMessage: 'Help me with my order',
69
+ agentResponse: 'I found your order...',
70
+ outcome: 'success',
71
+ businessContext: {
72
+ customerId: 'cust_123',
73
+ transactionValue: 500,
74
+ },
75
+ });
76
+
77
+ // v3 - Run-based
78
+ import { runs, claims } from '@thinkhive/sdk';
79
+
80
+ // Create a run
81
+ const run = await runs.create({
82
+ agentId: 'agent_123',
83
+ conversationMessages: [
84
+ { role: 'user', content: 'Help me with my order' },
85
+ { role: 'assistant', content: 'I found your order...' },
86
+ ],
87
+ outcome: 'resolved',
88
+ customerContext: {
89
+ customerId: 'cust_123',
90
+ arr: 50000, // Customer ARR at run time
91
+ healthScore: 85, // Health score at run time
92
+ capturedAt: new Date().toISOString(),
93
+ },
94
+ });
95
+
96
+ // Get analysis with claims (facts vs inferences)
97
+ const analysis = await claims.getRunAnalysis(run.id);
98
+ ```
99
+
100
+ ### 3. Migrate Business Context to Customer Context Snapshots
101
+
102
+ v3 records customer context as **time-series snapshots** (the metric values captured at the time of the run) instead of only the current values:
103
+
104
+ ```typescript
105
+ // v2 - Current values
106
+ const result = await explainer.analyze({
107
+ userMessage: '...',
108
+ agentResponse: '...',
109
+ businessContext: {
110
+ customerId: 'cust_123',
111
+ transactionValue: 500,
112
+ },
113
+ });
114
+
115
+ // v3 - Point-in-time snapshots
116
+ import { customerContext, runs } from '@thinkhive/sdk';
117
+
118
+ // First, capture customer metrics
119
+ const snapshot = await customerContext.captureSnapshot('cust_123', {
120
+ arr: 100000,
121
+ healthScore: 85,
122
+ segment: 'enterprise',
123
+ });
124
+
125
+ // Use the snapshot in your run
126
+ const run = await runs.create({
127
+ agentId: 'agent_123',
128
+ conversationMessages: [...],
129
+ customerContext: {
130
+ customerId: 'cust_123',
131
+ arr: snapshot.arr,
132
+ healthScore: snapshot.healthScore,
133
+ capturedAt: snapshot.capturedAt,
134
+ },
135
+ });
136
+ ```
137
+
138
+ ### 4. Migrate to Claims API (Facts vs Inferences)
139
+
140
+ v3 separates facts from inferences:
141
+
142
+ ```typescript
143
+ // v2 - Single analysis result
144
+ const result = await explainer.analyze({...});
145
+ console.log(result.summary);
146
+ console.log(result.outcome.verdict);
147
+
148
+ // v3 - Claims with evidence
149
+ import { claims, isFact, isInference } from '@thinkhive/sdk';
150
+
151
+ const analysis = await claims.getRunAnalysis(run.id);
152
+
153
+ // Get all claims
154
+ for (const claim of analysis.claims) {
155
+ console.log(`[${claim.claimType}] ${claim.claimText}`);
156
+ console.log(`Confidence: ${claim.confidence}`);
157
+
158
+ if (isFact(claim)) {
159
+ console.log('This is an observed fact');
160
+ } else if (isInference(claim)) {
161
+ console.log('This is an LLM inference');
162
+ }
163
+ }
164
+
165
+ // Get facts vs inferences summary
166
+ const summary = await claims.summary({ runId: run.id });
167
+ console.log(`Facts: ${summary.observed.count}`);
168
+ console.log(`Inferences: ${summary.inferred.count}`);
169
+ ```
170
+
171
+ ### 5. Add Ticket Linking (New in v3)
172
+
173
+ ```typescript
174
+ import { runs, generateZendeskMarker, linkRunToZendeskTicket } from '@thinkhive/sdk';
175
+
176
+ // Method 1: Embed marker in agent response
177
+ const run = await runs.create({
178
+ agentId: 'agent_123',
179
+ conversationMessages: [...],
180
+ });
181
+
182
+ const marker = generateZendeskMarker(run.id);
183
+ const responseWithMarker = `Your order is on the way! ${marker}`;
184
+ // Send responseWithMarker to Zendesk
185
+
186
+ // Method 2: Explicit linking
187
+ await linkRunToZendeskTicket(run.id, '12345');
188
+ ```
189
+
190
+ ### 6. Add Calibration Tracking (New in v3)
191
+
192
+ ```typescript
193
+ import { calibration } from '@thinkhive/sdk';
194
+
195
+ // Check calibration status
196
+ const status = await calibration.status('agent_123', 'churn_risk');
197
+ console.log(`Brier score: ${status.brierScore}`);
198
+ console.log(`Is calibrated: ${status.isCalibrated}`);
199
+
200
+ // Record prediction outcomes
201
+ await calibration.recordOutcome({
202
+ runId: run.id,
203
+ predictionType: 'churn_risk',
204
+ predictedValue: 0.7, // We predicted 70% churn risk
205
+ actualOutcome: 1, // Customer did churn
206
+ });
207
+ ```
208
+
209
+ ## Deprecated APIs
210
+
211
+ The following v2 APIs still work in v3.0 but are deprecated; migrate to the v3 replacements noted in the comments:
212
+
213
+ ```typescript
214
+ // Deprecated - use runs.create() + claims.getRunAnalysis()
215
+ import { explainer } from '@thinkhive/sdk';
216
+ const result = await explainer.analyze({...}); // Still works
217
+
218
+ // Deprecated types
219
+ import type { TraceOptions, BusinessContext } from '@thinkhive/sdk';
220
+ // Use RunOptions, CustomerContextSnapshot instead
221
+ ```
222
+
223
+ ## New Instrumentation
224
+
225
+ ### OpenAI Assistants
226
+
227
+ ```typescript
228
+ import { wrapAssistantRun } from '@thinkhive/sdk/instrumentation/openai';
229
+
230
+ const run = await wrapAssistantRun(
231
+ () => openai.beta.threads.runs.create(threadId, { assistant_id: assistantId }),
232
+ { assistantId, threadId }
233
+ );
234
+ ```
235
+
236
+ ### LangGraph
237
+
238
+ ```typescript
239
+ import { wrapLangGraphNode, wrapLangGraphExecution } from '@thinkhive/sdk/instrumentation/langchain';
240
+
241
+ // Wrap individual nodes
242
+ workflow.addNode('agent', wrapLangGraphNode('agent', agentFunction));
243
+
244
+ // Wrap entire workflow
245
+ const result = await wrapLangGraphExecution('support_workflow', () =>
246
+ compiledGraph.invoke({ messages: [...] })
247
+ );
248
+ ```
249
+
250
+ ## TypeScript Changes
251
+
252
+ ```typescript
253
+ // v2 types
254
+ import type { TraceOptions, SpanData, BusinessContext } from 'thinkhive-sdk';
255
+
256
+ // v3 types
257
+ import type {
258
+ RunOptions,
259
+ RunOutcome,
260
+ ConversationMessage,
261
+ CustomerContextSnapshot,
262
+ Claim,
263
+ ClaimType,
264
+ AnalysisResult,
265
+ LinkMethod,
266
+ CalibrationStatus,
267
+ } from '@thinkhive/sdk';
268
+ ```
269
+
270
+ ## Need Help?
271
+
272
+ - Documentation: https://docs.thinkhive.ai
273
+ - API Reference: https://api.thinkhive.ai/docs
274
+ - Support: support@thinkhive.ai
@@ -0,0 +1,168 @@
1
+ /**
2
+ * ThinkHive SDK v3.0 - Calibration API
3
+ *
4
+ * Prediction accuracy tracking with Brier scores and calibration metrics
5
+ */
6
+ import type { CalibrationStatus, CalibrationBucket, PredictionType } from '../core/types';
7
+ /**
8
+ * Input accepted by `calibration.recordOutcome`: one prediction paired with the outcome that was later observed
9
+ */
10
+ export interface RecordOutcomeInput {
11
+ /** Run ID the prediction was made for */
12
+ runId: string;
13
+ /** Type of prediction */
14
+ predictionType: PredictionType;
15
+ /** The predicted value (0-1 for probabilities) */
16
+ predictedValue: number;
17
+ /** The actual outcome (0 or 1 for binary, or actual value) */
18
+ actualOutcome: number;
19
+ /** When the prediction was made; `calibration.recordOutcome` substitutes the current time (`new Date().toISOString()`) when this is omitted or empty */
20
+ predictedAt?: string;
21
+ /** When the outcome was observed; `calibration.recordOutcome` substitutes the current time (`new Date().toISOString()`) when this is omitted or empty */
22
+ observedAt?: string;
23
+ }
24
+ /**
25
+ * Calibration metrics for one agent and prediction type; element type of `calibration.allMetrics` results and of `newMetrics` from `calibration.retrain`
26
+ */
27
+ export interface CalibrationMetrics {
28
+ agentId: string;
29
+ predictionType: PredictionType;
30
+ /** Brier score (lower is better, <0.1 is good) */
31
+ brierScore: number;
32
+ /** Expected Calibration Error */
33
+ ece: number;
34
+ /** Maximum Calibration Error */
35
+ mce: number;
36
+ /** Sample count */
37
+ sampleCount: number;
38
+ /** Is the model well-calibrated */
39
+ isCalibrated: boolean;
40
+ /** Reliability diagram data, one `CalibrationBucket` per entry */
41
+ reliabilityDiagram: CalibrationBucket[];
42
+ /** Last updated (string; NOTE(review): format is not specified here, presumably an ISO timestamp, confirm against the API) */
43
+ lastUpdated: string;
44
+ }
45
+ /**
46
+ * Calibration API client for prediction accuracy tracking; each method issues one request through the SDK's shared API client
47
+ */
48
+ export declare const calibration: {
49
+ /**
50
+ * Get calibration status for one agent and prediction type (requests `/calibration/status/<agentId>` with a `predictionType` query parameter)
51
+ *
52
+ * @example
53
+ * ```typescript
54
+ * const status = await calibration.status('agent_123', 'churn_risk');
55
+ * console.log(`Brier score: ${status.brierScore}`);
56
+ * console.log(`Is calibrated: ${status.isCalibrated}`);
57
+ * ```
58
+ */
59
+ status(agentId: string, predictionType: PredictionType): Promise<CalibrationStatus>;
60
+ /**
61
+ * Get all calibration metrics for an agent (requests `/calibration/metrics/<agentId>`)
62
+ *
63
+ * @example
64
+ * ```typescript
65
+ * const metrics = await calibration.allMetrics('agent_123');
66
+ * for (const m of metrics) {
67
+ * console.log(`${m.predictionType}: Brier=${m.brierScore}`);
68
+ * }
69
+ * ```
70
+ */
71
+ allMetrics(agentId: string): Promise<CalibrationMetrics[]>;
72
+ /**
73
+ * Record a prediction outcome for calibration tracking (POSTs the input to `/calibration/record`; `predictedAt` and `observedAt` default to the current time when omitted)
74
+ *
75
+ * @example
76
+ * ```typescript
77
+ * // Record a churn prediction outcome
78
+ * await calibration.recordOutcome({
79
+ * runId: 'run_abc123',
80
+ * predictionType: 'churn_risk',
81
+ * predictedValue: 0.7, // We predicted 70% churn risk
82
+ * actualOutcome: 1, // Customer did churn
83
+ * });
84
+ *
85
+ * // Record a resolution time prediction
86
+ * await calibration.recordOutcome({
87
+ * runId: 'run_abc123',
88
+ * predictionType: 'resolution_time',
89
+ * predictedValue: 15, // Predicted 15 minutes
90
+ * actualOutcome: 22, // Actual was 22 minutes
91
+ * });
92
+ * ```
93
+ */
94
+ recordOutcome(input: RecordOutcomeInput): Promise<{
95
+ recorded: boolean;
96
+ brierContribution: number;
97
+ message: string;
98
+ }>;
99
+ /**
100
+ * Trigger recalibration for an agent (POSTs `options`, or an empty object when omitted, to `/calibration/retrain/<agentId>`)
101
+ *
102
+ * @example
103
+ * ```typescript
104
+ * const result = await calibration.retrain('agent_123', {
105
+ * predictionTypes: ['churn_risk', 'escalation_risk'],
106
+ * });
107
+ * console.log(`Retrained: ${result.success}`);
108
+ * ```
109
+ */
110
+ retrain(agentId: string, options?: {
111
+ predictionTypes?: PredictionType[];
112
+ minSamples?: number;
113
+ }): Promise<{
114
+ success: boolean;
115
+ retrainedTypes: PredictionType[];
116
+ skippedTypes: Array<{
117
+ type: PredictionType;
118
+ reason: string;
119
+ }>;
120
+ newMetrics: CalibrationMetrics[];
121
+ }>;
122
+ /**
123
+ * Get reliability diagram data for visualization (requests `/calibration/diagram/<agentId>` with a `predictionType` query parameter)
124
+ *
125
+ * @example
126
+ * ```typescript
127
+ * const diagram = await calibration.reliabilityDiagram('agent_123', 'outcome');
128
+ * // Use diagram.buckets to plot predicted vs actual probabilities
129
+ * ```
130
+ */
131
+ reliabilityDiagram(agentId: string, predictionType: PredictionType): Promise<{
132
+ agentId: string;
133
+ predictionType: PredictionType;
134
+ buckets: CalibrationBucket[];
135
+ perfectCalibrationLine: Array<{
136
+ x: number;
137
+ y: number;
138
+ }>;
139
+ }>;
140
+ };
141
+ /**
142
+ * Calculate the Brier score: the mean of (predicted - actual)^2 over all entries; an empty array yields 0
143
+ * Lower is better, <0.1 is considered good
144
+ */
145
+ export declare function calculateBrierScore(predictions: Array<{
146
+ predicted: number;
147
+ actual: number;
148
+ }>): number;
149
+ /**
150
+ * Calculate Expected Calibration Error (ECE): the sample-weighted mean, over non-empty buckets, of |mean predicted - mean actual|
151
+ * Entries are bucketed by floor(predicted * numBuckets), capped at the last bucket; numBuckets defaults to 10 and an empty array yields 0
152
+ */
153
+ export declare function calculateECE(predictions: Array<{
154
+ predicted: number;
155
+ actual: number;
156
+ }>, numBuckets?: number): number;
157
+ /**
158
+ * Check if a model is well-calibrated based on Brier score (true when brierScore is strictly below 0.1)
159
+ */
160
+ export declare function isWellCalibrated(brierScore: number): boolean;
161
+ /**
162
+ * Get calibration quality label: 'excellent' below 0.05, 'good' below 0.1, 'fair' below 0.2, otherwise 'poor'
163
+ */
164
+ export declare function getCalibrationQuality(brierScore: number): 'excellent' | 'good' | 'fair' | 'poor';
165
+ /**
166
+ * Format Brier score for display as a string with exactly 4 decimal places (uses `toFixed(4)`)
167
+ */
168
+ export declare function formatBrierScore(score: number): string;
@@ -0,0 +1,176 @@
1
+ "use strict";
2
+ /**
3
+ * ThinkHive SDK v3.0 - Calibration API
4
+ *
5
+ * Prediction accuracy tracking with Brier scores and calibration metrics
6
+ */
7
+ Object.defineProperty(exports, "__esModule", { value: true });
8
+ exports.calibration = void 0;
9
+ exports.calculateBrierScore = calculateBrierScore;
10
+ exports.calculateECE = calculateECE;
11
+ exports.isWellCalibrated = isWellCalibrated;
12
+ exports.getCalibrationQuality = getCalibrationQuality;
13
+ exports.formatBrierScore = formatBrierScore;
14
+ const client_1 = require("../core/client");
15
+ /**
16
+ * Calibration API client for prediction accuracy tracking
17
+ */
18
+ exports.calibration = {
19
+ /**
20
+ * Get calibration status for an agent
21
+ *
22
+ * @example
23
+ * ```typescript
24
+ * const status = await calibration.status('agent_123', 'churn_risk');
25
+ * console.log(`Brier score: ${status.brierScore}`);
26
+ * console.log(`Is calibrated: ${status.isCalibrated}`);
27
+ * ```
28
+ */
29
+ async status(agentId, predictionType) {
30
+ return (0, client_1.apiRequestWithData)(`/calibration/status/${agentId}?predictionType=${predictionType}`);
31
+ },
32
+ /**
33
+ * Get all calibration metrics for an agent
34
+ *
35
+ * @example
36
+ * ```typescript
37
+ * const metrics = await calibration.allMetrics('agent_123');
38
+ * for (const m of metrics) {
39
+ * console.log(`${m.predictionType}: Brier=${m.brierScore}`);
40
+ * }
41
+ * ```
42
+ */
43
+ async allMetrics(agentId) {
44
+ return (0, client_1.apiRequestWithData)(`/calibration/metrics/${agentId}`);
45
+ },
46
+ /**
47
+ * Record a prediction outcome for calibration tracking
48
+ *
49
+ * @example
50
+ * ```typescript
51
+ * // Record a churn prediction outcome
52
+ * await calibration.recordOutcome({
53
+ * runId: 'run_abc123',
54
+ * predictionType: 'churn_risk',
55
+ * predictedValue: 0.7, // We predicted 70% churn risk
56
+ * actualOutcome: 1, // Customer did churn
57
+ * });
58
+ *
59
+ * // Record a resolution time prediction
60
+ * await calibration.recordOutcome({
61
+ * runId: 'run_abc123',
62
+ * predictionType: 'resolution_time',
63
+ * predictedValue: 15, // Predicted 15 minutes
64
+ * actualOutcome: 22, // Actual was 22 minutes
65
+ * });
66
+ * ```
67
+ */
68
+ async recordOutcome(input) {
69
+ return (0, client_1.apiRequestWithData)('/calibration/record', {
70
+ method: 'POST',
71
+ body: {
72
+ ...input,
73
+ predictedAt: input.predictedAt || new Date().toISOString(),
74
+ observedAt: input.observedAt || new Date().toISOString(),
75
+ },
76
+ });
77
+ },
78
+ /**
79
+ * Trigger recalibration for an agent
80
+ *
81
+ * @example
82
+ * ```typescript
83
+ * const result = await calibration.retrain('agent_123', {
84
+ * predictionTypes: ['churn_risk', 'escalation_risk'],
85
+ * });
86
+ * console.log(`Retrained: ${result.success}`);
87
+ * ```
88
+ */
89
+ async retrain(agentId, options = {}) {
90
+ return (0, client_1.apiRequestWithData)(`/calibration/retrain/${agentId}`, {
91
+ method: 'POST',
92
+ body: options,
93
+ });
94
+ },
95
+ /**
96
+ * Get reliability diagram data for visualization
97
+ *
98
+ * @example
99
+ * ```typescript
100
+ * const diagram = await calibration.reliabilityDiagram('agent_123', 'outcome');
101
+ * // Use diagram.buckets to plot predicted vs actual probabilities
102
+ * ```
103
+ */
104
+ async reliabilityDiagram(agentId, predictionType) {
105
+ return (0, client_1.apiRequestWithData)(`/calibration/diagram/${agentId}?predictionType=${predictionType}`);
106
+ },
107
+ };
108
+ // ============================================================================
109
+ // HELPER FUNCTIONS
110
+ // ============================================================================
111
/**
 * Compute the Brier score for a set of predictions.
 *
 * The score is the mean of the squared differences between each
 * `predicted` value and its `actual` outcome. Lower is better; below 0.1 is
 * considered good.
 *
 * @param predictions - Entries with numeric `predicted` and `actual` fields.
 * @returns The mean squared difference, or 0 when `predictions` is empty.
 */
function calculateBrierScore(predictions) {
    const count = predictions.length;
    if (count === 0) {
        return 0;
    }
    let squaredErrorTotal = 0;
    predictions.forEach(({ predicted, actual }) => {
        squaredErrorTotal = squaredErrorTotal + Math.pow(predicted - actual, 2);
    });
    return squaredErrorTotal / count;
}
123
+ /**
124
+ * Calculate Expected Calibration Error (ECE)
125
+ * Measures how well-calibrated predictions are across confidence buckets
126
+ */
127
+ function calculateECE(predictions, numBuckets = 10) {
128
+ if (predictions.length === 0)
129
+ return 0;
130
+ const buckets = [];
131
+ for (let i = 0; i < numBuckets; i++) {
132
+ buckets.push({ predictions: [], actuals: [] });
133
+ }
134
+ // Assign predictions to buckets
135
+ for (const { predicted, actual } of predictions) {
136
+ const bucketIndex = Math.min(Math.floor(predicted * numBuckets), numBuckets - 1);
137
+ buckets[bucketIndex].predictions.push(predicted);
138
+ buckets[bucketIndex].actuals.push(actual);
139
+ }
140
+ // Calculate ECE
141
+ let ece = 0;
142
+ for (const bucket of buckets) {
143
+ if (bucket.predictions.length === 0)
144
+ continue;
145
+ const avgPredicted = bucket.predictions.reduce((a, b) => a + b, 0) / bucket.predictions.length;
146
+ const avgActual = bucket.actuals.reduce((a, b) => a + b, 0) / bucket.actuals.length;
147
+ const weight = bucket.predictions.length / predictions.length;
148
+ ece += weight * Math.abs(avgPredicted - avgActual);
149
+ }
150
+ return ece;
151
+ }
152
/**
 * Report whether a Brier score counts as well-calibrated.
 *
 * @param brierScore - Brier score to classify.
 * @returns `true` only when the score is strictly below 0.1.
 */
function isWellCalibrated(brierScore) {
    const threshold = 0.1;
    const belowThreshold = brierScore < threshold;
    return belowThreshold;
}
158
/**
 * Map a Brier score to a calibration quality label.
 *
 * Tiers are checked from best to worst using strict upper bounds:
 * below 0.05 is 'excellent', below 0.1 is 'good', below 0.2 is 'fair'.
 * Anything else, including NaN, is 'poor'.
 *
 * @param brierScore - Brier score to classify.
 * @returns One of 'excellent', 'good', 'fair' or 'poor'.
 */
function getCalibrationQuality(brierScore) {
    const tiers = [
        { upperBound: 0.05, label: 'excellent' },
        { upperBound: 0.1, label: 'good' },
        { upperBound: 0.2, label: 'fair' },
    ];
    for (const tier of tiers) {
        if (brierScore < tier.upperBound) {
            return tier.label;
        }
    }
    return 'poor';
}
170
/**
 * Render a Brier score as display text.
 *
 * @param score - Brier score to format.
 * @returns The score in fixed-point notation with four decimal places.
 */
function formatBrierScore(score) {
    const decimalPlaces = 4;
    const text = score.toFixed(decimalPlaces);
    return text;
}
176
+ //# sourceMappingURL=data:application/json;base64,{"version":3,"file":"calibration.js","sourceRoot":"","sources":["../../src/api/calibration.ts"],"names":[],"mappings":";AAAA;;;;GAIG;;;AAgMH,kDAUC;AAMD,oCAoCC;AAKD,4CAEC;AAKD,sDAOC;AAKD,4CAEC;AA5QD,2CAAgE;AAoDhE;;GAEG;AACU,QAAA,WAAW,GAAG;IACzB;;;;;;;;;OASG;IACH,KAAK,CAAC,MAAM,CACV,OAAe,EACf,cAA8B;QAE9B,OAAO,IAAA,2BAAkB,EACvB,uBAAuB,OAAO,mBAAmB,cAAc,EAAE,CAClE,CAAC;IACJ,CAAC;IAED;;;;;;;;;;OAUG;IACH,KAAK,CAAC,UAAU,CAAC,OAAe;QAC9B,OAAO,IAAA,2BAAkB,EACvB,wBAAwB,OAAO,EAAE,CAClC,CAAC;IACJ,CAAC;IAED;;;;;;;;;;;;;;;;;;;;;OAqBG;IACH,KAAK,CAAC,aAAa,CAAC,KAAyB;QAK3C,OAAO,IAAA,2BAAkB,EAAC,qBAAqB,EAAE;YAC/C,MAAM,EAAE,MAAM;YACd,IAAI,EAAE;gBACJ,GAAG,KAAK;gBACR,WAAW,EAAE,KAAK,CAAC,WAAW,IAAI,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;gBAC1D,UAAU,EAAE,KAAK,CAAC,UAAU,IAAI,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;aACzD;SACF,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;;;;OAUG;IACH,KAAK,CAAC,OAAO,CACX,OAAe,EACf,UAGI,EAAE;QAON,OAAO,IAAA,2BAAkB,EAAC,wBAAwB,OAAO,EAAE,EAAE;YAC3D,MAAM,EAAE,MAAM;YACd,IAAI,EAAE,OAAO;SACd,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;;OAQG;IACH,KAAK,CAAC,kBAAkB,CACtB,OAAe,EACf,cAA8B;QAO9B,OAAO,IAAA,2BAAkB,EACvB,wBAAwB,OAAO,mBAAmB,cAAc,EAAE,CACnE,CAAC;IACJ,CAAC;CACF,CAAC;AAEF,+EAA+E;AAC/E,mBAAmB;AACnB,+EAA+E;AAE/E;;;GAGG;AACH,SAAgB,mBAAmB,CACjC,WAAyD;IAEzD,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAEvC,MAAM,GAAG,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,MAAM,EAAE,EAAE,EAAE;QAC5D,OAAO,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,SAAS,GAAG,MAAM,EAAE,CAAC,CAAC,CAAC;IAC/C,CAAC,EAAE,CAAC,CAAC,CAAC;IAEN,OAAO,GAAG,GAAG,WAAW,CAAC,MAAM,CAAC;AAClC,CAAC;AAED;;;GAGG;AACH,SAAgB,YAAY,CAC1B,WAAyD,EACzD,aAAqB,EAAE;IAEvB,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAEvC,MAAM,OAAO,GAAwD,EAAE,CAAC;IACxE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;QACpC,OAAO,CAAC,IAAI,CAAC,EAAE,WAAW,EAAE,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,CAAC,CAAC;IACjD,CAAC;IAED,gCAAgC;IAChC,KAAK,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,IAAI,WAAW,EAAE,CAAC;QAChD,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,
CAC1B,IAAI,CAAC,KAAK,CAAC,SAAS,GAAG,UAAU,CAAC,EAClC,UAAU,GAAG,CAAC,CACf,CAAC;QACF,OAAO,CAAC,WAAW,CAAC,CAAC,WAAW,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QACjD,OAAO,CAAC,WAAW,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAC5C,CAAC;IAED,gBAAgB;IAChB,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,IAAI,MAAM,CAAC,WAAW,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QAE9C,MAAM,YAAY,GAChB,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC;QAC5E,MAAM,SAAS,GACb,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC;QACpE,MAAM,MAAM,GAAG,MAAM,CAAC,WAAW,CAAC,MAAM,GAAG,WAAW,CAAC,MAAM,CAAC;QAE9D,GAAG,IAAI,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,YAAY,GAAG,SAAS,CAAC,CAAC;IACrD,CAAC;IAED,OAAO,GAAG,CAAC;AACb,CAAC;AAED;;GAEG;AACH,SAAgB,gBAAgB,CAAC,UAAkB;IACjD,OAAO,UAAU,GAAG,GAAG,CAAC;AAC1B,CAAC;AAED;;GAEG;AACH,SAAgB,qBAAqB,CACnC,UAAkB;IAElB,IAAI,UAAU,GAAG,IAAI;QAAE,OAAO,WAAW,CAAC;IAC1C,IAAI,UAAU,GAAG,GAAG;QAAE,OAAO,MAAM,CAAC;IACpC,IAAI,UAAU,GAAG,GAAG;QAAE,OAAO,MAAM,CAAC;IACpC,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,SAAgB,gBAAgB,CAAC,KAAa;IAC5C,OAAO,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;AAC1B,CAAC","sourcesContent":["/**\n * ThinkHive SDK v3.0 - Calibration API\n *\n * Prediction accuracy tracking with Brier scores and calibration metrics\n */\n\nimport { apiRequest, apiRequestWithData } from '../core/client';\nimport type {\n  CalibrationStatus,\n  CalibrationBucket,\n  PredictionType,\n  ApiResponse,\n} from '../core/types';\n\n// ============================================================================\n// CALIBRATION API CLIENT\n// ============================================================================\n\n/**\n * Record outcome input\n */\nexport interface RecordOutcomeInput {\n  /** Run ID the prediction was made for */\n  runId: string;\n  /** Type of prediction */\n  predictionType: PredictionType;\n  /** The predicted value (0-1 for 
probabilities) */\n  predictedValue: number;\n  /** The actual outcome (0 or 1 for binary, or actual value) */\n  actualOutcome: number;\n  /** When the prediction was made */\n  predictedAt?: string;\n  /** When the outcome was observed */\n  observedAt?: string;\n}\n\n/**\n * Calibration metrics\n */\nexport interface CalibrationMetrics {\n  agentId: string;\n  predictionType: PredictionType;\n  /** Brier score (lower is better, <0.1 is good) */\n  brierScore: number;\n  /** Expected Calibration Error */\n  ece: number;\n  /** Maximum Calibration Error */\n  mce: number;\n  /** Sample count */\n  sampleCount: number;\n  /** Is the model well-calibrated */\n  isCalibrated: boolean;\n  /** Reliability diagram data */\n  reliabilityDiagram: CalibrationBucket[];\n  /** Last updated */\n  lastUpdated: string;\n}\n\n/**\n * Calibration API client for prediction accuracy tracking\n */\nexport const calibration = {\n  /**\n   * Get calibration status for an agent\n   *\n   * @example\n   * ```typescript\n   * const status = await calibration.status('agent_123', 'churn_risk');\n   * console.log(`Brier score: ${status.brierScore}`);\n   * console.log(`Is calibrated: ${status.isCalibrated}`);\n   * ```\n   */\n  async status(\n    agentId: string,\n    predictionType: PredictionType\n  ): Promise<CalibrationStatus> {\n    return apiRequestWithData<CalibrationStatus>(\n      `/calibration/status/${agentId}?predictionType=${predictionType}`\n    );\n  },\n\n  /**\n   * Get all calibration metrics for an agent\n   *\n   * @example\n   * ```typescript\n   * const metrics = await calibration.allMetrics('agent_123');\n   * for (const m of metrics) {\n   *   console.log(`${m.predictionType}: Brier=${m.brierScore}`);\n   * }\n   * ```\n   */\n  async allMetrics(agentId: string): Promise<CalibrationMetrics[]> {\n    return apiRequestWithData<CalibrationMetrics[]>(\n      `/calibration/metrics/${agentId}`\n    );\n  },\n\n  /**\n   * Record a prediction outcome for calibration 
tracking\n   *\n   * @example\n   * ```typescript\n   * // Record a churn prediction outcome\n   * await calibration.recordOutcome({\n   *   runId: 'run_abc123',\n   *   predictionType: 'churn_risk',\n   *   predictedValue: 0.7,  // We predicted 70% churn risk\n   *   actualOutcome: 1,     // Customer did churn\n   * });\n   *\n   * // Record a resolution time prediction\n   * await calibration.recordOutcome({\n   *   runId: 'run_abc123',\n   *   predictionType: 'resolution_time',\n   *   predictedValue: 15,   // Predicted 15 minutes\n   *   actualOutcome: 22,    // Actual was 22 minutes\n   * });\n   * ```\n   */\n  async recordOutcome(input: RecordOutcomeInput): Promise<{\n    recorded: boolean;\n    brierContribution: number;\n    message: string;\n  }> {\n    return apiRequestWithData('/calibration/record', {\n      method: 'POST',\n      body: {\n        ...input,\n        predictedAt: input.predictedAt || new Date().toISOString(),\n        observedAt: input.observedAt || new Date().toISOString(),\n      },\n    });\n  },\n\n  /**\n   * Trigger recalibration for an agent\n   *\n   * @example\n   * ```typescript\n   * const result = await calibration.retrain('agent_123', {\n   *   predictionTypes: ['churn_risk', 'escalation_risk'],\n   * });\n   * console.log(`Retrained: ${result.success}`);\n   * ```\n   */\n  async retrain(\n    agentId: string,\n    options: {\n      predictionTypes?: PredictionType[];\n      minSamples?: number;\n    } = {}\n  ): Promise<{\n    success: boolean;\n    retrainedTypes: PredictionType[];\n    skippedTypes: Array<{ type: PredictionType; reason: string }>;\n    newMetrics: CalibrationMetrics[];\n  }> {\n    return apiRequestWithData(`/calibration/retrain/${agentId}`, {\n      method: 'POST',\n      body: options,\n    });\n  },\n\n  /**\n   * Get reliability diagram data for visualization\n   *\n   * @example\n   * ```typescript\n   * const diagram = await calibration.reliabilityDiagram('agent_123', 'outcome');\n   * // Use 
diagram.buckets to plot predicted vs actual probabilities\n   * ```\n   */\n  async reliabilityDiagram(\n    agentId: string,\n    predictionType: PredictionType\n  ): Promise<{\n    agentId: string;\n    predictionType: PredictionType;\n    buckets: CalibrationBucket[];\n    perfectCalibrationLine: Array<{ x: number; y: number }>;\n  }> {\n    return apiRequestWithData(\n      `/calibration/diagram/${agentId}?predictionType=${predictionType}`\n    );\n  },\n};\n\n// ============================================================================\n// HELPER FUNCTIONS\n// ============================================================================\n\n/**\n * Calculate Brier score from predictions and outcomes\n * Lower is better, <0.1 is considered good\n */\nexport function calculateBrierScore(\n  predictions: Array<{ predicted: number; actual: number }>\n): number {\n  if (predictions.length === 0) return 0;\n\n  const sum = predictions.reduce((acc, { predicted, actual }) => {\n    return acc + Math.pow(predicted - actual, 2);\n  }, 0);\n\n  return sum / predictions.length;\n}\n\n/**\n * Calculate Expected Calibration Error (ECE)\n * Measures how well-calibrated predictions are across confidence buckets\n */\nexport function calculateECE(\n  predictions: Array<{ predicted: number; actual: number }>,\n  numBuckets: number = 10\n): number {\n  if (predictions.length === 0) return 0;\n\n  const buckets: Array<{ predictions: number[]; actuals: number[] }> = [];\n  for (let i = 0; i < numBuckets; i++) {\n    buckets.push({ predictions: [], actuals: [] });\n  }\n\n  // Assign predictions to buckets\n  for (const { predicted, actual } of predictions) {\n    const bucketIndex = Math.min(\n      Math.floor(predicted * numBuckets),\n      numBuckets - 1\n    );\n    buckets[bucketIndex].predictions.push(predicted);\n    buckets[bucketIndex].actuals.push(actual);\n  }\n\n  // Calculate ECE\n  let ece = 0;\n  for (const bucket of buckets) {\n    if (bucket.predictions.length === 
0) continue;\n\n    const avgPredicted =\n      bucket.predictions.reduce((a, b) => a + b, 0) / bucket.predictions.length;\n    const avgActual =\n      bucket.actuals.reduce((a, b) => a + b, 0) / bucket.actuals.length;\n    const weight = bucket.predictions.length / predictions.length;\n\n    ece += weight * Math.abs(avgPredicted - avgActual);\n  }\n\n  return ece;\n}\n\n/**\n * Check if a model is well-calibrated based on Brier score\n */\nexport function isWellCalibrated(brierScore: number): boolean {\n  return brierScore < 0.1;\n}\n\n/**\n * Get calibration quality label\n */\nexport function getCalibrationQuality(\n  brierScore: number\n): 'excellent' | 'good' | 'fair' | 'poor' {\n  if (brierScore < 0.05) return 'excellent';\n  if (brierScore < 0.1) return 'good';\n  if (brierScore < 0.2) return 'fair';\n  return 'poor';\n}\n\n/**\n * Format Brier score for display\n */\nexport function formatBrierScore(score: number): string {\n  return score.toFixed(4);\n}\n"]}