@thinkhive/sdk 3.1.0 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,300 @@
1
+ /**
2
+ * ThinkHive SDK v3.0 - Non-Determinism API
3
+ *
4
+ * API for pass@k / pass^k analysis to measure LLM evaluation reliability
5
+ */
6
/** The kinds of analysis a non-determinism run can be created with. */
export type NondeterminismRunType =
    | 'pass_at_k'
    | 'pass_to_k'
    | 'variance'
    | 'reliability';
7
/** Lifecycle states reported for a non-determinism run. */
export type NondeterminismRunStatus =
    | 'pending'
    | 'running'
    | 'completed'
    | 'failed'
    | 'cancelled';
8
/**
 * A non-determinism analysis run as returned by the API.
 *
 * NOTE(review): the aggregate metrics and `temperature` are typed as strings
 * here, presumably because the API serializes decimals as strings; confirm
 * against the backend before parsing them.
 */
export interface NondeterminismRun {
    // --- identity ---
    id: string;
    companyId: string;
    agentId: string;

    // --- configuration ---
    runType: NondeterminismRunType;
    /** The `k` used for the pass@k / pass^k computation. */
    kValue: number;
    criterionId?: string;
    criteriaIds: string[];
    temperature?: string;
    model?: string;

    // --- progress ---
    status: NondeterminismRunStatus;
    traceCount: number;
    progressPercent: number;

    // --- aggregate results (optional) ---
    passAtKRate?: string;
    passToKRate?: string;
    avgVariance?: string;
    reliabilityScore?: string;

    // --- audit fields ---
    startedAt?: string;
    completedAt?: string;
    createdBy?: string;
    createdAt: string;
}
30
/**
 * One recorded evaluation sample belonging to a run.
 *
 * NOTE(review): `score`, `confidence`, `costUsd` and `temperature` are strings
 * here; presumably decimals serialized by the API. Confirm before doing
 * arithmetic on them.
 */
export interface NondeterminismSample {
    // --- identity / position ---
    id: string;
    runId: string;
    traceId: string;
    criterionId: string;
    /** Position of this sample within the repeated evaluations of a trace. */
    sampleIndex: number;

    // --- verdict ---
    score: string;
    passed: boolean;
    reasoning?: string;
    confidence?: string;

    // --- execution metadata ---
    tokensUsed?: number;
    costUsd?: string;
    model?: string;
    temperature?: string;
    latencyMs?: number;
    error?: string;

    createdAt: string;
}
48
/** Request payload accepted by `nondeterminism.createRun`. */
export interface CreateRunOptions {
    // --- required ---
    agentId: string;
    /** The `k` for the analysis (number of repeated evaluations). */
    kValue: number;
    /** Traces to include in the run. */
    traceIds: string[];

    // --- optional ---
    criterionId?: string;
    criteriaIds?: string[];
    runType?: NondeterminismRunType;
    temperature?: number;
    model?: string;
}
58
/** Request payload accepted by `nondeterminism.recordSample`. */
export interface RecordSampleOptions {
    // --- which sample this is ---
    runId: string;
    traceId: string;
    criterionId: string;
    sampleIndex: number;

    // --- verdict ---
    score: number;
    passed: boolean;
    reasoning?: string;
    confidence?: number;

    // --- execution metadata ---
    tokensUsed?: number;
    costUsd?: number;
    model?: string;
    temperature?: number;
    latencyMs?: number;
    error?: string;
}
74
/** Per-trace statistics computed over the samples recorded for that trace. */
export interface TraceAnalysis {
    traceId: string;
    samples: NondeterminismSample[];

    // --- pass statistics ---
    passCount: number;
    totalCount: number;
    passRate: number;

    // --- score statistics ---
    meanScore: number;
    scoreVariance: number;

    isConsistent: boolean;
}
84
/**
 * Per-criterion reliability analysis.
 *
 * `reliabilityScore` is the value the `isReliableEvaluation` and
 * `getReliabilityRecommendation` helpers compare against their thresholds.
 */
export interface CriterionAnalysis {
    criterionId: string;
    traceAnalyses: TraceAnalysis[];

    // --- metrics ---
    passAtKRate: number;
    passToKRate: number;
    reliabilityScore: number;

    // --- verdict ---
    isReliable: boolean;
    recommendation: string;
}
93
/** A run together with its per-trace and per-criterion analyses. */
export interface RunSummary {
    /** The run the analyses belong to. */
    run: NondeterminismRun;
    /** One entry per analysed trace. */
    traceAnalyses: TraceAnalysis[];
    /** One entry per analysed criterion. */
    criterionAnalyses: CriterionAnalysis[];
}
98
/** Optional filters and paging parameters accepted by `nondeterminism.getRuns`. */
export interface ListRunsOptions {
    // --- filters ---
    agentId?: string;
    status?: NondeterminismRunStatus;

    // --- paging ---
    limit?: number;
    offset?: number;
}
104
/**
 * Reference information returned by `nondeterminism.getInfo`: concept
 * descriptions, recommendations keyed by string, and default settings.
 */
export interface PassAtKInfo {
    /** Concept descriptions; only pass@k and pass^k carry a `formula`. */
    concepts: {
        passAtK: { name: string; description: string; formula: string; useCase: string };
        passToK: { name: string; description: string; formula: string; useCase: string };
        variance: { name: string; description: string; useCase: string };
        reliability: { name: string; description: string; useCase: string };
    };
    recommendations: Record<string, string>;
    defaults: { kValue: number; reliabilityThreshold: number; varianceThreshold: number };
}
136
/**
 * Client for the Non-Determinism API (pass@k analysis and reliability
 * measurement). Every method is asynchronous and performs one API request.
 */
export declare const nondeterminism: {
    /**
     * Creates a non-determinism analysis run.
     *
     * @param options - Agent, traces, `k` and optional criteria/model settings.
     * @returns The created run.
     *
     * @example
     * ```typescript
     * const run = await nondeterminism.createRun({
     *   agentId: 'agent_123',
     *   criterionId: 'criterion_456',
     *   kValue: 5,
     *   traceIds: ['trace_1', 'trace_2', 'trace_3'],
     *   runType: 'pass_at_k',
     * });
     * ```
     */
    createRun(options: CreateRunOptions): Promise<NondeterminismRun>;
    /**
     * Lists runs, optionally filtered by agent and status and paged with
     * `limit` / `offset`.
     *
     * @param options - Filters and paging; all fields are optional.
     * @returns The matching runs.
     *
     * @example
     * ```typescript
     * const runs = await nondeterminism.getRuns({ agentId: 'agent_123' });
     * ```
     */
    getRuns(options?: ListRunsOptions): Promise<NondeterminismRun[]>;
    /**
     * Fetches a single run.
     *
     * @param runId - Id of the run to fetch.
     *
     * @example
     * ```typescript
     * const run = await nondeterminism.getRun('run_123');
     * ```
     */
    getRun(runId: string): Promise<NondeterminismRun>;
    /**
     * Starts a run. Resolves with no value.
     *
     * @param runId - Id of the run to start.
     *
     * @example
     * ```typescript
     * await nondeterminism.startRun('run_123');
     * ```
     */
    startRun(runId: string): Promise<void>;
    /**
     * Completes a run. Resolves with no value.
     *
     * @param runId - Id of the run to complete.
     *
     * @example
     * ```typescript
     * await nondeterminism.completeRun('run_123');
     * ```
     */
    completeRun(runId: string): Promise<void>;
    /**
     * Records the result of one sample.
     *
     * @param options - The sample's run/trace/criterion, index and verdict.
     * @returns The stored sample.
     *
     * @example
     * ```typescript
     * const sample = await nondeterminism.recordSample({
     *   runId: 'run_123',
     *   traceId: 'trace_456',
     *   criterionId: 'criterion_789',
     *   sampleIndex: 0,
     *   score: 85,
     *   passed: true,
     *   reasoning: 'Response meets quality criteria',
     * });
     * ```
     */
    recordSample(options: RecordSampleOptions): Promise<NondeterminismSample>;
    /**
     * Fetches the samples of a run.
     *
     * @param runId - Id of the run whose samples are requested.
     *
     * @example
     * ```typescript
     * const samples = await nondeterminism.getSamples('run_123');
     * ```
     */
    getSamples(runId: string): Promise<NondeterminismSample[]>;
    /**
     * Fetches the summary (run plus analyses) of a run.
     *
     * @param runId - Id of the run to summarize.
     *
     * @example
     * ```typescript
     * const summary = await nondeterminism.getRunSummary('run_123');
     * console.log(`Pass@k rate: ${summary.criterionAnalyses[0].passAtKRate}`);
     * ```
     */
    getRunSummary(runId: string): Promise<RunSummary>;
    /**
     * Triggers analysis of a completed run and returns the resulting summary.
     *
     * @param runId - Id of the run to analyze.
     *
     * @example
     * ```typescript
     * const summary = await nondeterminism.analyzeRun('run_123');
     * ```
     */
    analyzeRun(runId: string): Promise<RunSummary>;
    /**
     * Fetches reference information about pass@k analysis.
     *
     * @example
     * ```typescript
     * const info = await nondeterminism.getInfo();
     * console.log(info.concepts.passAtK.description);
     * ```
     */
    getInfo(): Promise<PassAtKInfo>;
};
247
/**
 * Pass@k: the probability that at least one of `k` runs passes, computed as
 * `1 - (1 - passRate)^k`.
 *
 * @param passRate - Single-run pass rate (0-1)
 * @param k - Number of runs
 * @returns Probability that at least 1 of k runs passes
 *
 * @example
 * ```typescript
 * calculatePassAtK(0.7, 3); // ~0.973
 * ```
 */
export declare function calculatePassAtK(passRate: number, k: number): number;
260
/**
 * Pass^k: the probability that every one of `k` runs passes, computed as
 * `passRate^k`.
 *
 * @param passRate - Single-run pass rate (0-1)
 * @param k - Number of runs
 * @returns Probability that all k runs pass
 *
 * @example
 * ```typescript
 * calculatePassToK(0.7, 3); // ~0.343
 * ```
 */
export declare function calculatePassToK(passRate: number, k: number): number;
273
/**
 * Inverse of pass@k: the single-run pass rate needed to reach a target
 * pass@k, computed as `1 - (1 - targetPassAtK)^(1/k)`.
 *
 * @param targetPassAtK - Desired pass@k probability
 * @param k - Number of runs
 * @returns Required single-run pass rate
 *
 * @example
 * ```typescript
 * requiredPassRateForPassAtK(0.95, 3); // ~0.632
 * ```
 */
export declare function requiredPassRateForPassAtK(targetPassAtK: number, k: number): number;
286
/**
 * Tells whether a criterion analysis meets a reliability threshold, i.e.
 * whether `analysis.reliabilityScore >= reliabilityThreshold`.
 *
 * @param analysis - Criterion analysis result
 * @param reliabilityThreshold - Minimum reliability score (default 0.8)
 * @returns Whether the evaluation is considered reliable
 */
export declare function isReliableEvaluation(analysis: CriterionAnalysis, reliabilityThreshold?: number): boolean;
294
/**
 * Maps a criterion analysis' reliability score to a recommendation, using
 * the tiers `>= 0.9`, `>= 0.8`, `>= 0.6` and "everything else".
 *
 * @param analysis - Criterion analysis result
 * @returns Actionable recommendation string
 */
export declare function getReliabilityRecommendation(analysis: CriterionAnalysis): string;
@@ -0,0 +1,250 @@
1
+ "use strict";
2
+ /**
3
+ * ThinkHive SDK v3.0 - Non-Determinism API
4
+ *
5
+ * API for pass@k / pass^k analysis to measure LLM evaluation reliability
6
+ */
7
+ Object.defineProperty(exports, "__esModule", { value: true });
8
+ exports.nondeterminism = void 0;
9
+ exports.calculatePassAtK = calculatePassAtK;
10
+ exports.calculatePassToK = calculatePassToK;
11
+ exports.requiredPassRateForPassAtK = requiredPassRateForPassAtK;
12
+ exports.isReliableEvaluation = isReliableEvaluation;
13
+ exports.getReliabilityRecommendation = getReliabilityRecommendation;
14
+ const client_1 = require("../core/client");
15
+ // ============================================================================
16
+ // NON-DETERMINISM API CLIENT
17
+ // ============================================================================
18
+ /**
19
+ * Non-Determinism API client for pass@k analysis and reliability measurement
20
+ */
21
+ exports.nondeterminism = {
22
+ /**
23
+ * Create a new non-determinism analysis run
24
+ *
25
+ * @example
26
+ * ```typescript
27
+ * const run = await nondeterminism.createRun({
28
+ * agentId: 'agent_123',
29
+ * criterionId: 'criterion_456',
30
+ * kValue: 5,
31
+ * traceIds: ['trace_1', 'trace_2', 'trace_3'],
32
+ * runType: 'pass_at_k',
33
+ * });
34
+ * ```
35
+ */
36
+ async createRun(options) {
37
+ return (0, client_1.apiRequestWithData)('/nondeterminism/runs', {
38
+ method: 'POST',
39
+ body: options,
40
+ apiVersion: 'v1',
41
+ });
42
+ },
43
+ /**
44
+ * Get non-determinism runs
45
+ *
46
+ * @example
47
+ * ```typescript
48
+ * const runs = await nondeterminism.getRuns({ agentId: 'agent_123' });
49
+ * ```
50
+ */
51
+ async getRuns(options = {}) {
52
+ const params = new URLSearchParams();
53
+ if (options.agentId)
54
+ params.set('agentId', options.agentId);
55
+ if (options.status)
56
+ params.set('status', options.status);
57
+ if (options.limit)
58
+ params.set('limit', String(options.limit));
59
+ if (options.offset)
60
+ params.set('offset', String(options.offset));
61
+ return (0, client_1.apiRequestWithData)(`/nondeterminism/runs?${params.toString()}`, { apiVersion: 'v1' });
62
+ },
63
+ /**
64
+ * Get a specific run
65
+ *
66
+ * @example
67
+ * ```typescript
68
+ * const run = await nondeterminism.getRun('run_123');
69
+ * ```
70
+ */
71
+ async getRun(runId) {
72
+ return (0, client_1.apiRequestWithData)(`/nondeterminism/runs/${runId}`, { apiVersion: 'v1' });
73
+ },
74
+ /**
75
+ * Start a run
76
+ *
77
+ * @example
78
+ * ```typescript
79
+ * await nondeterminism.startRun('run_123');
80
+ * ```
81
+ */
82
+ async startRun(runId) {
83
+ await (0, client_1.apiRequest)(`/nondeterminism/runs/${runId}/start`, {
84
+ method: 'POST',
85
+ apiVersion: 'v1',
86
+ });
87
+ },
88
+ /**
89
+ * Complete a run
90
+ *
91
+ * @example
92
+ * ```typescript
93
+ * await nondeterminism.completeRun('run_123');
94
+ * ```
95
+ */
96
+ async completeRun(runId) {
97
+ await (0, client_1.apiRequest)(`/nondeterminism/runs/${runId}/complete`, {
98
+ method: 'POST',
99
+ apiVersion: 'v1',
100
+ });
101
+ },
102
+ /**
103
+ * Record a sample result
104
+ *
105
+ * @example
106
+ * ```typescript
107
+ * const sample = await nondeterminism.recordSample({
108
+ * runId: 'run_123',
109
+ * traceId: 'trace_456',
110
+ * criterionId: 'criterion_789',
111
+ * sampleIndex: 0,
112
+ * score: 85,
113
+ * passed: true,
114
+ * reasoning: 'Response meets quality criteria',
115
+ * });
116
+ * ```
117
+ */
118
+ async recordSample(options) {
119
+ return (0, client_1.apiRequestWithData)('/nondeterminism/samples', {
120
+ method: 'POST',
121
+ body: options,
122
+ apiVersion: 'v1',
123
+ });
124
+ },
125
+ /**
126
+ * Get samples for a run
127
+ *
128
+ * @example
129
+ * ```typescript
130
+ * const samples = await nondeterminism.getSamples('run_123');
131
+ * ```
132
+ */
133
+ async getSamples(runId) {
134
+ return (0, client_1.apiRequestWithData)(`/nondeterminism/runs/${runId}/samples`, { apiVersion: 'v1' });
135
+ },
136
+ /**
137
+ * Get run summary with analysis
138
+ *
139
+ * @example
140
+ * ```typescript
141
+ * const summary = await nondeterminism.getRunSummary('run_123');
142
+ * console.log(`Pass@k rate: ${summary.criterionAnalyses[0].passAtKRate}`);
143
+ * ```
144
+ */
145
+ async getRunSummary(runId) {
146
+ return (0, client_1.apiRequestWithData)(`/nondeterminism/runs/${runId}/summary`, { apiVersion: 'v1' });
147
+ },
148
+ /**
149
+ * Trigger analysis of a completed run
150
+ *
151
+ * @example
152
+ * ```typescript
153
+ * const summary = await nondeterminism.analyzeRun('run_123');
154
+ * ```
155
+ */
156
+ async analyzeRun(runId) {
157
+ return (0, client_1.apiRequestWithData)(`/nondeterminism/runs/${runId}/analyze`, { method: 'POST', apiVersion: 'v1' });
158
+ },
159
+ /**
160
+ * Get information about pass@k analysis
161
+ *
162
+ * @example
163
+ * ```typescript
164
+ * const info = await nondeterminism.getInfo();
165
+ * console.log(info.concepts.passAtK.description);
166
+ * ```
167
+ */
168
+ async getInfo() {
169
+ return (0, client_1.apiRequestWithData)('/nondeterminism/info', { apiVersion: 'v1' });
170
+ },
171
+ };
172
+ // ============================================================================
173
+ // HELPER FUNCTIONS
174
+ // ============================================================================
175
/**
 * Pass@k: probability that at least one of `k` runs passes, obtained as the
 * complement of "all k runs fail".
 *
 * @param passRate - Single-run pass rate (0-1)
 * @param k - Number of runs
 * @returns Probability that at least 1 of k runs passes
 *
 * @example
 * ```typescript
 * calculatePassAtK(0.7, 3); // ~0.973
 * ```
 */
function calculatePassAtK(passRate, k) {
    const singleRunFailure = 1 - passRate;
    const allRunsFail = Math.pow(singleRunFailure, k);
    return 1 - allRunsFail;
}
190
/**
 * Pass^k: probability that every one of `k` runs passes, i.e. the single-run
 * pass rate raised to the power `k`.
 *
 * @param passRate - Single-run pass rate (0-1)
 * @param k - Number of runs
 * @returns Probability that all k runs pass
 *
 * @example
 * ```typescript
 * calculatePassToK(0.7, 3); // ~0.343
 * ```
 */
function calculatePassToK(passRate, k) {
    const allRunsPass = Math.pow(passRate, k);
    return allRunsPass;
}
205
/**
 * Inverse of pass@k: the single-run pass rate required so that the chance of
 * at least one pass in `k` runs equals `targetPassAtK`.
 *
 * @param targetPassAtK - Desired pass@k probability
 * @param k - Number of runs
 * @returns Required single-run pass rate
 *
 * @example
 * ```typescript
 * requiredPassRateForPassAtK(0.95, 3); // ~0.632
 * ```
 */
function requiredPassRateForPassAtK(targetPassAtK, k) {
    // Probability that all k runs are allowed to fail...
    const allowedTotalFailure = 1 - targetPassAtK;
    // ...spread evenly over the runs by taking the k-th root.
    const allowedSingleFailure = Math.pow(allowedTotalFailure, 1 / k);
    return 1 - allowedSingleFailure;
}
220
/**
 * Checks a criterion analysis against a reliability threshold.
 *
 * @param analysis - Criterion analysis result
 * @param reliabilityThreshold - Minimum reliability score (default 0.8)
 * @returns Whether the evaluation is considered reliable
 */
function isReliableEvaluation(analysis, reliabilityThreshold = 0.8) {
    const { reliabilityScore } = analysis;
    return reliabilityScore >= reliabilityThreshold;
}
230
/**
 * Picks a recommendation for a criterion analysis from its reliability score.
 *
 * @param analysis - Criterion analysis result
 * @returns Actionable recommendation string
 */
function getReliabilityRecommendation(analysis) {
    const score = analysis.reliabilityScore;
    // Tiers are ordered from highest to lowest minimum score; the first match wins.
    const tiers = [
        [0.9, 'Evaluation is highly reliable. No changes needed.'],
        [0.8, 'Evaluation is reliable. Consider minor criteria refinements.'],
        [0.6, 'Evaluation has moderate reliability. Add more specific criteria or examples.'],
    ];
    for (const [minimumScore, advice] of tiers) {
        if (score >= minimumScore) {
            return advice;
        }
    }
    return 'Evaluation is unreliable. Consider using deterministic checks or restructuring criteria.';
}
250
+ //# sourceMappingURL=data:application/json;base64,{"version":3,"file":"nondeterminism.js","sourceRoot":"","sources":["../../src/api/nondeterminism.ts"],"names":[],"mappings":";AAAA;;;;GAIG;;;AAqUH,4CAEC;AAcD,4CAEC;AAcD,gEAEC;AASD,oDAKC;AAQD,oEAUC;AArYD,2CAAgE;AA4HhE,+EAA+E;AAC/E,6BAA6B;AAC7B,+EAA+E;AAE/E;;GAEG;AACU,QAAA,cAAc,GAAG;IAC5B;;;;;;;;;;;;;OAaG;IACH,KAAK,CAAC,SAAS,CAAC,OAAyB;QACvC,OAAO,IAAA,2BAAkB,EAAoB,sBAAsB,EAAE;YACnE,MAAM,EAAE,MAAM;YACd,IAAI,EAAE,OAAO;YACb,UAAU,EAAE,IAAI;SACjB,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,OAAO,CAAC,UAA2B,EAAE;QACzC,MAAM,MAAM,GAAG,IAAI,eAAe,EAAE,CAAC;QACrC,IAAI,OAAO,CAAC,OAAO;YAAE,MAAM,CAAC,GAAG,CAAC,SAAS,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC;QAC5D,IAAI,OAAO,CAAC,MAAM;YAAE,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC;QACzD,IAAI,OAAO,CAAC,KAAK;YAAE,MAAM,CAAC,GAAG,CAAC,OAAO,EAAE,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC;QAC9D,IAAI,OAAO,CAAC,MAAM;YAAE,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC;QAEjE,OAAO,IAAA,2BAAkB,EACvB,wBAAwB,MAAM,CAAC,QAAQ,EAAE,EAAE,EAC3C,EAAE,UAAU,EAAE,IAAI,EAAE,CACrB,CAAC;IACJ,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,MAAM,CAAC,KAAa;QACxB,OAAO,IAAA,2BAAkB,EACvB,wBAAwB,KAAK,EAAE,EAC/B,EAAE,UAAU,EAAE,IAAI,EAAE,CACrB,CAAC;IACJ,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,QAAQ,CAAC,KAAa;QAC1B,MAAM,IAAA,mBAAU,EAAC,wBAAwB,KAAK,QAAQ,EAAE;YACtD,MAAM,EAAE,MAAM;YACd,UAAU,EAAE,IAAI;SACjB,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,WAAW,CAAC,KAAa;QAC7B,MAAM,IAAA,mBAAU,EAAC,wBAAwB,KAAK,WAAW,EAAE;YACzD,MAAM,EAAE,MAAM;YACd,UAAU,EAAE,IAAI;SACjB,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;;;;;;;;;OAeG;IACH,KAAK,CAAC,YAAY,CAAC,OAA4B;QAC7C,OAAO,IAAA,2BAAkB,EAAuB,yBAAyB,EAAE;YACzE,MAAM,EAAE,MAAM;YACd,IAAI,EAAE,OAAO;YACb,UAAU,EAAE,IAAI;SACjB,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,UAAU,CAAC,KAAa;QAC5B,OAAO,IAAA,2BAAkB,EACvB,wBAAwB,KAAK,UAAU,EACvC,EAAE,UAAU,EAAE,IAAI,EAAE,CACrB,CAAC;IACJ,CAAC;IAED;;;;;;;;OAQG;IACH,KAAK,CAAC,aAAa,CAAC,KAAa;QAC/B,OAAO,IAAA,2BAAkB,EACvB,wBAAwB,KAAK,UAAU,EACvC,EAAE,UAAU,
EAAE,IAAI,EAAE,CACrB,CAAC;IACJ,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,UAAU,CAAC,KAAa;QAC5B,OAAO,IAAA,2BAAkB,EACvB,wBAAwB,KAAK,UAAU,EACvC,EAAE,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,IAAI,EAAE,CACrC,CAAC;IACJ,CAAC;IAED;;;;;;;;OAQG;IACH,KAAK,CAAC,OAAO;QACX,OAAO,IAAA,2BAAkB,EACvB,sBAAsB,EACtB,EAAE,UAAU,EAAE,IAAI,EAAE,CACrB,CAAC;IACJ,CAAC;CACF,CAAC;AAEF,+EAA+E;AAC/E,mBAAmB;AACnB,+EAA+E;AAE/E;;;;;;;;;;;GAWG;AACH,SAAgB,gBAAgB,CAAC,QAAgB,EAAE,CAAS;IAC1D,OAAO,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,QAAQ,EAAE,CAAC,CAAC,CAAC;AACvC,CAAC;AAED;;;;;;;;;;;GAWG;AACH,SAAgB,gBAAgB,CAAC,QAAgB,EAAE,CAAS;IAC1D,OAAO,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC;AAC/B,CAAC;AAED;;;;;;;;;;;GAWG;AACH,SAAgB,0BAA0B,CAAC,aAAqB,EAAE,CAAS;IACzE,OAAO,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,aAAa,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;AAChD,CAAC;AAED;;;;;;GAMG;AACH,SAAgB,oBAAoB,CAClC,QAA2B,EAC3B,oBAAoB,GAAG,GAAG;IAE1B,OAAO,QAAQ,CAAC,gBAAgB,IAAI,oBAAoB,CAAC;AAC3D,CAAC;AAED;;;;;GAKG;AACH,SAAgB,4BAA4B,CAAC,QAA2B;IACtE,IAAI,QAAQ,CAAC,gBAAgB,IAAI,GAAG,EAAE,CAAC;QACrC,OAAO,mDAAmD,CAAC;IAC7D,CAAC;SAAM,IAAI,QAAQ,CAAC,gBAAgB,IAAI,GAAG,EAAE,CAAC;QAC5C,OAAO,8DAA8D,CAAC;IACxE,CAAC;SAAM,IAAI,QAAQ,CAAC,gBAAgB,IAAI,GAAG,EAAE,CAAC;QAC5C,OAAO,8EAA8E,CAAC;IACxF,CAAC;SAAM,CAAC;QACN,OAAO,0FAA0F,CAAC;IACpG,CAAC;AACH,CAAC","sourcesContent":["/**\n * ThinkHive SDK v3.0 - Non-Determinism API\n *\n * API for pass@k / pass^k analysis to measure LLM evaluation reliability\n */\n\nimport { apiRequest, apiRequestWithData } from '../core/client';\n\n// ============================================================================\n// TYPES\n// ============================================================================\n\nexport type NondeterminismRunType = 'pass_at_k' | 'pass_to_k' | 'variance' | 'reliability';\nexport type NondeterminismRunStatus = 'pending' | 'running' | 'completed' | 'failed' | 'cancelled';\n\nexport interface NondeterminismRun {\n  id: string;\n  companyId: string;\n  agentId: string;\n  runType: NondeterminismRunType;\n  kValue: 
number;\n  status: NondeterminismRunStatus;\n  traceCount: number;\n  criterionId?: string;\n  criteriaIds: string[];\n  temperature?: string;\n  model?: string;\n  progressPercent: number;\n  passAtKRate?: string;\n  passToKRate?: string;\n  avgVariance?: string;\n  reliabilityScore?: string;\n  startedAt?: string;\n  completedAt?: string;\n  createdBy?: string;\n  createdAt: string;\n}\n\nexport interface NondeterminismSample {\n  id: string;\n  runId: string;\n  traceId: string;\n  criterionId: string;\n  sampleIndex: number;\n  score: string;\n  passed: boolean;\n  reasoning?: string;\n  confidence?: string;\n  tokensUsed?: number;\n  costUsd?: string;\n  model?: string;\n  temperature?: string;\n  latencyMs?: number;\n  error?: string;\n  createdAt: string;\n}\n\nexport interface CreateRunOptions {\n  agentId: string;\n  criterionId?: string;\n  criteriaIds?: string[];\n  kValue: number;\n  traceIds: string[];\n  runType?: NondeterminismRunType;\n  temperature?: number;\n  model?: string;\n}\n\nexport interface RecordSampleOptions {\n  runId: string;\n  traceId: string;\n  criterionId: string;\n  sampleIndex: number;\n  score: number;\n  passed: boolean;\n  reasoning?: string;\n  confidence?: number;\n  tokensUsed?: number;\n  costUsd?: number;\n  model?: string;\n  temperature?: number;\n  latencyMs?: number;\n  error?: string;\n}\n\nexport interface TraceAnalysis {\n  traceId: string;\n  samples: NondeterminismSample[];\n  passCount: number;\n  totalCount: number;\n  passRate: number;\n  scoreVariance: number;\n  meanScore: number;\n  isConsistent: boolean;\n}\n\nexport interface CriterionAnalysis {\n  criterionId: string;\n  traceAnalyses: TraceAnalysis[];\n  passAtKRate: number;\n  passToKRate: number;\n  reliabilityScore: number;\n  isReliable: boolean;\n  recommendation: string;\n}\n\nexport interface RunSummary {\n  run: NondeterminismRun;\n  traceAnalyses: TraceAnalysis[];\n  criterionAnalyses: CriterionAnalysis[];\n}\n\nexport interface 
ListRunsOptions {\n  agentId?: string;\n  status?: NondeterminismRunStatus;\n  limit?: number;\n  offset?: number;\n}\n\nexport interface PassAtKInfo {\n  concepts: {\n    passAtK: { name: string; description: string; formula: string; useCase: string };\n    passToK: { name: string; description: string; formula: string; useCase: string };\n    variance: { name: string; description: string; useCase: string };\n    reliability: { name: string; description: string; useCase: string };\n  };\n  recommendations: Record<string, string>;\n  defaults: { kValue: number; reliabilityThreshold: number; varianceThreshold: number };\n}\n\n// ============================================================================\n// NON-DETERMINISM API CLIENT\n// ============================================================================\n\n/**\n * Non-Determinism API client for pass@k analysis and reliability measurement\n */\nexport const nondeterminism = {\n  /**\n   * Create a new non-determinism analysis run\n   *\n   * @example\n   * ```typescript\n   * const run = await nondeterminism.createRun({\n   *   agentId: 'agent_123',\n   *   criterionId: 'criterion_456',\n   *   kValue: 5,\n   *   traceIds: ['trace_1', 'trace_2', 'trace_3'],\n   *   runType: 'pass_at_k',\n   * });\n   * ```\n   */\n  async createRun(options: CreateRunOptions): Promise<NondeterminismRun> {\n    return apiRequestWithData<NondeterminismRun>('/nondeterminism/runs', {\n      method: 'POST',\n      body: options,\n      apiVersion: 'v1',\n    });\n  },\n\n  /**\n   * Get non-determinism runs\n   *\n   * @example\n   * ```typescript\n   * const runs = await nondeterminism.getRuns({ agentId: 'agent_123' });\n   * ```\n   */\n  async getRuns(options: ListRunsOptions = {}): Promise<NondeterminismRun[]> {\n    const params = new URLSearchParams();\n    if (options.agentId) params.set('agentId', options.agentId);\n    if (options.status) params.set('status', options.status);\n    if (options.limit) params.set('limit', 
String(options.limit));\n    if (options.offset) params.set('offset', String(options.offset));\n\n    return apiRequestWithData<NondeterminismRun[]>(\n      `/nondeterminism/runs?${params.toString()}`,\n      { apiVersion: 'v1' }\n    );\n  },\n\n  /**\n   * Get a specific run\n   *\n   * @example\n   * ```typescript\n   * const run = await nondeterminism.getRun('run_123');\n   * ```\n   */\n  async getRun(runId: string): Promise<NondeterminismRun> {\n    return apiRequestWithData<NondeterminismRun>(\n      `/nondeterminism/runs/${runId}`,\n      { apiVersion: 'v1' }\n    );\n  },\n\n  /**\n   * Start a run\n   *\n   * @example\n   * ```typescript\n   * await nondeterminism.startRun('run_123');\n   * ```\n   */\n  async startRun(runId: string): Promise<void> {\n    await apiRequest(`/nondeterminism/runs/${runId}/start`, {\n      method: 'POST',\n      apiVersion: 'v1',\n    });\n  },\n\n  /**\n   * Complete a run\n   *\n   * @example\n   * ```typescript\n   * await nondeterminism.completeRun('run_123');\n   * ```\n   */\n  async completeRun(runId: string): Promise<void> {\n    await apiRequest(`/nondeterminism/runs/${runId}/complete`, {\n      method: 'POST',\n      apiVersion: 'v1',\n    });\n  },\n\n  /**\n   * Record a sample result\n   *\n   * @example\n   * ```typescript\n   * const sample = await nondeterminism.recordSample({\n   *   runId: 'run_123',\n   *   traceId: 'trace_456',\n   *   criterionId: 'criterion_789',\n   *   sampleIndex: 0,\n   *   score: 85,\n   *   passed: true,\n   *   reasoning: 'Response meets quality criteria',\n   * });\n   * ```\n   */\n  async recordSample(options: RecordSampleOptions): Promise<NondeterminismSample> {\n    return apiRequestWithData<NondeterminismSample>('/nondeterminism/samples', {\n      method: 'POST',\n      body: options,\n      apiVersion: 'v1',\n    });\n  },\n\n  /**\n   * Get samples for a run\n   *\n   * @example\n   * ```typescript\n   * const samples = await nondeterminism.getSamples('run_123');\n   * 
```\n   */\n  async getSamples(runId: string): Promise<NondeterminismSample[]> {\n    return apiRequestWithData<NondeterminismSample[]>(\n      `/nondeterminism/runs/${runId}/samples`,\n      { apiVersion: 'v1' }\n    );\n  },\n\n  /**\n   * Get run summary with analysis\n   *\n   * @example\n   * ```typescript\n   * const summary = await nondeterminism.getRunSummary('run_123');\n   * console.log(`Pass@k rate: ${summary.criterionAnalyses[0].passAtKRate}`);\n   * ```\n   */\n  async getRunSummary(runId: string): Promise<RunSummary> {\n    return apiRequestWithData<RunSummary>(\n      `/nondeterminism/runs/${runId}/summary`,\n      { apiVersion: 'v1' }\n    );\n  },\n\n  /**\n   * Trigger analysis of a completed run\n   *\n   * @example\n   * ```typescript\n   * const summary = await nondeterminism.analyzeRun('run_123');\n   * ```\n   */\n  async analyzeRun(runId: string): Promise<RunSummary> {\n    return apiRequestWithData<RunSummary>(\n      `/nondeterminism/runs/${runId}/analyze`,\n      { method: 'POST', apiVersion: 'v1' }\n    );\n  },\n\n  /**\n   * Get information about pass@k analysis\n   *\n   * @example\n   * ```typescript\n   * const info = await nondeterminism.getInfo();\n   * console.log(info.concepts.passAtK.description);\n   * ```\n   */\n  async getInfo(): Promise<PassAtKInfo> {\n    return apiRequestWithData<PassAtKInfo>(\n      '/nondeterminism/info',\n      { apiVersion: 'v1' }\n    );\n  },\n};\n\n// ============================================================================\n// HELPER FUNCTIONS\n// ============================================================================\n\n/**\n * Calculate pass@k probability from pass rate\n *\n * @param passRate - Single-run pass rate (0-1)\n * @param k - Number of runs\n * @returns Probability that at least 1 of k runs passes\n *\n * @example\n * ```typescript\n * const passAtK = calculatePassAtK(0.7, 3); // ~0.973\n * ```\n */\nexport function calculatePassAtK(passRate: number, k: number): number {\n  
return 1 - Math.pow(1 - passRate, k);\n}\n\n/**\n * Calculate pass^k probability from pass rate\n *\n * @param passRate - Single-run pass rate (0-1)\n * @param k - Number of runs\n * @returns Probability that all k runs pass\n *\n * @example\n * ```typescript\n * const passToK = calculatePassToK(0.7, 3); // ~0.343\n * ```\n */\nexport function calculatePassToK(passRate: number, k: number): number {\n  return Math.pow(passRate, k);\n}\n\n/**\n * Calculate required pass rate to achieve target pass@k\n *\n * @param targetPassAtK - Desired pass@k probability\n * @param k - Number of runs\n * @returns Required single-run pass rate\n *\n * @example\n * ```typescript\n * const requiredRate = requiredPassRateForPassAtK(0.95, 3); // ~0.632\n * ```\n */\nexport function requiredPassRateForPassAtK(targetPassAtK: number, k: number): number {\n  return 1 - Math.pow(1 - targetPassAtK, 1 / k);\n}\n\n/**\n * Determine if evaluation is reliable based on analysis\n *\n * @param analysis - Criterion analysis result\n * @param reliabilityThreshold - Minimum reliability score (default 0.8)\n * @returns Whether the evaluation is considered reliable\n */\nexport function isReliableEvaluation(\n  analysis: CriterionAnalysis,\n  reliabilityThreshold = 0.8\n): boolean {\n  return analysis.reliabilityScore >= reliabilityThreshold;\n}\n\n/**\n * Get recommendation based on reliability analysis\n *\n * @param analysis - Criterion analysis result\n * @returns Actionable recommendation string\n */\nexport function getReliabilityRecommendation(analysis: CriterionAnalysis): string {\n  if (analysis.reliabilityScore >= 0.9) {\n    return 'Evaluation is highly reliable. No changes needed.';\n  } else if (analysis.reliabilityScore >= 0.8) {\n    return 'Evaluation is reliable. Consider minor criteria refinements.';\n  } else if (analysis.reliabilityScore >= 0.6) {\n    return 'Evaluation has moderate reliability. 
Add more specific criteria or examples.';\n  } else {\n    return 'Evaluation is unreliable. Consider using deterministic checks or restructuring criteria.';\n  }\n}\n"]}