@thinkhive/sdk 3.1.1 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,250 @@
1
+ "use strict";
2
+ /**
3
+ * ThinkHive SDK v3.0 - Non-Determinism API
4
+ *
5
+ * API for pass@k / pass^k analysis to measure LLM evaluation reliability
6
+ */
7
+ Object.defineProperty(exports, "__esModule", { value: true });
8
+ exports.nondeterminism = void 0;
9
+ exports.calculatePassAtK = calculatePassAtK;
10
+ exports.calculatePassToK = calculatePassToK;
11
+ exports.requiredPassRateForPassAtK = requiredPassRateForPassAtK;
12
+ exports.isReliableEvaluation = isReliableEvaluation;
13
+ exports.getReliabilityRecommendation = getReliabilityRecommendation;
14
+ const client_1 = require("../core/client");
15
+ // ============================================================================
16
+ // NON-DETERMINISM API CLIENT
17
+ // ============================================================================
18
+ /**
19
+ * Non-Determinism API client for pass@k analysis and reliability measurement
20
+ */
21
+ exports.nondeterminism = {
22
+ /**
23
+ * Create a new non-determinism analysis run
24
+ *
25
+ * @example
26
+ * ```typescript
27
+ * const run = await nondeterminism.createRun({
28
+ * agentId: 'agent_123',
29
+ * criterionId: 'criterion_456',
30
+ * kValue: 5,
31
+ * traceIds: ['trace_1', 'trace_2', 'trace_3'],
32
+ * runType: 'pass_at_k',
33
+ * });
34
+ * ```
35
+ */
36
+ async createRun(options) {
37
+ return (0, client_1.apiRequestWithData)('/nondeterminism/runs', {
38
+ method: 'POST',
39
+ body: options,
40
+ apiVersion: 'none',
41
+ });
42
+ },
43
+ /**
44
+ * Get non-determinism runs
45
+ *
46
+ * @example
47
+ * ```typescript
48
+ * const runs = await nondeterminism.getRuns({ agentId: 'agent_123' });
49
+ * ```
50
+ */
51
+ async getRuns(options = {}) {
52
+ const params = new URLSearchParams();
53
+ if (options.agentId)
54
+ params.set('agentId', options.agentId);
55
+ if (options.status)
56
+ params.set('status', options.status);
57
+ if (options.limit)
58
+ params.set('limit', String(options.limit));
59
+ if (options.offset)
60
+ params.set('offset', String(options.offset));
61
+ return (0, client_1.apiRequestWithData)(`/nondeterminism/runs?${params.toString()}`, { apiVersion: 'none' });
62
+ },
63
+ /**
64
+ * Get a specific run
65
+ *
66
+ * @example
67
+ * ```typescript
68
+ * const run = await nondeterminism.getRun('run_123');
69
+ * ```
70
+ */
71
+ async getRun(runId) {
72
+ return (0, client_1.apiRequestWithData)(`/nondeterminism/runs/${runId}`, { apiVersion: 'none' });
73
+ },
74
+ /**
75
+ * Start a run
76
+ *
77
+ * @example
78
+ * ```typescript
79
+ * await nondeterminism.startRun('run_123');
80
+ * ```
81
+ */
82
+ async startRun(runId) {
83
+ await (0, client_1.apiRequest)(`/nondeterminism/runs/${runId}/start`, {
84
+ method: 'POST',
85
+ apiVersion: 'none',
86
+ });
87
+ },
88
+ /**
89
+ * Complete a run
90
+ *
91
+ * @example
92
+ * ```typescript
93
+ * await nondeterminism.completeRun('run_123');
94
+ * ```
95
+ */
96
+ async completeRun(runId) {
97
+ await (0, client_1.apiRequest)(`/nondeterminism/runs/${runId}/complete`, {
98
+ method: 'POST',
99
+ apiVersion: 'none',
100
+ });
101
+ },
102
+ /**
103
+ * Record a sample result
104
+ *
105
+ * @example
106
+ * ```typescript
107
+ * const sample = await nondeterminism.recordSample({
108
+ * runId: 'run_123',
109
+ * traceId: 'trace_456',
110
+ * criterionId: 'criterion_789',
111
+ * sampleIndex: 0,
112
+ * score: 85,
113
+ * passed: true,
114
+ * reasoning: 'Response meets quality criteria',
115
+ * });
116
+ * ```
117
+ */
118
+ async recordSample(options) {
119
+ return (0, client_1.apiRequestWithData)('/nondeterminism/samples', {
120
+ method: 'POST',
121
+ body: options,
122
+ apiVersion: 'none',
123
+ });
124
+ },
125
+ /**
126
+ * Get samples for a run
127
+ *
128
+ * @example
129
+ * ```typescript
130
+ * const samples = await nondeterminism.getSamples('run_123');
131
+ * ```
132
+ */
133
+ async getSamples(runId) {
134
+ return (0, client_1.apiRequestWithData)(`/nondeterminism/runs/${runId}/samples`, { apiVersion: 'none' });
135
+ },
136
+ /**
137
+ * Get run summary with analysis
138
+ *
139
+ * @example
140
+ * ```typescript
141
+ * const summary = await nondeterminism.getRunSummary('run_123');
142
+ * console.log(`Pass@k rate: ${summary.criterionAnalyses[0].passAtKRate}`);
143
+ * ```
144
+ */
145
+ async getRunSummary(runId) {
146
+ return (0, client_1.apiRequestWithData)(`/nondeterminism/runs/${runId}/summary`, { apiVersion: 'none' });
147
+ },
148
+ /**
149
+ * Trigger analysis of a completed run
150
+ *
151
+ * @example
152
+ * ```typescript
153
+ * const summary = await nondeterminism.analyzeRun('run_123');
154
+ * ```
155
+ */
156
+ async analyzeRun(runId) {
157
+ return (0, client_1.apiRequestWithData)(`/nondeterminism/runs/${runId}/analyze`, { method: 'POST', apiVersion: 'none' });
158
+ },
159
+ /**
160
+ * Get information about pass@k analysis
161
+ *
162
+ * @example
163
+ * ```typescript
164
+ * const info = await nondeterminism.getInfo();
165
+ * console.log(info.concepts.passAtK.description);
166
+ * ```
167
+ */
168
+ async getInfo() {
169
+ return (0, client_1.apiRequestWithData)('/nondeterminism/info', { apiVersion: 'none' });
170
+ },
171
+ };
172
+ // ============================================================================
173
+ // HELPER FUNCTIONS
174
+ // ============================================================================
175
/**
 * Calculate pass@k probability from pass rate.
 *
 * Mathematically this is `1 - (1 - passRate)^k`. For pass rates strictly
 * between 0 and 1 the value is computed as `-expm1(k * log1p(-passRate))`,
 * which is the same quantity but does not lose precision when `passRate` is
 * tiny (the direct formula rounds `1 - passRate` to exactly 1 and returns 0).
 * For the endpoints 0 and 1, and for any input outside that interval, the
 * direct formula is used so those results are unchanged.
 *
 * @param passRate - Single-run pass rate (0-1)
 * @param k - Number of runs
 * @returns Probability that at least 1 of k runs passes
 *
 * @example
 * ```typescript
 * const passAtK = calculatePassAtK(0.7, 3); // ~0.973
 * ```
 */
function calculatePassAtK(passRate, k) {
    // log1p(-passRate) is only finite and real on the open interval (0, 1);
    // everywhere else keep the direct formula (it is exact at 0 and 1).
    if (!(passRate > 0 && passRate < 1)) {
        return 1 - Math.pow(1 - passRate, k);
    }
    return -Math.expm1(k * Math.log1p(-passRate));
}
190
/**
 * Calculate pass^k probability from pass rate.
 *
 * This is the single-run pass rate raised to the power `k`.
 *
 * @param passRate - Single-run pass rate (0-1)
 * @param k - Number of runs
 * @returns Probability that all k runs pass
 *
 * @example
 * ```typescript
 * const passToK = calculatePassToK(0.7, 3); // ~0.343
 * ```
 */
function calculatePassToK(passRate, k) {
    const allRunsPass = Math.pow(passRate, k);
    return allRunsPass;
}
205
/**
 * Calculate required pass rate to achieve target pass@k.
 *
 * This inverts `passAtK = 1 - (1 - rate)^k`, giving
 * `rate = 1 - (1 - targetPassAtK)^(1/k)`. For targets strictly between 0 and
 * 1 the value is computed as `-expm1(log1p(-targetPassAtK) / k)`, which is
 * the same quantity but stays accurate for very small targets (the direct
 * formula rounds to exactly 0 there). For the endpoints 0 and 1, and for any
 * input outside that interval, the direct formula is used so those results
 * are unchanged.
 *
 * @param targetPassAtK - Desired pass@k probability
 * @param k - Number of runs
 * @returns Required single-run pass rate
 *
 * @example
 * ```typescript
 * const requiredRate = requiredPassRateForPassAtK(0.95, 3); // ~0.632
 * ```
 */
function requiredPassRateForPassAtK(targetPassAtK, k) {
    // log1p(-target) is only finite and real on the open interval (0, 1);
    // everywhere else keep the direct formula (it is exact at 0 and 1).
    if (!(targetPassAtK > 0 && targetPassAtK < 1)) {
        return 1 - Math.pow(1 - targetPassAtK, 1 / k);
    }
    return -Math.expm1(Math.log1p(-targetPassAtK) / k);
}
220
/**
 * Decide whether an evaluation counts as reliable.
 *
 * Compares the analysis' `reliabilityScore` against the threshold with `>=`,
 * so a score exactly equal to the threshold is reliable.
 *
 * @param analysis - Criterion analysis result
 * @param reliabilityThreshold - Minimum reliability score (default 0.8)
 * @returns Whether the evaluation is considered reliable
 */
function isReliableEvaluation(analysis, reliabilityThreshold = 0.8) {
    const { reliabilityScore: score } = analysis;
    return score >= reliabilityThreshold;
}
230
/**
 * Map a reliability score to a recommendation.
 *
 * Tiers are checked from highest to lowest with `>=`: 0.9, then 0.8, then
 * 0.6. Anything that matches none of them (including a missing score) gets
 * the "unreliable" message.
 *
 * @param analysis - Criterion analysis result
 * @returns Actionable recommendation string
 */
function getReliabilityRecommendation(analysis) {
    const score = analysis.reliabilityScore;
    if (score >= 0.9) {
        return 'Evaluation is highly reliable. No changes needed.';
    }
    if (score >= 0.8) {
        return 'Evaluation is reliable. Consider minor criteria refinements.';
    }
    if (score >= 0.6) {
        return 'Evaluation has moderate reliability. Add more specific criteria or examples.';
    }
    return 'Evaluation is unreliable. Consider using deterministic checks or restructuring criteria.';
}
250
+ //# sourceMappingURL=data:application/json;base64,{"version":3,"file":"nondeterminism.js","sourceRoot":"","sources":["../../src/api/nondeterminism.ts"],"names":[],"mappings":";AAAA;;;;GAIG;;;AAqUH,4CAEC;AAcD,4CAEC;AAcD,gEAEC;AASD,oDAKC;AAQD,oEAUC;AArYD,2CAAgE;AA4HhE,+EAA+E;AAC/E,6BAA6B;AAC7B,+EAA+E;AAE/E;;GAEG;AACU,QAAA,cAAc,GAAG;IAC5B;;;;;;;;;;;;;OAaG;IACH,KAAK,CAAC,SAAS,CAAC,OAAyB;QACvC,OAAO,IAAA,2BAAkB,EAAoB,sBAAsB,EAAE;YACnE,MAAM,EAAE,MAAM;YACd,IAAI,EAAE,OAAO;YACb,UAAU,EAAE,MAAM;SACnB,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,OAAO,CAAC,UAA2B,EAAE;QACzC,MAAM,MAAM,GAAG,IAAI,eAAe,EAAE,CAAC;QACrC,IAAI,OAAO,CAAC,OAAO;YAAE,MAAM,CAAC,GAAG,CAAC,SAAS,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC;QAC5D,IAAI,OAAO,CAAC,MAAM;YAAE,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC;QACzD,IAAI,OAAO,CAAC,KAAK;YAAE,MAAM,CAAC,GAAG,CAAC,OAAO,EAAE,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC;QAC9D,IAAI,OAAO,CAAC,MAAM;YAAE,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC;QAEjE,OAAO,IAAA,2BAAkB,EACvB,wBAAwB,MAAM,CAAC,QAAQ,EAAE,EAAE,EAC3C,EAAE,UAAU,EAAE,MAAM,EAAE,CACvB,CAAC;IACJ,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,MAAM,CAAC,KAAa;QACxB,OAAO,IAAA,2BAAkB,EACvB,wBAAwB,KAAK,EAAE,EAC/B,EAAE,UAAU,EAAE,MAAM,EAAE,CACvB,CAAC;IACJ,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,QAAQ,CAAC,KAAa;QAC1B,MAAM,IAAA,mBAAU,EAAC,wBAAwB,KAAK,QAAQ,EAAE;YACtD,MAAM,EAAE,MAAM;YACd,UAAU,EAAE,MAAM;SACnB,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,WAAW,CAAC,KAAa;QAC7B,MAAM,IAAA,mBAAU,EAAC,wBAAwB,KAAK,WAAW,EAAE;YACzD,MAAM,EAAE,MAAM;YACd,UAAU,EAAE,MAAM;SACnB,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;;;;;;;;;OAeG;IACH,KAAK,CAAC,YAAY,CAAC,OAA4B;QAC7C,OAAO,IAAA,2BAAkB,EAAuB,yBAAyB,EAAE;YACzE,MAAM,EAAE,MAAM;YACd,IAAI,EAAE,OAAO;YACb,UAAU,EAAE,MAAM;SACnB,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,UAAU,CAAC,KAAa;QAC5B,OAAO,IAAA,2BAAkB,EACvB,wBAAwB,KAAK,UAAU,EACvC,EAAE,UAAU,EAAE,MAAM,EAAE,CACvB,CAAC;IACJ,CAAC;IAED;;;;;;;;OAQG;IACH,KAAK,CAAC,aAAa,CAAC,KAAa;QAC/B,OAAO,IAAA,2BAAkB,EACvB,wBAAwB,KAAK,UAAU,EACvC,EAAE,UAAU,
EAAE,MAAM,EAAE,CACvB,CAAC;IACJ,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,UAAU,CAAC,KAAa;QAC5B,OAAO,IAAA,2BAAkB,EACvB,wBAAwB,KAAK,UAAU,EACvC,EAAE,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,CACvC,CAAC;IACJ,CAAC;IAED;;;;;;;;OAQG;IACH,KAAK,CAAC,OAAO;QACX,OAAO,IAAA,2BAAkB,EACvB,sBAAsB,EACtB,EAAE,UAAU,EAAE,MAAM,EAAE,CACvB,CAAC;IACJ,CAAC;CACF,CAAC;AAEF,+EAA+E;AAC/E,mBAAmB;AACnB,+EAA+E;AAE/E;;;;;;;;;;;GAWG;AACH,SAAgB,gBAAgB,CAAC,QAAgB,EAAE,CAAS;IAC1D,OAAO,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,QAAQ,EAAE,CAAC,CAAC,CAAC;AACvC,CAAC;AAED;;;;;;;;;;;GAWG;AACH,SAAgB,gBAAgB,CAAC,QAAgB,EAAE,CAAS;IAC1D,OAAO,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC;AAC/B,CAAC;AAED;;;;;;;;;;;GAWG;AACH,SAAgB,0BAA0B,CAAC,aAAqB,EAAE,CAAS;IACzE,OAAO,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,aAAa,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;AAChD,CAAC;AAED;;;;;;GAMG;AACH,SAAgB,oBAAoB,CAClC,QAA2B,EAC3B,oBAAoB,GAAG,GAAG;IAE1B,OAAO,QAAQ,CAAC,gBAAgB,IAAI,oBAAoB,CAAC;AAC3D,CAAC;AAED;;;;;GAKG;AACH,SAAgB,4BAA4B,CAAC,QAA2B;IACtE,IAAI,QAAQ,CAAC,gBAAgB,IAAI,GAAG,EAAE,CAAC;QACrC,OAAO,mDAAmD,CAAC;IAC7D,CAAC;SAAM,IAAI,QAAQ,CAAC,gBAAgB,IAAI,GAAG,EAAE,CAAC;QAC5C,OAAO,8DAA8D,CAAC;IACxE,CAAC;SAAM,IAAI,QAAQ,CAAC,gBAAgB,IAAI,GAAG,EAAE,CAAC;QAC5C,OAAO,8EAA8E,CAAC;IACxF,CAAC;SAAM,CAAC;QACN,OAAO,0FAA0F,CAAC;IACpG,CAAC;AACH,CAAC","sourcesContent":["/**\n * ThinkHive SDK v3.0 - Non-Determinism API\n *\n * API for pass@k / pass^k analysis to measure LLM evaluation reliability\n */\n\nimport { apiRequest, apiRequestWithData } from '../core/client';\n\n// ============================================================================\n// TYPES\n// ============================================================================\n\nexport type NondeterminismRunType = 'pass_at_k' | 'pass_to_k' | 'variance' | 'reliability';\nexport type NondeterminismRunStatus = 'pending' | 'running' | 'completed' | 'failed' | 'cancelled';\n\nexport interface NondeterminismRun {\n  id: string;\n  companyId: string;\n  agentId: string;\n  runType: NondeterminismRunType;\n  kValue: 
number;\n  status: NondeterminismRunStatus;\n  traceCount: number;\n  criterionId?: string;\n  criteriaIds: string[];\n  temperature?: string;\n  model?: string;\n  progressPercent: number;\n  passAtKRate?: string;\n  passToKRate?: string;\n  avgVariance?: string;\n  reliabilityScore?: string;\n  startedAt?: string;\n  completedAt?: string;\n  createdBy?: string;\n  createdAt: string;\n}\n\nexport interface NondeterminismSample {\n  id: string;\n  runId: string;\n  traceId: string;\n  criterionId: string;\n  sampleIndex: number;\n  score: string;\n  passed: boolean;\n  reasoning?: string;\n  confidence?: string;\n  tokensUsed?: number;\n  costUsd?: string;\n  model?: string;\n  temperature?: string;\n  latencyMs?: number;\n  error?: string;\n  createdAt: string;\n}\n\nexport interface CreateRunOptions {\n  agentId: string;\n  criterionId?: string;\n  criteriaIds?: string[];\n  kValue: number;\n  traceIds: string[];\n  runType?: NondeterminismRunType;\n  temperature?: number;\n  model?: string;\n}\n\nexport interface RecordSampleOptions {\n  runId: string;\n  traceId: string;\n  criterionId: string;\n  sampleIndex: number;\n  score: number;\n  passed: boolean;\n  reasoning?: string;\n  confidence?: number;\n  tokensUsed?: number;\n  costUsd?: number;\n  model?: string;\n  temperature?: number;\n  latencyMs?: number;\n  error?: string;\n}\n\nexport interface TraceAnalysis {\n  traceId: string;\n  samples: NondeterminismSample[];\n  passCount: number;\n  totalCount: number;\n  passRate: number;\n  scoreVariance: number;\n  meanScore: number;\n  isConsistent: boolean;\n}\n\nexport interface CriterionAnalysis {\n  criterionId: string;\n  traceAnalyses: TraceAnalysis[];\n  passAtKRate: number;\n  passToKRate: number;\n  reliabilityScore: number;\n  isReliable: boolean;\n  recommendation: string;\n}\n\nexport interface RunSummary {\n  run: NondeterminismRun;\n  traceAnalyses: TraceAnalysis[];\n  criterionAnalyses: CriterionAnalysis[];\n}\n\nexport interface 
ListRunsOptions {\n  agentId?: string;\n  status?: NondeterminismRunStatus;\n  limit?: number;\n  offset?: number;\n}\n\nexport interface PassAtKInfo {\n  concepts: {\n    passAtK: { name: string; description: string; formula: string; useCase: string };\n    passToK: { name: string; description: string; formula: string; useCase: string };\n    variance: { name: string; description: string; useCase: string };\n    reliability: { name: string; description: string; useCase: string };\n  };\n  recommendations: Record<string, string>;\n  defaults: { kValue: number; reliabilityThreshold: number; varianceThreshold: number };\n}\n\n// ============================================================================\n// NON-DETERMINISM API CLIENT\n// ============================================================================\n\n/**\n * Non-Determinism API client for pass@k analysis and reliability measurement\n */\nexport const nondeterminism = {\n  /**\n   * Create a new non-determinism analysis run\n   *\n   * @example\n   * ```typescript\n   * const run = await nondeterminism.createRun({\n   *   agentId: 'agent_123',\n   *   criterionId: 'criterion_456',\n   *   kValue: 5,\n   *   traceIds: ['trace_1', 'trace_2', 'trace_3'],\n   *   runType: 'pass_at_k',\n   * });\n   * ```\n   */\n  async createRun(options: CreateRunOptions): Promise<NondeterminismRun> {\n    return apiRequestWithData<NondeterminismRun>('/nondeterminism/runs', {\n      method: 'POST',\n      body: options,\n      apiVersion: 'none',\n    });\n  },\n\n  /**\n   * Get non-determinism runs\n   *\n   * @example\n   * ```typescript\n   * const runs = await nondeterminism.getRuns({ agentId: 'agent_123' });\n   * ```\n   */\n  async getRuns(options: ListRunsOptions = {}): Promise<NondeterminismRun[]> {\n    const params = new URLSearchParams();\n    if (options.agentId) params.set('agentId', options.agentId);\n    if (options.status) params.set('status', options.status);\n    if (options.limit) params.set('limit', 
String(options.limit));\n    if (options.offset) params.set('offset', String(options.offset));\n\n    return apiRequestWithData<NondeterminismRun[]>(\n      `/nondeterminism/runs?${params.toString()}`,\n      { apiVersion: 'none' }\n    );\n  },\n\n  /**\n   * Get a specific run\n   *\n   * @example\n   * ```typescript\n   * const run = await nondeterminism.getRun('run_123');\n   * ```\n   */\n  async getRun(runId: string): Promise<NondeterminismRun> {\n    return apiRequestWithData<NondeterminismRun>(\n      `/nondeterminism/runs/${runId}`,\n      { apiVersion: 'none' }\n    );\n  },\n\n  /**\n   * Start a run\n   *\n   * @example\n   * ```typescript\n   * await nondeterminism.startRun('run_123');\n   * ```\n   */\n  async startRun(runId: string): Promise<void> {\n    await apiRequest(`/nondeterminism/runs/${runId}/start`, {\n      method: 'POST',\n      apiVersion: 'none',\n    });\n  },\n\n  /**\n   * Complete a run\n   *\n   * @example\n   * ```typescript\n   * await nondeterminism.completeRun('run_123');\n   * ```\n   */\n  async completeRun(runId: string): Promise<void> {\n    await apiRequest(`/nondeterminism/runs/${runId}/complete`, {\n      method: 'POST',\n      apiVersion: 'none',\n    });\n  },\n\n  /**\n   * Record a sample result\n   *\n   * @example\n   * ```typescript\n   * const sample = await nondeterminism.recordSample({\n   *   runId: 'run_123',\n   *   traceId: 'trace_456',\n   *   criterionId: 'criterion_789',\n   *   sampleIndex: 0,\n   *   score: 85,\n   *   passed: true,\n   *   reasoning: 'Response meets quality criteria',\n   * });\n   * ```\n   */\n  async recordSample(options: RecordSampleOptions): Promise<NondeterminismSample> {\n    return apiRequestWithData<NondeterminismSample>('/nondeterminism/samples', {\n      method: 'POST',\n      body: options,\n      apiVersion: 'none',\n    });\n  },\n\n  /**\n   * Get samples for a run\n   *\n   * @example\n   * ```typescript\n   * const samples = await 
nondeterminism.getSamples('run_123');\n   * ```\n   */\n  async getSamples(runId: string): Promise<NondeterminismSample[]> {\n    return apiRequestWithData<NondeterminismSample[]>(\n      `/nondeterminism/runs/${runId}/samples`,\n      { apiVersion: 'none' }\n    );\n  },\n\n  /**\n   * Get run summary with analysis\n   *\n   * @example\n   * ```typescript\n   * const summary = await nondeterminism.getRunSummary('run_123');\n   * console.log(`Pass@k rate: ${summary.criterionAnalyses[0].passAtKRate}`);\n   * ```\n   */\n  async getRunSummary(runId: string): Promise<RunSummary> {\n    return apiRequestWithData<RunSummary>(\n      `/nondeterminism/runs/${runId}/summary`,\n      { apiVersion: 'none' }\n    );\n  },\n\n  /**\n   * Trigger analysis of a completed run\n   *\n   * @example\n   * ```typescript\n   * const summary = await nondeterminism.analyzeRun('run_123');\n   * ```\n   */\n  async analyzeRun(runId: string): Promise<RunSummary> {\n    return apiRequestWithData<RunSummary>(\n      `/nondeterminism/runs/${runId}/analyze`,\n      { method: 'POST', apiVersion: 'none' }\n    );\n  },\n\n  /**\n   * Get information about pass@k analysis\n   *\n   * @example\n   * ```typescript\n   * const info = await nondeterminism.getInfo();\n   * console.log(info.concepts.passAtK.description);\n   * ```\n   */\n  async getInfo(): Promise<PassAtKInfo> {\n    return apiRequestWithData<PassAtKInfo>(\n      '/nondeterminism/info',\n      { apiVersion: 'none' }\n    );\n  },\n};\n\n// ============================================================================\n// HELPER FUNCTIONS\n// ============================================================================\n\n/**\n * Calculate pass@k probability from pass rate\n *\n * @param passRate - Single-run pass rate (0-1)\n * @param k - Number of runs\n * @returns Probability that at least 1 of k runs passes\n *\n * @example\n * ```typescript\n * const passAtK = calculatePassAtK(0.7, 3); // ~0.973\n * ```\n */\nexport function 
calculatePassAtK(passRate: number, k: number): number {\n  return 1 - Math.pow(1 - passRate, k);\n}\n\n/**\n * Calculate pass^k probability from pass rate\n *\n * @param passRate - Single-run pass rate (0-1)\n * @param k - Number of runs\n * @returns Probability that all k runs pass\n *\n * @example\n * ```typescript\n * const passToK = calculatePassToK(0.7, 3); // ~0.343\n * ```\n */\nexport function calculatePassToK(passRate: number, k: number): number {\n  return Math.pow(passRate, k);\n}\n\n/**\n * Calculate required pass rate to achieve target pass@k\n *\n * @param targetPassAtK - Desired pass@k probability\n * @param k - Number of runs\n * @returns Required single-run pass rate\n *\n * @example\n * ```typescript\n * const requiredRate = requiredPassRateForPassAtK(0.95, 3); // ~0.632\n * ```\n */\nexport function requiredPassRateForPassAtK(targetPassAtK: number, k: number): number {\n  return 1 - Math.pow(1 - targetPassAtK, 1 / k);\n}\n\n/**\n * Determine if evaluation is reliable based on analysis\n *\n * @param analysis - Criterion analysis result\n * @param reliabilityThreshold - Minimum reliability score (default 0.8)\n * @returns Whether the evaluation is considered reliable\n */\nexport function isReliableEvaluation(\n  analysis: CriterionAnalysis,\n  reliabilityThreshold = 0.8\n): boolean {\n  return analysis.reliabilityScore >= reliabilityThreshold;\n}\n\n/**\n * Get recommendation based on reliability analysis\n *\n * @param analysis - Criterion analysis result\n * @returns Actionable recommendation string\n */\nexport function getReliabilityRecommendation(analysis: CriterionAnalysis): string {\n  if (analysis.reliabilityScore >= 0.9) {\n    return 'Evaluation is highly reliable. No changes needed.';\n  } else if (analysis.reliabilityScore >= 0.8) {\n    return 'Evaluation is reliable. Consider minor criteria refinements.';\n  } else if (analysis.reliabilityScore >= 0.6) {\n    return 'Evaluation has moderate reliability. 
Add more specific criteria or examples.';\n  } else {\n    return 'Evaluation is unreliable. Consider using deterministic checks or restructuring criteria.';\n  }\n}\n"]}
@@ -0,0 +1,303 @@
1
+ /**
2
+ * ThinkHive SDK v3.1 - Quality Metrics API
3
+ *
4
+ * RAG Evaluation & Hallucination Detection for AI quality assurance
5
+ */
6
+ /**
7
+ * Retrieved context for RAG evaluation.
+ *
+ * Element type of the `retrievedContexts` array accepted by
+ * `qualityMetrics.evaluateRag`. Only `content` is required.
8
+ */
9
+ export interface RetrievedContext {
10
+ content: string; // the context text itself (required)
11
+ chunkIndex?: number; // optional; presumably the chunk's position in its source - confirm
12
+ metadata?: Record<string, unknown>; // optional free-form key/value data; values are untyped
13
+ score?: number; // optional; presumably a retrieval/similarity score - scale not visible here, confirm
14
+ }
15
+ /**
16
+ * Ground truth context.
+ *
+ * Element type of the optional `groundTruthContexts` array accepted by
+ * `qualityMetrics.evaluateRag`. Has the same `content`/`chunkIndex` fields
+ * as `RetrievedContext` but no `metadata` or `score`.
17
+ */
18
+ export interface GroundTruthContext {
19
+ content: string; // the reference context text (required)
20
+ chunkIndex?: number; // optional; presumably the chunk's position in its source - confirm
21
+ }
22
+ /**
23
+ * Grounded span evidence.
+ *
+ * Element type of `RAGEvidence.groundedSpans`.
+ *
+ * NOTE(review): the response type of `qualityMetrics.getGroundedness`
+ * declares its grounded span items inline with a required `sourceIndex`,
+ * whereas this interface has an optional `sourceChunkIndex`. Confirm
+ * whether the two shapes are meant to differ.
24
+ */
25
+ export interface GroundedSpan {
26
+ text: string; // the span text
27
+ confidence: number; // numeric confidence; scale not visible in this declaration - confirm
28
+ sourceChunkIndex?: number; // optional; presumably indexes the supporting context chunk - confirm
29
+ }
30
+ /**
31
+ * Ungrounded span evidence.
+ *
+ * Element type of `RAGEvidence.ungroundedSpans`. It has the same two
+ * fields as the inline `ungrounded` span items in the response type of
+ * `qualityMetrics.getGroundedness`.
32
+ */
33
+ export interface UngroundedSpan {
34
+ text: string; // the span text
35
+ confidence: number; // numeric confidence; scale not visible in this declaration - confirm
36
+ }
37
+ /**
38
+ * Citation mapping.
+ *
+ * Element type of `RAGEvidence.citationMap`; one entry per claim.
39
+ */
40
+ export interface CitationMap {
41
+ claim: string; // the claim text
42
+ citedIndex: number; // presumably an index into the evaluated contexts/citations - confirm
43
+ isValid: boolean; // validity flag for this citation; criteria not visible in this declaration
44
+ }
45
+ /**
46
+ * RAG evaluation result.
+ *
+ * Returned as the `evaluation` field by `qualityMetrics.getRagScores` and
+ * `qualityMetrics.evaluateRag`, and accepted by `passesQualityThreshold`
+ * and `getQualityRecommendations`.
+ *
+ * NOTE(review): all metric fields are plain `number`; their scale (for
+ * example 0-1 or 0-100) is not visible in this declaration - confirm
+ * against the API before comparing with thresholds.
47
+ */
48
+ export interface RAGEvaluation {
49
+ contextRelevance: number;
50
+ contextPrecision: number;
51
+ contextRecall: number;
52
+ groundedness: number;
53
+ faithfulness: number;
54
+ answerRelevance: number;
55
+ citationAccuracy: number;
56
+ citationCompleteness: number;
57
+ overallScore: number;
58
+ grade: 'A' | 'B' | 'C' | 'D' | 'F'; // letter grade; same literal union that getGradeColor accepts
59
+ groundedSpanCount?: number; // optional
60
+ ungroundedSpanCount?: number; // optional
61
+ issues: string[]; // always present (may presumably be empty - confirm)
62
+ recommendations: string[]; // always present (may presumably be empty - confirm)
63
+ }
64
+ /**
65
+ * RAG evaluation evidence.
+ *
+ * Returned as the `evidence` field, next to `evaluation`, by
+ * `qualityMetrics.getRagScores` and `qualityMetrics.evaluateRag`.
+ * All three arrays are required.
66
+ */
67
+ export interface RAGEvidence {
68
+ groundedSpans: GroundedSpan[];
69
+ ungroundedSpans: UngroundedSpan[];
70
+ citationMap: CitationMap[];
71
+ }
72
+ /**
73
+ * Hallucination instance.
+ *
+ * Element type of `HallucinationReport.instances`.
74
+ */
75
+ export interface HallucinationInstance {
76
+ type: string; // free-form string; the set of possible values is not visible in this declaration
77
+ severity: 'low' | 'medium' | 'high' | 'critical'; // same literal union as HallucinationReport.riskLevel
78
+ text: string; // the flagged text (the getHallucinationReport example prints it next to `type`)
79
+ explanation: string;
80
+ confidence: number; // numeric confidence; scale not visible in this declaration - confirm
81
+ suggestedFix?: string; // optional
82
+ }
83
+ /**
84
+ * Hallucination detection report.
+ *
+ * Returned as the `report` field by `qualityMetrics.getHallucinationReport`
+ * and `qualityMetrics.detectHallucinations`, and accepted by
+ * `isHallucinationRiskAcceptable` and (optionally) by
+ * `getQualityRecommendations`.
85
+ */
86
+ export interface HallucinationReport {
87
+ hasHallucinations: boolean; // the usage examples branch on this flag before reading `instances`
88
+ hallucinationScore: number; // scale and direction not visible in this declaration - confirm
89
+ riskLevel: 'low' | 'medium' | 'high' | 'critical';
90
+ factualClaims: number; // presumably a count of claims - confirm
91
+ verifiedClaims: number; // presumably a count - confirm
92
+ unverifiedClaims: number; // presumably a count - confirm
93
+ summary: string;
94
+ recommendations: string[];
95
+ instances: HallucinationInstance[];
96
+ }
97
+ /**
98
+ * Groundedness analysis result.
+ *
+ * Returned as the `groundedness` field by `qualityMetrics.getGroundedness`.
+ *
+ * NOTE(review): `grade` is typed as plain `string` here, while
+ * `RAGEvaluation.grade` is the literal union 'A' | 'B' | 'C' | 'D' | 'F'.
+ * Confirm whether this field can carry other values before passing it to
+ * helpers such as `getGradeColor`, which only accept the union.
99
+ */
100
+ export interface GroundednessResult {
101
+ score: number; // scale not visible in this declaration - confirm
102
+ faithfulness: number;
103
+ contextRelevance: number;
104
+ grade: string;
105
+ }
106
+ /**
107
+ * Batch evaluation result for a single trace.
+ *
+ * Element type of the `results` array returned by
+ * `qualityMetrics.evaluateBatch`. `error`, `rag` and `hallucination` are
+ * all optional, so check `success` and each field's presence before
+ * reading them.
108
+ */
109
+ export interface BatchEvaluationResult {
110
+ traceId: string;
111
+ success: boolean;
112
+ error?: string; // optional; presumably set when `success` is false - confirm
113
+ rag?: {
114
+ score: number;
115
+ grade: string; // plain string here, not the 'A'..'F' union used by RAGEvaluation.grade
116
+ mainIssue?: string; // optional
117
+ };
118
+ hallucination?: {
119
+ hasIssues: boolean;
120
+ score: number;
121
+ topIssue?: string; // optional
122
+ };
123
+ }
124
+ /**
125
+ * Batch evaluation summary.
+ *
+ * Returned as the `summary` field by `qualityMetrics.evaluateBatch`.
126
+ */
127
+ export interface BatchEvaluationSummary {
128
+ totalTraces: number;
129
+ successfulEvaluations: number;
130
+ avgRagScore: number; // scale not visible in this declaration - confirm
131
+ hallucinationRate: number; // the evaluateBatch usage example prints this followed by '%'; confirm it is a percentage
132
+ gradeDistribution: { // one numeric entry per grade letter; presumably per-grade trace counts - confirm
133
+ A: number;
134
+ B: number;
135
+ C: number;
136
+ D: number;
137
+ F: number;
138
+ };
139
+ }
140
+ /**
141
+ * Quality Metrics API client for RAG evaluation and hallucination detection.
+ *
+ * This is a declaration only; the implementation is not part of this file.
+ * Every method returns a Promise. Three methods look up an existing trace
+ * by id (`getRagScores`, `getHallucinationReport`, `getGroundedness`), two
+ * take the content to evaluate directly (`evaluateRag`,
+ * `detectHallucinations`), and `evaluateBatch` takes a list of trace ids.
142
+ */
143
+ export declare const qualityMetrics: {
144
+ /**
145
+ * Get RAG quality scores for a specific trace
146
+ *
+ * @param traceId - Id of the trace to look up.
+ * @returns The trace id together with its `evaluation` and `evidence`.
+ *
147
+ * @example
148
+ * ```typescript
149
+ * const scores = await qualityMetrics.getRagScores('trace_abc123');
150
+ * console.log(`Groundedness: ${scores.evaluation.groundedness}`);
151
+ * console.log(`Grade: ${scores.evaluation.grade}`);
152
+ * ```
153
+ */
154
+ getRagScores(traceId: string): Promise<{
155
+ traceId: string;
156
+ evaluation: RAGEvaluation;
157
+ evidence: RAGEvidence;
158
+ }>;
159
+ /**
160
+ * Get hallucination detection report for a trace
161
+ *
+ * @param traceId - Id of the trace to look up.
+ * @returns The trace id together with its hallucination `report`.
+ *
162
+ * @example
163
+ * ```typescript
164
+ * const report = await qualityMetrics.getHallucinationReport('trace_abc123');
165
+ * if (report.report.hasHallucinations) {
166
+ * console.log(`Risk level: ${report.report.riskLevel}`);
167
+ * for (const instance of report.report.instances) {
168
+ * console.log(`- ${instance.type}: ${instance.text}`);
169
+ * }
170
+ * }
171
+ * ```
172
+ */
173
+ getHallucinationReport(traceId: string): Promise<{
174
+ traceId: string;
175
+ report: HallucinationReport;
176
+ }>;
177
+ /**
178
+ * Evaluate RAG quality for provided content (ad-hoc evaluation)
179
+ *
+ * `query`, `response` and `retrievedContexts` are required;
+ * `groundTruthContexts` and `citations` are optional. Unlike
+ * `getRagScores`, the resolved object has no `traceId`.
+ *
+ * @param input - The query, the response and the contexts to evaluate.
+ * @returns The `evaluation` and its supporting `evidence`.
+ *
180
+ * @example
181
+ * ```typescript
182
+ * const result = await qualityMetrics.evaluateRag({
183
+ * query: 'What is the refund policy?',
184
+ * response: 'You can get a refund within 30 days.',
185
+ * retrievedContexts: [
186
+ * { content: 'Our refund policy allows returns within 30 days of purchase.' },
187
+ * ],
188
+ * });
189
+ * console.log(`Groundedness: ${result.evaluation.groundedness}`);
190
+ * ```
191
+ */
192
+ evaluateRag(input: {
193
+ query: string;
194
+ response: string;
195
+ retrievedContexts: RetrievedContext[];
196
+ groundTruthContexts?: GroundTruthContext[];
197
+ citations?: string[];
198
+ }): Promise<{
199
+ evaluation: RAGEvaluation;
200
+ evidence: RAGEvidence;
201
+ }>;
202
+ /**
203
+ * Detect hallucinations in provided content (ad-hoc detection)
204
+ *
+ * `response` and `contexts` are required; `query` and
+ * `previousResponses` are optional. Note that `contexts` uses an inline
+ * `{ content, metadata? }` shape rather than `RetrievedContext`.
+ *
+ * @param input - The response to check and the contexts to check it against.
+ * @returns An object holding the hallucination `report`.
+ *
205
+ * @example
206
+ * ```typescript
207
+ * const result = await qualityMetrics.detectHallucinations({
208
+ * response: 'The product costs $99 and comes with a 2-year warranty.',
209
+ * contexts: [
210
+ * { content: 'The product costs $99 with a 1-year warranty.' },
211
+ * ],
212
+ * });
213
+ * if (result.report.hasHallucinations) {
214
+ * console.log('Detected hallucinations:', result.report.instances);
215
+ * }
216
+ * ```
217
+ */
218
+ detectHallucinations(input: {
219
+ response: string;
220
+ contexts: Array<{
221
+ content: string;
222
+ metadata?: Record<string, unknown>;
223
+ }>;
224
+ query?: string;
225
+ previousResponses?: string[];
226
+ }): Promise<{
227
+ report: HallucinationReport;
228
+ }>;
229
+ /**
230
+ * Get groundedness analysis for a trace
231
+ *
+ * The resolved object carries the overall `groundedness` result, the
+ * individual `spans` split into `grounded` and `ungrounded`, and a
+ * numeric `summary`. Grounded span items are declared inline with a
+ * required `sourceIndex`; they are not typed as `GroundedSpan`.
+ *
+ * @param traceId - Id of the trace to look up.
+ * @returns The trace id, groundedness result, spans and summary.
+ *
232
+ * @example
233
+ * ```typescript
234
+ * const result = await qualityMetrics.getGroundedness('trace_abc123');
235
+ * console.log(`Groundedness score: ${result.groundedness.score}`);
236
+ * console.log(`Grounded spans: ${result.summary.groundedSpans}`);
237
+ * ```
238
+ */
239
+ getGroundedness(traceId: string): Promise<{
240
+ traceId: string;
241
+ groundedness: GroundednessResult;
242
+ spans: {
243
+ grounded: Array<{
244
+ text: string;
245
+ confidence: number;
246
+ sourceIndex: number;
247
+ }>;
248
+ ungrounded: Array<{
249
+ text: string;
250
+ confidence: number;
251
+ }>;
252
+ };
253
+ summary: {
254
+ totalSpans: number;
255
+ groundedSpans: number;
256
+ ungroundedSpans: number;
257
+ groundednessRatio: number;
258
+ };
259
+ }>;
260
+ /**
261
+ * Evaluate multiple traces for quality metrics in batch
262
+ *
+ * `traceIds` is required. `includeDetails` is optional; its effect on
+ * the response is not visible in this declaration.
+ *
+ * @param options - The trace ids to evaluate and the optional flag.
+ * @returns An aggregate `summary` plus one `results` entry type per trace.
+ *
263
+ * @example
264
+ * ```typescript
265
+ * const result = await qualityMetrics.evaluateBatch({
266
+ * traceIds: ['trace_1', 'trace_2', 'trace_3'],
267
+ * });
268
+ * console.log(`Average RAG score: ${result.summary.avgRagScore}`);
269
+ * console.log(`Hallucination rate: ${result.summary.hallucinationRate}%`);
270
+ * ```
271
+ */
272
+ evaluateBatch(options: {
273
+ traceIds: string[];
274
+ includeDetails?: boolean;
275
+ }): Promise<{
276
+ summary: BatchEvaluationSummary;
277
+ results: BatchEvaluationResult[];
278
+ }>;
279
+ };
280
+ /**
281
+ * Check if a RAG evaluation passes quality thresholds.
+ *
+ * Declaration only. The `thresholds` argument and each of its fields are
+ * optional; the defaults applied when they are omitted are not visible
+ * here. `minGrade` accepts 'A' to 'D' only ('F' is not an allowed
+ * minimum).
+ *
+ * @param evaluation - The RAG evaluation to check.
+ * @param thresholds - Optional minimum groundedness, overall score and grade.
+ * @returns `true` when the evaluation passes, `false` otherwise.
282
+ */
283
+ export declare function passesQualityThreshold(evaluation: RAGEvaluation, thresholds?: {
284
+ minGroundedness?: number;
285
+ minOverallScore?: number;
286
+ minGrade?: 'A' | 'B' | 'C' | 'D';
287
+ }): boolean;
288
+ /**
289
+ * Check if hallucination risk is acceptable.
+ *
+ * Declaration only. `maxRiskLevel` is optional and limited to 'low',
+ * 'medium' or 'high' ('critical' cannot be passed as the maximum). The
+ * default used when it is omitted is not visible here.
+ *
+ * @param report - The hallucination report to check.
+ * @param maxRiskLevel - Optional highest risk level still treated as acceptable.
+ * @returns `true` when the risk is acceptable, `false` otherwise.
290
+ */
291
+ export declare function isHallucinationRiskAcceptable(report: HallucinationReport, maxRiskLevel?: 'low' | 'medium' | 'high'): boolean;
292
+ /**
293
+ * Get quality recommendations based on evaluation.
+ *
+ * Declaration only. The hallucination report is optional, so the function
+ * can be called with a RAG evaluation alone.
+ *
+ * @param ragEval - The RAG evaluation to derive recommendations from.
+ * @param hallucinationReport - Optional hallucination report to take into account.
+ * @returns A list of recommendation strings.
294
+ */
295
+ export declare function getQualityRecommendations(ragEval: RAGEvaluation, hallucinationReport?: HallucinationReport): string[];
296
+ /**
297
+ * Format quality score for display.
+ *
+ * Declaration only; the output format (decimals, percent sign, scale) is
+ * not visible here.
+ *
+ * @param score - The numeric score to format.
+ * @returns The score rendered as a string.
298
+ */
299
+ export declare function formatQualityScore(score: number): string;
300
+ /**
301
+ * Get color indicator for grade.
+ *
+ * Declaration only. Accepts the same five-letter union as
+ * `RAGEvaluation.grade` and returns one of five colour names; which colour
+ * goes with which grade is not visible here.
+ *
+ * @param grade - Letter grade 'A', 'B', 'C', 'D' or 'F'.
+ * @returns One of 'green', 'blue', 'yellow', 'orange' or 'red'.
302
+ */
303
+ export declare function getGradeColor(grade: 'A' | 'B' | 'C' | 'D' | 'F'): 'green' | 'blue' | 'yellow' | 'orange' | 'red';