@thinkhive/sdk 3.1.1 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/MIGRATION.md +83 -12
- package/README.md +279 -128
- package/dist/api/agents.d.ts +169 -0
- package/dist/api/agents.js +185 -0
- package/dist/api/apiKeys.d.ts +252 -0
- package/dist/api/apiKeys.js +298 -0
- package/dist/api/business-metrics.d.ts +188 -0
- package/dist/api/business-metrics.js +213 -0
- package/dist/api/calibration.d.ts +0 -62
- package/dist/api/calibration.js +5 -48
- package/dist/api/claims.js +10 -7
- package/dist/api/conversation-eval.d.ts +200 -0
- package/dist/api/conversation-eval.js +235 -0
- package/dist/api/deterministic-graders.d.ts +205 -0
- package/dist/api/deterministic-graders.js +191 -0
- package/dist/api/eval-health.d.ts +250 -0
- package/dist/api/eval-health.js +224 -0
- package/dist/api/human-review.d.ts +275 -0
- package/dist/api/human-review.js +236 -0
- package/dist/api/nondeterminism.d.ts +300 -0
- package/dist/api/nondeterminism.js +250 -0
- package/dist/api/quality-metrics.d.ts +303 -0
- package/dist/api/quality-metrics.js +198 -0
- package/dist/api/roi-analytics.d.ts +263 -0
- package/dist/api/roi-analytics.js +204 -0
- package/dist/api/runs.js +12 -6
- package/dist/api/transcript-patterns.d.ts +204 -0
- package/dist/api/transcript-patterns.js +227 -0
- package/dist/core/client.d.ts +83 -9
- package/dist/core/client.js +229 -34
- package/dist/core/config.d.ts +2 -3
- package/dist/core/config.js +3 -4
- package/dist/core/types.d.ts +57 -4
- package/dist/core/types.js +1 -1
- package/dist/index.d.ts +429 -76
- package/dist/index.js +262 -42
- package/package.json +2 -2
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* ThinkHive SDK v3.0 - Deterministic Graders API
|
|
4
|
+
*
|
|
5
|
+
* API for running deterministic (code-based) evaluations
|
|
6
|
+
*/
|
|
7
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
8
|
+
exports.deterministicGraders = void 0;
|
|
9
|
+
exports.createRegexRule = createRegexRule;
|
|
10
|
+
exports.createContainsRule = createContainsRule;
|
|
11
|
+
exports.createLengthRule = createLengthRule;
|
|
12
|
+
exports.createJsonSchemaRule = createJsonSchemaRule;
|
|
13
|
+
exports.allRulesPassed = allRulesPassed;
|
|
14
|
+
exports.getFailedRules = getFailedRules;
|
|
15
|
+
exports.calculateAverageScore = calculateAverageScore;
|
|
16
|
+
const client_1 = require("../core/client");
|
|
17
|
+
// ============================================================================
|
|
18
|
+
// DETERMINISTIC GRADERS API CLIENT
|
|
19
|
+
// ============================================================================
|
|
20
|
+
/**
|
|
21
|
+
* Deterministic Graders API client for code-based evaluations
|
|
22
|
+
*/
|
|
23
|
+
exports.deterministicGraders = {
|
|
24
|
+
/**
|
|
25
|
+
* Run deterministic evaluation on a single trace
|
|
26
|
+
*
|
|
27
|
+
* @example
|
|
28
|
+
* ```typescript
|
|
29
|
+
* const result = await deterministicGraders.evaluate({
|
|
30
|
+
* traceId: 'trace_123',
|
|
31
|
+
* criterionId: 'criterion_456',
|
|
32
|
+
* });
|
|
33
|
+
* console.log(`Passed: ${result.passed}, Score: ${result.score}`);
|
|
34
|
+
* ```
|
|
35
|
+
*/
|
|
36
|
+
async evaluate(options) {
|
|
37
|
+
return (0, client_1.apiRequestWithData)('/deterministic-graders/evaluate', {
|
|
38
|
+
method: 'POST',
|
|
39
|
+
body: options,
|
|
40
|
+
apiVersion: 'none',
|
|
41
|
+
});
|
|
42
|
+
},
|
|
43
|
+
/**
|
|
44
|
+
* Run deterministic evaluations on multiple traces
|
|
45
|
+
*
|
|
46
|
+
* @example
|
|
47
|
+
* ```typescript
|
|
48
|
+
* const { results, summary } = await deterministicGraders.bulkEvaluate({
|
|
49
|
+
* evaluations: [
|
|
50
|
+
* { traceId: 'trace_1', criterionId: 'criterion_456' },
|
|
51
|
+
* { traceId: 'trace_2', criterionId: 'criterion_456' },
|
|
52
|
+
* { traceId: 'trace_3', criterionId: 'criterion_456' },
|
|
53
|
+
* ],
|
|
54
|
+
* });
|
|
55
|
+
* console.log(`Pass rate: ${summary.passRate * 100}%`);
|
|
56
|
+
* ```
|
|
57
|
+
*/
|
|
58
|
+
async bulkEvaluate(options) {
|
|
59
|
+
return (0, client_1.apiRequestWithData)('/deterministic-graders/bulk-evaluate', {
|
|
60
|
+
method: 'POST',
|
|
61
|
+
body: options,
|
|
62
|
+
apiVersion: 'none',
|
|
63
|
+
});
|
|
64
|
+
},
|
|
65
|
+
/**
|
|
66
|
+
* Get available rule types with descriptions
|
|
67
|
+
*
|
|
68
|
+
* @example
|
|
69
|
+
* ```typescript
|
|
70
|
+
* const ruleTypes = await deterministicGraders.getRuleTypes();
|
|
71
|
+
* for (const type of ruleTypes) {
|
|
72
|
+
* console.log(`${type.name}: ${type.description}`);
|
|
73
|
+
* }
|
|
74
|
+
* ```
|
|
75
|
+
*/
|
|
76
|
+
async getRuleTypes() {
|
|
77
|
+
return (0, client_1.apiRequestWithData)('/deterministic-graders/rule-types', { apiVersion: 'none' });
|
|
78
|
+
},
|
|
79
|
+
/**
|
|
80
|
+
* Get rule templates
|
|
81
|
+
*
|
|
82
|
+
* @example
|
|
83
|
+
* ```typescript
|
|
84
|
+
* const templates = await deterministicGraders.getTemplates();
|
|
85
|
+
* const noPiiTemplate = templates.find(t => t.id === 'no_pii');
|
|
86
|
+
* ```
|
|
87
|
+
*/
|
|
88
|
+
async getTemplates() {
|
|
89
|
+
return (0, client_1.apiRequestWithData)('/deterministic-graders/templates', { apiVersion: 'none' });
|
|
90
|
+
},
|
|
91
|
+
};
|
|
92
|
+
// ============================================================================
|
|
93
|
+
// HELPER FUNCTIONS
|
|
94
|
+
// ============================================================================
|
|
95
|
+
/**
 * Create a regex rule configuration.
 *
 * Pure helper: it only packages its arguments into an object and does not
 * compile or validate the pattern.
 *
 * @param pattern - Regular expression pattern
 * @param flags - Regex flags (default: 'gi')
 * @returns Rule configuration object of the form `{ pattern, flags }`
 *
 * @example
 * ```typescript
 * const config = createRegexRule('\\b(error|fail)\\b', 'gi');
 * ```
 */
function createRegexRule(pattern, flags = 'gi') {
    return { pattern, flags };
}
|
|
110
|
+
/**
 * Create a contains rule configuration.
 *
 * Pure helper: the `values` array is stored by reference, not copied.
 *
 * @param values - Strings to check for
 * @param caseSensitive - Whether comparison is case-sensitive (default: false)
 * @returns Rule configuration object of the form `{ values, caseSensitive }`
 *
 * @example
 * ```typescript
 * const config = createContainsRule(['hello', 'hi', 'hey'], false);
 * ```
 */
function createContainsRule(values, caseSensitive = false) {
    return { values, caseSensitive };
}
|
|
125
|
+
/**
 * Create a length rule configuration.
 *
 * Pure helper: no ordering check between `min` and `max` is performed here.
 * An omitted bound is kept as an `undefined` property on the result.
 *
 * @param min - Minimum length (optional)
 * @param max - Maximum length (optional)
 * @returns Rule configuration object of the form `{ min, max }`
 *
 * @example
 * ```typescript
 * const config = createLengthRule(50, 1000);
 * ```
 */
function createLengthRule(min, max) {
    return { min, max };
}
|
|
140
|
+
/**
 * Create a JSON schema rule configuration.
 *
 * Pure helper: the schema object is wrapped by reference and is neither
 * copied nor validated here.
 *
 * @param schema - JSON Schema object
 * @returns Rule configuration object of the form `{ schema }`
 *
 * @example
 * ```typescript
 * const config = createJsonSchemaRule({
 *   type: 'object',
 *   required: ['name', 'email'],
 *   properties: {
 *     name: { type: 'string' },
 *     email: { type: 'string', format: 'email' },
 *   },
 * });
 * ```
 */
function createJsonSchemaRule(schema) {
    return { schema };
}
|
|
161
|
+
/**
 * Check if all rule results passed.
 *
 * Uses `Array.prototype.every` on the truthiness of each `passed` field, so
 * an empty array yields `true`.
 *
 * @param results - Array of rule results
 * @returns Whether all rules passed
 */
function allRulesPassed(results) {
    return results.every(r => r.passed);
}
|
|
170
|
+
/**
 * Get failed rules from results.
 *
 * Returns a new array holding the entries whose `passed` field is falsy, in
 * their original order. The input array is not modified.
 *
 * @param results - Array of rule results
 * @returns Array of failed rule results
 */
function getFailedRules(results) {
    return results.filter(r => !r.passed);
}
|
|
179
|
+
/**
 * Calculate average score from rule results.
 *
 * Computes the arithmetic mean of the `score` fields. An empty array returns
 * 0, which avoids dividing by zero.
 *
 * @param results - Array of rule results
 * @returns Average score. The original docs give the range as 0-100; this
 *   function does not clamp or validate that range.
 */
function calculateAverageScore(results) {
    if (results.length === 0)
        return 0;
    const sum = results.reduce((acc, r) => acc + r.score, 0);
    return sum / results.length;
}
|
|
191
|
+
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"deterministic-graders.js","sourceRoot":"","sources":["../../src/api/deterministic-graders.ts"],"names":[],"mappings":";AAAA;;;;GAIG;;;AAsLH,0CAEC;AAcD,gDAKC;AAcD,4CAEC;AAoBD,oDAEC;AAQD,wCAEC;AAQD,wCAEC;AAQD,sDAIC;AA/QD,2CAAoD;AA8EpD,+EAA+E;AAC/E,mCAAmC;AACnC,+EAA+E;AAE/E;;GAEG;AACU,QAAA,oBAAoB,GAAG;IAClC;;;;;;;;;;;OAWG;IACH,KAAK,CAAC,QAAQ,CAAC,OAAwB;QACrC,OAAO,IAAA,2BAAkB,EAA0B,iCAAiC,EAAE;YACpF,MAAM,EAAE,MAAM;YACd,IAAI,EAAE,OAAO;YACb,UAAU,EAAE,MAAM;SACnB,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;;;;;;;;OAcG;IACH,KAAK,CAAC,YAAY,CAAC,OAA4B;QAC7C,OAAO,IAAA,2BAAkB,EAAqB,sCAAsC,EAAE;YACpF,MAAM,EAAE,MAAM;YACd,IAAI,EAAE,OAAO;YACb,UAAU,EAAE,MAAM;SACnB,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;;;;OAUG;IACH,KAAK,CAAC,YAAY;QAChB,OAAO,IAAA,2BAAkB,EACvB,mCAAmC,EACnC,EAAE,UAAU,EAAE,MAAM,EAAE,CACvB,CAAC;IACJ,CAAC;IAED;;;;;;;;OAQG;IACH,KAAK,CAAC,YAAY;QAChB,OAAO,IAAA,2BAAkB,EACvB,kCAAkC,EAClC,EAAE,UAAU,EAAE,MAAM,EAAE,CACvB,CAAC;IACJ,CAAC;CACF,CAAC;AAEF,+EAA+E;AAC/E,mBAAmB;AACnB,+EAA+E;AAE/E;;;;;;;;;;;GAWG;AACH,SAAgB,eAAe,CAAC,OAAe,EAAE,KAAK,GAAG,IAAI;IAC3D,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;AAC5B,CAAC;AAED;;;;;;;;;;;GAWG;AACH,SAAgB,kBAAkB,CAChC,MAAgB,EAChB,aAAa,GAAG,KAAK;IAErB,OAAO,EAAE,MAAM,EAAE,aAAa,EAAE,CAAC;AACnC,CAAC;AAED;;;;;;;;;;;GAWG;AACH,SAAgB,gBAAgB,CAAC,GAAY,EAAE,GAAY;IACzD,OAAO,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC;AACtB,CAAC;AAED;;;;;;;;;;;;;;;;;GAiBG;AACH,SAAgB,oBAAoB,CAAC,MAA+B;IAClE,OAAO,EAAE,MAAM,EAAE,CAAC;AACpB,CAAC;AAED;;;;;GAKG;AACH,SAAgB,cAAc,CAAC,OAAqB;IAClD,OAAO,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;AACtC,CAAC;AAED;;;;;GAKG;AACH,SAAgB,cAAc,CAAC,OAAqB;IAClD,OAAO,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;AACxC,CAAC;AAED;;;;;GAKG;AACH,SAAgB,qBAAqB,CAAC,OAAqB;IACzD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IACnC,MAAM,GAAG,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;IACzD,OAAO,GAAG,GAAG,OAAO,CAAC,MAAM,
CAAC;AAC9B,CAAC","sourcesContent":["/**\n * ThinkHive SDK v3.0 - Deterministic Graders API\n *\n * API for running deterministic (code-based) evaluations\n */\n\nimport { apiRequestWithData } from '../core/client';\n\n// ============================================================================\n// TYPES\n// ============================================================================\n\nexport type RuleType =\n  | 'regex'\n  | 'contains'\n  | 'not_contains'\n  | 'json_valid'\n  | 'json_schema'\n  | 'length'\n  | 'pii_check'\n  | 'sentiment'\n  | 'latency'\n  | 'token_count';\n\nexport interface DeterministicEvalResult {\n  passed: boolean;\n  score: number;\n  reasoning: string;\n  ruleResults?: RuleResult[];\n  metadata?: Record<string, unknown>;\n}\n\nexport interface RuleResult {\n  ruleId: string;\n  ruleName: string;\n  ruleType: RuleType;\n  passed: boolean;\n  score: number;\n  details?: string;\n}\n\nexport interface EvaluateOptions {\n  traceId: string;\n  criterionId: string;\n}\n\nexport interface BulkEvaluateOptions {\n  evaluations: Array<{\n    traceId: string;\n    criterionId: string;\n  }>;\n}\n\nexport interface BulkEvaluateResult {\n  results: Array<{\n    traceId: string;\n    criterionId: string;\n    passed: boolean;\n    score: number;\n    error?: string;\n  }>;\n  summary: {\n    total: number;\n    passed: number;\n    failed: number;\n    passRate: number;\n  };\n}\n\nexport interface RuleTypeInfo {\n  id: RuleType;\n  name: string;\n  description: string;\n  configFields: string[];\n}\n\nexport interface RuleTemplate {\n  id: string;\n  name: string;\n  description: string;\n  ruleType: RuleType;\n  config: Record<string, unknown>;\n}\n\n// ============================================================================\n// DETERMINISTIC GRADERS API CLIENT\n// ============================================================================\n\n/**\n * Deterministic Graders API client for code-based evaluations\n */\nexport const 
deterministicGraders = {\n  /**\n   * Run deterministic evaluation on a single trace\n   *\n   * @example\n   * ```typescript\n   * const result = await deterministicGraders.evaluate({\n   *   traceId: 'trace_123',\n   *   criterionId: 'criterion_456',\n   * });\n   * console.log(`Passed: ${result.passed}, Score: ${result.score}`);\n   * ```\n   */\n  async evaluate(options: EvaluateOptions): Promise<DeterministicEvalResult> {\n    return apiRequestWithData<DeterministicEvalResult>('/deterministic-graders/evaluate', {\n      method: 'POST',\n      body: options,\n      apiVersion: 'none',\n    });\n  },\n\n  /**\n   * Run deterministic evaluations on multiple traces\n   *\n   * @example\n   * ```typescript\n   * const { results, summary } = await deterministicGraders.bulkEvaluate({\n   *   evaluations: [\n   *     { traceId: 'trace_1', criterionId: 'criterion_456' },\n   *     { traceId: 'trace_2', criterionId: 'criterion_456' },\n   *     { traceId: 'trace_3', criterionId: 'criterion_456' },\n   *   ],\n   * });\n   * console.log(`Pass rate: ${summary.passRate * 100}%`);\n   * ```\n   */\n  async bulkEvaluate(options: BulkEvaluateOptions): Promise<BulkEvaluateResult> {\n    return apiRequestWithData<BulkEvaluateResult>('/deterministic-graders/bulk-evaluate', {\n      method: 'POST',\n      body: options,\n      apiVersion: 'none',\n    });\n  },\n\n  /**\n   * Get available rule types with descriptions\n   *\n   * @example\n   * ```typescript\n   * const ruleTypes = await deterministicGraders.getRuleTypes();\n   * for (const type of ruleTypes) {\n   *   console.log(`${type.name}: ${type.description}`);\n   * }\n   * ```\n   */\n  async getRuleTypes(): Promise<RuleTypeInfo[]> {\n    return apiRequestWithData<RuleTypeInfo[]>(\n      '/deterministic-graders/rule-types',\n      { apiVersion: 'none' }\n    );\n  },\n\n  /**\n   * Get rule templates\n   *\n   * @example\n   * ```typescript\n   * const templates = await deterministicGraders.getTemplates();\n   * const 
noPiiTemplate = templates.find(t => t.id === 'no_pii');\n   * ```\n   */\n  async getTemplates(): Promise<RuleTemplate[]> {\n    return apiRequestWithData<RuleTemplate[]>(\n      '/deterministic-graders/templates',\n      { apiVersion: 'none' }\n    );\n  },\n};\n\n// ============================================================================\n// HELPER FUNCTIONS\n// ============================================================================\n\n/**\n * Create a regex rule configuration\n *\n * @param pattern - Regular expression pattern\n * @param flags - Regex flags (default: 'gi')\n * @returns Rule configuration object\n *\n * @example\n * ```typescript\n * const config = createRegexRule('\\\\b(error|fail)\\\\b', 'gi');\n * ```\n */\nexport function createRegexRule(pattern: string, flags = 'gi'): { pattern: string; flags: string } {\n  return { pattern, flags };\n}\n\n/**\n * Create a contains rule configuration\n *\n * @param values - Strings to check for\n * @param caseSensitive - Whether comparison is case-sensitive\n * @returns Rule configuration object\n *\n * @example\n * ```typescript\n * const config = createContainsRule(['hello', 'hi', 'hey'], false);\n * ```\n */\nexport function createContainsRule(\n  values: string[],\n  caseSensitive = false\n): { values: string[]; caseSensitive: boolean } {\n  return { values, caseSensitive };\n}\n\n/**\n * Create a length rule configuration\n *\n * @param min - Minimum length (optional)\n * @param max - Maximum length (optional)\n * @returns Rule configuration object\n *\n * @example\n * ```typescript\n * const config = createLengthRule(50, 1000);\n * ```\n */\nexport function createLengthRule(min?: number, max?: number): { min?: number; max?: number } {\n  return { min, max };\n}\n\n/**\n * Create a JSON schema rule configuration\n *\n * @param schema - JSON Schema object\n * @returns Rule configuration object\n *\n * @example\n * ```typescript\n * const config = createJsonSchemaRule({\n *   type: 'object',\n *  
 required: ['name', 'email'],\n *   properties: {\n *     name: { type: 'string' },\n *     email: { type: 'string', format: 'email' },\n *   },\n * });\n * ```\n */\nexport function createJsonSchemaRule(schema: Record<string, unknown>): { schema: Record<string, unknown> } {\n  return { schema };\n}\n\n/**\n * Check if all rule results passed\n *\n * @param results - Array of rule results\n * @returns Whether all rules passed\n */\nexport function allRulesPassed(results: RuleResult[]): boolean {\n  return results.every(r => r.passed);\n}\n\n/**\n * Get failed rules from results\n *\n * @param results - Array of rule results\n * @returns Array of failed rule results\n */\nexport function getFailedRules(results: RuleResult[]): RuleResult[] {\n  return results.filter(r => !r.passed);\n}\n\n/**\n * Calculate average score from rule results\n *\n * @param results - Array of rule results\n * @returns Average score (0-100)\n */\nexport function calculateAverageScore(results: RuleResult[]): number {\n  if (results.length === 0) return 0;\n  const sum = results.reduce((acc, r) => acc + r.score, 0);\n  return sum / results.length;\n}\n"]}
|
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ThinkHive SDK v3.0 - Evaluation Health API
|
|
3
|
+
*
|
|
4
|
+
* API for eval saturation monitoring, regression detection, and health reports
|
|
5
|
+
*/
|
|
6
|
+
/** Saturation classification used by snapshots and health reports. */
export type SaturationType = 'ceiling' | 'floor' | 'healthy';
/** Health classification used by snapshots and health reports. */
export type HealthStatus = 'healthy' | 'warning' | 'critical';
/** Severity label attached to a regression. */
export type RegressionSeverity = 'minor' | 'moderate' | 'severe';
|
|
9
|
+
/**
 * A health snapshot record, as returned by the snapshot methods of the
 * `evalHealth` client.
 *
 * NOTE(review): numeric-looking fields (`passRate`, `meanScore`,
 * `trendStrength`, `healthScore`) are typed as strings; presumably decimal
 * strings such as '0.85' - confirm against the API.
 */
export interface EvalHealthSnapshot {
    id: string;
    companyId: string;
    agentId: string;
    criterionId?: string;
    snapshotDate: string;
    passRate?: string;
    evalCount?: number;
    meanScore?: string;
    saturationType?: SaturationType;
    daysAtSaturation?: number;
    trendDirection?: string;
    trendStrength?: string;
    healthStatus?: HealthStatus;
    healthScore?: string;
    createdAt: string;
}
|
|
26
|
+
/**
 * A regression record, as returned by the regression methods of the
 * `evalHealth` client.
 *
 * NOTE(review): pass rates and deltas are typed as strings; presumably
 * decimal strings such as '0.92' or '-0.14' - confirm against the API.
 */
export interface EvalRegression {
    id: string;
    companyId: string;
    agentId: string;
    criterionId?: string;
    severity: RegressionSeverity;
    baselinePassRate: string;
    currentPassRate: string;
    delta: string;
    deltaPercent?: string;
    baselinePeriodStart: string;
    baselinePeriodEnd: string;
    currentPeriodStart: string;
    currentPeriodEnd: string;
    baselineEvalCount?: number;
    currentEvalCount?: number;
    suspectedCauses?: unknown[];
    isSignificant?: boolean;
    isResolved: boolean;
    isAcknowledged: boolean;
    resolvedAt?: string;
    resolvedBy?: string;
    resolutionType?: string;
    resolutionNotes?: string;
    acknowledgedAt?: string;
    acknowledgedBy?: string;
    detectedAt: string;
    createdAt: string;
}
|
|
55
|
+
/**
 * Aggregated health report for one agent, as returned by
 * `evalHealth.getReport`.
 *
 * Unlike the snapshot and regression records, the numeric fields here
 * (`overallScore`, `passRate`, `trend.strength`) are typed as numbers.
 */
export interface HealthReport {
    agentId: string;
    generatedAt: string;
    overallHealth: HealthStatus;
    overallScore: number;
    passRate: number;
    evalCount: number;
    saturationStatus: {
        type: SaturationType;
        daysAtSaturation: number;
        recommendation: string;
    };
    regressionCount: number;
    activeRegressions: EvalRegression[];
    trend: {
        direction: 'improving' | 'stable' | 'declining';
        strength: number;
        description: string;
    };
    recommendations: string[];
}
|
|
76
|
+
/**
 * Input for `evalHealth.recordSnapshot`.
 *
 * Only `agentId` and `snapshotDate` are required. The `recordSnapshot`
 * example passes `passRate` as a decimal string ('0.85') and `snapshotDate`
 * as an ISO timestamp string.
 */
export interface CreateSnapshotOptions {
    agentId: string;
    criterionId?: string;
    snapshotDate: string;
    passRate?: string;
    evalCount?: number;
    meanScore?: string;
    saturationType?: SaturationType;
    daysAtSaturation?: number;
    trendDirection?: string;
    trendStrength?: string;
    healthStatus?: HealthStatus;
    healthScore?: string;
}
|
|
90
|
+
/**
 * Input for `evalHealth.recordRegression`.
 *
 * The `recordRegression` example passes pass rates and `delta` as decimal
 * strings ('0.92', '0.78', '-0.14') and period bounds as ISO timestamp
 * strings.
 */
export interface CreateRegressionOptions {
    agentId: string;
    criterionId?: string;
    severity: RegressionSeverity;
    baselinePassRate: string;
    currentPassRate: string;
    delta: string;
    deltaPercent?: string;
    baselinePeriodStart: string;
    baselinePeriodEnd: string;
    currentPeriodStart: string;
    currentPeriodEnd: string;
    baselineEvalCount?: number;
    currentEvalCount?: number;
    suspectedCauses?: unknown[];
    isSignificant?: boolean;
    detectedAt?: string;
}
|
|
108
|
+
/**
 * Input for `evalHealth.resolveRegression`.
 *
 * `resolutionType` is a free-form string in this declaration (the
 * `resolveRegression` example uses 'fixed'); `notes` is optional.
 */
export interface ResolveRegressionOptions {
    resolutionType: string;
    notes?: string;
}
|
|
112
|
+
/**
 * Query options for `evalHealth.getSnapshots`.
 *
 * Only `agentId` is required. The `getSnapshots` example passes `startDate`
 * and `endDate` as ISO timestamp strings.
 */
export interface GetSnapshotsOptions {
    agentId: string;
    criterionId?: string;
    startDate?: string;
    endDate?: string;
}
|
|
118
|
+
/**
 * Evaluation Health API client for monitoring eval quality and detecting regressions
 */
export declare const evalHealth: {
    /**
     * Get comprehensive health report for an agent
     *
     * @example
     * ```typescript
     * const report = await evalHealth.getReport('agent_123');
     * console.log(`Overall health: ${report.overallHealth}`);
     * console.log(`Active regressions: ${report.regressionCount}`);
     * ```
     */
    getReport(agentId: string): Promise<HealthReport>;
    /**
     * Get historical health snapshots
     *
     * @example
     * ```typescript
     * const snapshots = await evalHealth.getSnapshots({
     *   agentId: 'agent_123',
     *   startDate: '2024-01-01T00:00:00Z',
     *   endDate: '2024-01-31T23:59:59Z',
     * });
     * ```
     */
    getSnapshots(options: GetSnapshotsOptions): Promise<EvalHealthSnapshot[]>;
    /**
     * Get latest health snapshot
     *
     * Resolves to `null` rather than a snapshot in some cases; presumably
     * when no snapshot exists yet - confirm against the implementation.
     *
     * @example
     * ```typescript
     * const snapshot = await evalHealth.getLatestSnapshot('agent_123');
     * ```
     */
    getLatestSnapshot(agentId: string, criterionId?: string): Promise<EvalHealthSnapshot | null>;
    /**
     * Record a health snapshot
     *
     * @example
     * ```typescript
     * const snapshot = await evalHealth.recordSnapshot({
     *   agentId: 'agent_123',
     *   snapshotDate: new Date().toISOString(),
     *   passRate: '0.85',
     *   evalCount: 150,
     *   healthStatus: 'healthy',
     * });
     * ```
     */
    recordSnapshot(options: CreateSnapshotOptions): Promise<EvalHealthSnapshot>;
    /**
     * Get unresolved regressions for an agent
     *
     * `delta` is a string; the `recordRegression` example below writes it as
     * a fraction ('-0.14'), so it is printed here without a percent sign.
     *
     * @example
     * ```typescript
     * const regressions = await evalHealth.getRegressions('agent_123');
     * for (const regression of regressions) {
     *   console.log(`${regression.severity}: pass rate changed by ${regression.delta}`);
     * }
     * ```
     */
    getRegressions(agentId: string): Promise<EvalRegression[]>;
    /**
     * Record a new regression
     *
     * @example
     * ```typescript
     * const regression = await evalHealth.recordRegression({
     *   agentId: 'agent_123',
     *   severity: 'moderate',
     *   baselinePassRate: '0.92',
     *   currentPassRate: '0.78',
     *   delta: '-0.14',
     *   baselinePeriodStart: '2024-01-01T00:00:00Z',
     *   baselinePeriodEnd: '2024-01-15T23:59:59Z',
     *   currentPeriodStart: '2024-01-16T00:00:00Z',
     *   currentPeriodEnd: '2024-01-31T23:59:59Z',
     * });
     * ```
     */
    recordRegression(options: CreateRegressionOptions): Promise<EvalRegression>;
    /**
     * Resolve a regression
     *
     * @example
     * ```typescript
     * await evalHealth.resolveRegression('regression_123', {
     *   resolutionType: 'fixed',
     *   notes: 'Updated prompt template to address quality issues',
     * });
     * ```
     */
    resolveRegression(regressionId: string, options: ResolveRegressionOptions): Promise<void>;
    /**
     * Acknowledge a regression
     *
     * @example
     * ```typescript
     * await evalHealth.acknowledgeRegression('regression_123');
     * ```
     */
    acknowledgeRegression(regressionId: string): Promise<void>;
};
|
|
223
|
+
/**
 * Check if health status indicates an issue
 *
 * Declaration only; which statuses count as an issue is decided by the
 * implementation, which is not part of this declaration file.
 *
 * @param status - Health status to check
 * @returns Whether the status indicates a problem
 */
export declare function hasHealthIssue(status: HealthStatus): boolean;
|
|
230
|
+
/**
 * Get severity level as numeric value for sorting
 *
 * Declaration only; the exact mapping from label to number lives in the
 * implementation, which is not part of this declaration file.
 *
 * @param severity - Regression severity
 * @returns Numeric severity (1-3, higher is worse)
 */
export declare function getSeverityLevel(severity: RegressionSeverity): number;
|
|
237
|
+
/**
 * Check if evaluation is saturated
 *
 * Declaration only. `saturationType` is optional on the snapshot; how a
 * missing value is treated is decided by the implementation - confirm there.
 *
 * @param snapshot - Health snapshot to check
 * @returns Whether evaluation is at ceiling or floor
 */
export declare function isSaturated(snapshot: EvalHealthSnapshot): boolean;
|
|
244
|
+
/**
 * Get recommendation for saturation type
 *
 * Declaration only; the recommendation text is produced by the
 * implementation, which is not part of this declaration file.
 *
 * @param saturationType - Type of saturation
 * @returns Recommendation string
 */
export declare function getSaturationRecommendation(saturationType: SaturationType): string;
|