evalsense 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,299 @@
1
+ export { FaithfulnessConfig, HallucinationConfig, RelevanceConfig, ToxicityConfig, faithfulness, hallucination, relevance, toxicity } from './opinionated/index.cjs';
2
+ import { M as MetricFn, a as MetricConfig, b as MetricOutput, L as LLMClient, J as JSONSchema } from '../types-C71p0wzM.cjs';
3
+
4
+ /**
5
+ * Custom metric registration
6
+ */
7
+
8
+ /**
9
+ * Registers a custom metric
10
+ *
11
+ * @example
12
+ * ```ts
13
+ * registerMetric("custom-relevance", async ({ outputs, query }) => {
14
+ * // Custom evaluation logic
15
+ * return outputs.map(o => ({
16
+ * id: o.id,
17
+ * metric: "custom-relevance",
18
+ * score: evaluateRelevance(o.output, query),
19
+ * }));
20
+ * });
21
+ * ```
22
+ */
23
+ declare function registerMetric(name: string, fn: MetricFn): void;
24
+ /**
25
+ * Gets a registered custom metric
26
+ */
27
+ declare function getMetric(name: string): MetricFn | undefined;
28
+ /**
29
+ * Runs a registered metric
30
+ */
31
+ declare function runMetric(name: string, config: MetricConfig): Promise<MetricOutput[]>;
32
+ /**
33
+ * Lists all registered custom metrics
34
+ */
35
+ declare function listMetrics(): string[];
36
+ /**
37
+ * Unregisters a metric (mainly for testing)
38
+ */
39
+ declare function unregisterMetric(name: string): boolean;
40
+ /**
41
+ * Clears all registered metrics (mainly for testing)
42
+ */
43
+ declare function clearMetrics(): void;
44
+ /**
45
+ * Creates a simple pattern-matching metric from a list of regular expressions
46
+ */
47
+ declare function createPatternMetric(name: string, patterns: RegExp[], options?: {
48
+ matchScore?: number;
49
+ noMatchScore?: number;
50
+ }): MetricFn;
51
+ /**
52
+ * Creates a keyword-based metric
53
+ */
54
+ declare function createKeywordMetric(name: string, keywords: string[], options?: {
55
+ caseSensitive?: boolean;
56
+ threshold?: number;
57
+ }): MetricFn;
58
+
59
+ /**
60
+ * Metric utilities
61
+ */
62
+
63
+ /**
64
+ * Normalizes a score to 0-1 range
65
+ */
66
+ declare function normalizeScore(score: number, min?: number, max?: number): number;
67
+ /**
68
+ * Converts a numeric score to a label based on thresholds
69
+ */
70
+ declare function scoreToLabel(score: number, thresholds: {
71
+ label: string;
72
+ min: number;
73
+ }[]): string;
74
+ /**
75
+ * Creates a metric output from a score
76
+ */
77
+ declare function createMetricOutput(id: string, metric: string, score: number, labelThresholds?: {
78
+ label: string;
79
+ min: number;
80
+ }[]): MetricOutput;
81
+ /**
82
+ * Default thresholds for binary metrics
83
+ */
84
+ declare const BINARY_THRESHOLDS: {
85
+ label: string;
86
+ min: number;
87
+ }[];
88
+ /**
89
+ * Default thresholds for severity metrics
90
+ */
91
+ declare const SEVERITY_THRESHOLDS: {
92
+ label: string;
93
+ min: number;
94
+ }[];
95
+ /**
96
+ * Batches items for parallel processing
97
+ */
98
+ declare function batch<T>(items: T[], size: number): T[][];
99
+ /**
100
+ * Delays execution
101
+ */
102
+ declare function delay(ms: number): Promise<void>;
103
+
104
+ /**
105
+ * LLM client management for metric evaluation
106
+ *
107
+ * Provides a global LLM client that can be configured once and used
108
+ * across all LLM-based metrics, with support for per-call overrides.
109
+ */
110
+
111
+ /**
112
+ * Sets the global LLM client for all metrics
113
+ *
114
+ * @example
115
+ * ```ts
116
+ * import { setLLMClient } from "evalsense/metrics";
117
+ *
118
+ * setLLMClient({
119
+ * async complete(prompt) {
120
+ * return await yourLLM.generate(prompt);
121
+ * }
122
+ * });
123
+ * ```
124
+ */
125
+ declare function setLLMClient(client: LLMClient): void;
126
+ /**
127
+ * Gets the current global LLM client
128
+ *
129
+ * @returns The global client or null if not set
130
+ */
131
+ declare function getLLMClient(): LLMClient | null;
132
+ /**
133
+ * Resets the global LLM client
134
+ *
135
+ * Useful for testing or switching between different LLM providers.
136
+ */
137
+ declare function resetLLMClient(): void;
138
+ /**
139
+ * Validates that an LLM client is available
140
+ *
141
+ * @param client - Optional client override
142
+ * @param metricName - Name of the metric for error messages
143
+ * @throws Error if no client is configured
144
+ * @returns The client to use (override or global)
145
+ */
146
+ declare function requireLLMClient(client: LLMClient | undefined, metricName: string): LLMClient;
147
+
148
+ /**
149
+ * Utilities for LLM-based metric evaluation
150
+ *
151
+ * Provides helpers for prompt templating, response parsing, validation, and error handling.
152
+ */
153
+
154
+ /**
155
+ * Fills a prompt template with variables
156
+ *
157
+ * @example
158
+ * ```ts
159
+ * const prompt = fillPrompt(
160
+ * "Context: {context}\nOutput: {output}",
161
+ * { context: "Paris is the capital", output: "France's capital is Paris" }
162
+ * );
163
+ * ```
164
+ */
165
+ declare function fillPrompt(template: string, variables: Record<string, string>): string;
166
+ /**
167
+ * Parses a JSON response from an LLM, with fallback handling
168
+ *
169
+ * Handles:
170
+ * - Plain JSON strings
171
+ * - JSON wrapped in markdown code blocks
172
+ * - Malformed JSON with helpful error messages
173
+ *
174
+ * @example
175
+ * ```ts
176
+ * const result = parseJSONResponse<{ score: number }>(llmResponse);
177
+ * ```
178
+ */
179
+ declare function parseJSONResponse<T>(response: string): T;
180
+ /**
181
+ * Validates that a parsed JSON response has required fields
182
+ *
183
+ * @example
184
+ * ```ts
185
+ * validateResponse(result, ["score", "reasoning"], "hallucination");
186
+ * ```
187
+ */
188
+ declare function validateResponse(response: unknown, requiredFields: string[], metricName: string): void;
189
+ /**
190
+ * Extracts a score from various formats (number, string, object with score field)
191
+ */
192
+ declare function extractScore(value: unknown, defaultScore?: number): number;
193
+ /**
194
+ * Creates a JSON schema for structured LLM outputs
195
+ *
196
+ * @example
197
+ * ```ts
198
+ * const schema = createJSONSchema({
199
+ * score: "number",
200
+ * reasoning: "string"
201
+ * });
202
+ * ```
203
+ */
204
+ declare function createJSONSchema(properties: Record<string, string>, required?: string[]): JSONSchema;
205
+ /**
206
+ * Batches an array of items into chunks
207
+ *
208
+ * Useful for batch evaluation mode to control batch size.
209
+ */
210
+ declare function batchItems<T>(items: T[], batchSize: number): T[][];
211
+ /**
212
+ * Creates an Error with a consistent message for LLM metric failures
213
+ */
214
+ declare function createLLMError(metricName: string, operation: string, error: unknown, context?: {
215
+ id?: string;
216
+ index?: number;
217
+ }): Error;
218
+ /**
219
+ * Waits for a promise with a timeout
220
+ */
221
+ declare function withTimeout<T>(promise: Promise<T>, timeoutMs: number, operation: string): Promise<T>;
222
+
223
+ /**
224
+ * Mock LLM client for testing
225
+ *
226
+ * Provides a configurable mock implementation of LLMClient for unit tests.
227
+ */
228
+
229
+ /**
230
+ * Configuration for mock LLM client
231
+ */
232
+ interface MockLLMConfig {
233
+ /** Fixed response to return (can be string or object for JSON mode) */
234
+ response?: string | Record<string, unknown>;
235
+ /** Multiple responses for sequential calls */
236
+ responses?: Array<string | Record<string, unknown>>;
237
+ /** Delay in milliseconds before responding */
238
+ delay?: number;
239
+ /** Whether to throw an error */
240
+ shouldError?: boolean;
241
+ /** Error message to throw */
242
+ errorMessage?: string;
243
+ /** Callback that receives the prompt (e.g. to validate it in a test) */
244
+ onPrompt?: (prompt: string) => void;
245
+ }
246
+ /**
247
+ * Creates a mock LLM client for testing
248
+ *
249
+ * @example
250
+ * ```ts
251
+ * const mock = createMockLLMClient({
252
+ * response: JSON.stringify({ score: 0.8, reasoning: "test" }),
253
+ * delay: 100
254
+ * });
255
+ *
256
+ * setLLMClient(mock);
257
+ * ```
258
+ */
259
+ declare function createMockLLMClient(config?: MockLLMConfig): LLMClient;
260
+ /**
261
+ * Creates a mock client that returns sequential responses
262
+ *
263
+ * Useful for testing multiple calls with different responses.
264
+ *
265
+ * @example
266
+ * ```ts
267
+ * const mock = createSequentialMockClient([
268
+ * { score: 0.2, reasoning: "First call" },
269
+ * { score: 0.8, reasoning: "Second call" }
270
+ * ]);
271
+ * ```
272
+ */
273
+ declare function createSequentialMockClient(responses: Array<string | Record<string, unknown>>, options?: {
274
+ delay?: number;
275
+ }): LLMClient;
276
+ /**
277
+ * Creates a mock client that always errors
278
+ *
279
+ * Useful for testing error handling.
280
+ */
281
+ declare function createErrorMockClient(errorMessage?: string): LLMClient;
282
+ /**
283
+ * Creates a spy mock client that records all prompts
284
+ *
285
+ * Useful for testing what prompts are being sent to the LLM.
286
+ *
287
+ * @example
288
+ * ```ts
289
+ * const { client, prompts } = createSpyMockClient({ score: 0.5 });
290
+ * await metric({ outputs, context, llmClient: client });
291
+ * console.log(prompts); // See all prompts that were sent
292
+ * ```
293
+ */
294
+ declare function createSpyMockClient(response: string | Record<string, unknown>): {
295
+ client: LLMClient;
296
+ prompts: string[];
297
+ };
298
+
299
+ export { BINARY_THRESHOLDS, SEVERITY_THRESHOLDS, batch, batchItems, clearMetrics, createErrorMockClient, createJSONSchema, createKeywordMetric, createLLMError, createMetricOutput, createMockLLMClient, createPatternMetric, createSequentialMockClient, createSpyMockClient, delay, extractScore, fillPrompt, getLLMClient, getMetric, listMetrics, normalizeScore, parseJSONResponse, registerMetric, requireLLMClient, resetLLMClient, runMetric, scoreToLabel, setLLMClient, unregisterMetric, validateResponse, withTimeout };
@@ -0,0 +1,299 @@
1
+ export { FaithfulnessConfig, HallucinationConfig, RelevanceConfig, ToxicityConfig, faithfulness, hallucination, relevance, toxicity } from './opinionated/index.js';
2
+ import { M as MetricFn, a as MetricConfig, b as MetricOutput, L as LLMClient, J as JSONSchema } from '../types-C71p0wzM.js';
3
+
4
+ /**
5
+ * Custom metric registration
6
+ */
7
+
8
+ /**
9
+ * Registers a custom metric
10
+ *
11
+ * @example
12
+ * ```ts
13
+ * registerMetric("custom-relevance", async ({ outputs, query }) => {
14
+ * // Custom evaluation logic
15
+ * return outputs.map(o => ({
16
+ * id: o.id,
17
+ * metric: "custom-relevance",
18
+ * score: evaluateRelevance(o.output, query),
19
+ * }));
20
+ * });
21
+ * ```
22
+ */
23
+ declare function registerMetric(name: string, fn: MetricFn): void;
24
+ /**
25
+ * Gets a registered custom metric
26
+ */
27
+ declare function getMetric(name: string): MetricFn | undefined;
28
+ /**
29
+ * Runs a registered metric
30
+ */
31
+ declare function runMetric(name: string, config: MetricConfig): Promise<MetricOutput[]>;
32
+ /**
33
+ * Lists all registered custom metrics
34
+ */
35
+ declare function listMetrics(): string[];
36
+ /**
37
+ * Unregisters a metric (mainly for testing)
38
+ */
39
+ declare function unregisterMetric(name: string): boolean;
40
+ /**
41
+ * Clears all registered metrics (mainly for testing)
42
+ */
43
+ declare function clearMetrics(): void;
44
+ /**
45
+ * Creates a simple pattern-matching metric from a list of regular expressions
46
+ */
47
+ declare function createPatternMetric(name: string, patterns: RegExp[], options?: {
48
+ matchScore?: number;
49
+ noMatchScore?: number;
50
+ }): MetricFn;
51
+ /**
52
+ * Creates a keyword-based metric
53
+ */
54
+ declare function createKeywordMetric(name: string, keywords: string[], options?: {
55
+ caseSensitive?: boolean;
56
+ threshold?: number;
57
+ }): MetricFn;
58
+
59
+ /**
60
+ * Metric utilities
61
+ */
62
+
63
+ /**
64
+ * Normalizes a score to 0-1 range
65
+ */
66
+ declare function normalizeScore(score: number, min?: number, max?: number): number;
67
+ /**
68
+ * Converts a numeric score to a label based on thresholds
69
+ */
70
+ declare function scoreToLabel(score: number, thresholds: {
71
+ label: string;
72
+ min: number;
73
+ }[]): string;
74
+ /**
75
+ * Creates a metric output from a score
76
+ */
77
+ declare function createMetricOutput(id: string, metric: string, score: number, labelThresholds?: {
78
+ label: string;
79
+ min: number;
80
+ }[]): MetricOutput;
81
+ /**
82
+ * Default thresholds for binary metrics
83
+ */
84
+ declare const BINARY_THRESHOLDS: {
85
+ label: string;
86
+ min: number;
87
+ }[];
88
+ /**
89
+ * Default thresholds for severity metrics
90
+ */
91
+ declare const SEVERITY_THRESHOLDS: {
92
+ label: string;
93
+ min: number;
94
+ }[];
95
+ /**
96
+ * Batches items for parallel processing
97
+ */
98
+ declare function batch<T>(items: T[], size: number): T[][];
99
+ /**
100
+ * Delays execution
101
+ */
102
+ declare function delay(ms: number): Promise<void>;
103
+
104
+ /**
105
+ * LLM client management for metric evaluation
106
+ *
107
+ * Provides a global LLM client that can be configured once and used
108
+ * across all LLM-based metrics, with support for per-call overrides.
109
+ */
110
+
111
+ /**
112
+ * Sets the global LLM client for all metrics
113
+ *
114
+ * @example
115
+ * ```ts
116
+ * import { setLLMClient } from "evalsense/metrics";
117
+ *
118
+ * setLLMClient({
119
+ * async complete(prompt) {
120
+ * return await yourLLM.generate(prompt);
121
+ * }
122
+ * });
123
+ * ```
124
+ */
125
+ declare function setLLMClient(client: LLMClient): void;
126
+ /**
127
+ * Gets the current global LLM client
128
+ *
129
+ * @returns The global client or null if not set
130
+ */
131
+ declare function getLLMClient(): LLMClient | null;
132
+ /**
133
+ * Resets the global LLM client
134
+ *
135
+ * Useful for testing or switching between different LLM providers.
136
+ */
137
+ declare function resetLLMClient(): void;
138
+ /**
139
+ * Validates that an LLM client is available
140
+ *
141
+ * @param client - Optional client override
142
+ * @param metricName - Name of the metric for error messages
143
+ * @throws Error if no client is configured
144
+ * @returns The client to use (override or global)
145
+ */
146
+ declare function requireLLMClient(client: LLMClient | undefined, metricName: string): LLMClient;
147
+
148
+ /**
149
+ * Utilities for LLM-based metric evaluation
150
+ *
151
+ * Provides helpers for prompt templating, response parsing, validation, and error handling.
152
+ */
153
+
154
+ /**
155
+ * Fills a prompt template with variables
156
+ *
157
+ * @example
158
+ * ```ts
159
+ * const prompt = fillPrompt(
160
+ * "Context: {context}\nOutput: {output}",
161
+ * { context: "Paris is the capital", output: "France's capital is Paris" }
162
+ * );
163
+ * ```
164
+ */
165
+ declare function fillPrompt(template: string, variables: Record<string, string>): string;
166
+ /**
167
+ * Parses a JSON response from an LLM, with fallback handling
168
+ *
169
+ * Handles:
170
+ * - Plain JSON strings
171
+ * - JSON wrapped in markdown code blocks
172
+ * - Malformed JSON with helpful error messages
173
+ *
174
+ * @example
175
+ * ```ts
176
+ * const result = parseJSONResponse<{ score: number }>(llmResponse);
177
+ * ```
178
+ */
179
+ declare function parseJSONResponse<T>(response: string): T;
180
+ /**
181
+ * Validates that a parsed JSON response has required fields
182
+ *
183
+ * @example
184
+ * ```ts
185
+ * validateResponse(result, ["score", "reasoning"], "hallucination");
186
+ * ```
187
+ */
188
+ declare function validateResponse(response: unknown, requiredFields: string[], metricName: string): void;
189
+ /**
190
+ * Extracts a score from various formats (number, string, object with score field)
191
+ */
192
+ declare function extractScore(value: unknown, defaultScore?: number): number;
193
+ /**
194
+ * Creates a JSON schema for structured LLM outputs
195
+ *
196
+ * @example
197
+ * ```ts
198
+ * const schema = createJSONSchema({
199
+ * score: "number",
200
+ * reasoning: "string"
201
+ * });
202
+ * ```
203
+ */
204
+ declare function createJSONSchema(properties: Record<string, string>, required?: string[]): JSONSchema;
205
+ /**
206
+ * Batches an array of items into chunks
207
+ *
208
+ * Useful for batch evaluation mode to control batch size.
209
+ */
210
+ declare function batchItems<T>(items: T[], batchSize: number): T[][];
211
+ /**
212
+ * Creates an Error with a consistent message for LLM metric failures
213
+ */
214
+ declare function createLLMError(metricName: string, operation: string, error: unknown, context?: {
215
+ id?: string;
216
+ index?: number;
217
+ }): Error;
218
+ /**
219
+ * Waits for a promise with a timeout
220
+ */
221
+ declare function withTimeout<T>(promise: Promise<T>, timeoutMs: number, operation: string): Promise<T>;
222
+
223
+ /**
224
+ * Mock LLM client for testing
225
+ *
226
+ * Provides a configurable mock implementation of LLMClient for unit tests.
227
+ */
228
+
229
+ /**
230
+ * Configuration for mock LLM client
231
+ */
232
+ interface MockLLMConfig {
233
+ /** Fixed response to return (can be string or object for JSON mode) */
234
+ response?: string | Record<string, unknown>;
235
+ /** Multiple responses for sequential calls */
236
+ responses?: Array<string | Record<string, unknown>>;
237
+ /** Delay in milliseconds before responding */
238
+ delay?: number;
239
+ /** Whether to throw an error */
240
+ shouldError?: boolean;
241
+ /** Error message to throw */
242
+ errorMessage?: string;
243
+ /** Callback that receives the prompt (e.g. to validate it in a test) */
244
+ onPrompt?: (prompt: string) => void;
245
+ }
246
+ /**
247
+ * Creates a mock LLM client for testing
248
+ *
249
+ * @example
250
+ * ```ts
251
+ * const mock = createMockLLMClient({
252
+ * response: JSON.stringify({ score: 0.8, reasoning: "test" }),
253
+ * delay: 100
254
+ * });
255
+ *
256
+ * setLLMClient(mock);
257
+ * ```
258
+ */
259
+ declare function createMockLLMClient(config?: MockLLMConfig): LLMClient;
260
+ /**
261
+ * Creates a mock client that returns sequential responses
262
+ *
263
+ * Useful for testing multiple calls with different responses.
264
+ *
265
+ * @example
266
+ * ```ts
267
+ * const mock = createSequentialMockClient([
268
+ * { score: 0.2, reasoning: "First call" },
269
+ * { score: 0.8, reasoning: "Second call" }
270
+ * ]);
271
+ * ```
272
+ */
273
+ declare function createSequentialMockClient(responses: Array<string | Record<string, unknown>>, options?: {
274
+ delay?: number;
275
+ }): LLMClient;
276
+ /**
277
+ * Creates a mock client that always errors
278
+ *
279
+ * Useful for testing error handling.
280
+ */
281
+ declare function createErrorMockClient(errorMessage?: string): LLMClient;
282
+ /**
283
+ * Creates a spy mock client that records all prompts
284
+ *
285
+ * Useful for testing what prompts are being sent to the LLM.
286
+ *
287
+ * @example
288
+ * ```ts
289
+ * const { client, prompts } = createSpyMockClient({ score: 0.5 });
290
+ * await metric({ outputs, context, llmClient: client });
291
+ * console.log(prompts); // See all prompts that were sent
292
+ * ```
293
+ */
294
+ declare function createSpyMockClient(response: string | Record<string, unknown>): {
295
+ client: LLMClient;
296
+ prompts: string[];
297
+ };
298
+
299
+ export { BINARY_THRESHOLDS, SEVERITY_THRESHOLDS, batch, batchItems, clearMetrics, createErrorMockClient, createJSONSchema, createKeywordMetric, createLLMError, createMetricOutput, createMockLLMClient, createPatternMetric, createSequentialMockClient, createSpyMockClient, delay, extractScore, fillPrompt, getLLMClient, getMetric, listMetrics, normalizeScore, parseJSONResponse, registerMetric, requireLLMClient, resetLLMClient, runMetric, scoreToLabel, setLLMClient, unregisterMetric, validateResponse, withTimeout };