npm - evalsense - Versions diffs - 0.3.2 → 0.4.0 - Mend

evalsense 0.3.2 → 0.4.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.

Files changed (38)

package/README.md +235 -98
package/dist/{chunk-BFGA2NUB.cjs → chunk-4BKZPVY4.cjs} +13 -6
package/dist/chunk-4BKZPVY4.cjs.map +1 -0
package/dist/{chunk-IYLSY7NX.js → chunk-IUVDDMJ3.js} +13 -6
package/dist/chunk-IUVDDMJ3.js.map +1 -0
package/dist/chunk-NCCQRZ2Y.cjs +1141 -0
package/dist/chunk-NCCQRZ2Y.cjs.map +1 -0
package/dist/chunk-TDGWDK2L.js +1108 -0
package/dist/chunk-TDGWDK2L.js.map +1 -0
package/dist/cli.cjs +11 -11
package/dist/cli.js +1 -1
package/dist/index-CATqAHNK.d.cts +416 -0
package/dist/index-CoMpaW-K.d.ts +416 -0
package/dist/index.cjs +507 -580
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +210 -161
package/dist/index.d.ts +210 -161
package/dist/index.js +455 -524
package/dist/index.js.map +1 -1
package/dist/metrics/index.cjs +103 -342
package/dist/metrics/index.cjs.map +1 -1
package/dist/metrics/index.d.cts +260 -31
package/dist/metrics/index.d.ts +260 -31
package/dist/metrics/index.js +24 -312
package/dist/metrics/index.js.map +1 -1
package/dist/metrics/opinionated/index.cjs +5 -5
package/dist/metrics/opinionated/index.d.cts +2 -163
package/dist/metrics/opinionated/index.d.ts +2 -163
package/dist/metrics/opinionated/index.js +1 -1
package/dist/{types-C71p0wzM.d.cts → types-D0hzfyKm.d.cts} +1 -13
package/dist/{types-C71p0wzM.d.ts → types-D0hzfyKm.d.ts} +1 -13
package/package.json +1 -1
package/dist/chunk-BFGA2NUB.cjs.map +0 -1
package/dist/chunk-IYLSY7NX.js.map +0 -1
package/dist/chunk-RZFLCWTW.cjs +0 -942
package/dist/chunk-RZFLCWTW.cjs.map +0 -1
package/dist/chunk-Z3U6AUWX.js +0 -925
package/dist/chunk-Z3U6AUWX.js.map +0 -1

package/dist/metrics/index.d.cts CHANGED

@@ -1,48 +1,99 @@
-export { FaithfulnessConfig, HallucinationConfig, RelevanceConfig, ToxicityConfig, faithfulness, hallucination, relevance, toxicity } from './opinionated/index.cjs';
-import { M as MetricFn, a as MetricConfig, b as MetricOutput, L as LLMClient, J as JSONSchema } from '../types-C71p0wzM.cjs';
+import { L as LLMMetricConfig, a as LLMMetric } from '../index-CATqAHNK.cjs';
+export { E as EvalRecord, I as InputSpec, b as LLMMetricOptions, c as LabelThreshold, R as ResponseFieldType, f as faithfulness, h as hallucination, r as relevance, t as toxicity } from '../index-CATqAHNK.cjs';
+import { M as MetricFn, a as MetricOutput, L as LLMClient, J as JSONSchema } from '../types-D0hzfyKm.cjs';
 /**
- * Custom metric registration
+ * Factory function for creating LLM-based metrics
+ *
+ * Reduces metric definition from 90+ lines to ~15 lines with a declarative API.
+ * Eliminates parallel array matching and provides unified record input.
+ *
+ * @example
+ * ```ts
+ * const answerCorrectness = createLLMMetric({
+ *   name: "answer-correctness",
+ *   inputs: ["output", "reference"],
+ *   prompt: ANSWER_CORRECTNESS_PROMPT,
+ *   responseFields: { score: "number", reasoning: "string" },
+ *   labels: [
+ *     { min: 0.8, label: "correct" },
+ *     { min: 0.5, label: "partial" },
+ *     { min: 0, label: "incorrect" },
+ *   ],
+ * });
+ *
+ * // Usage with unified records
+ * const results = await answerCorrectness([
+ *   { id: "1", output: "Paris", reference: "Paris is the capital" },
+ * ]);
+ * ```
  */
 /**
- * Registers a custom metric
+ * Creates an LLM-based metric function
+ *
+ * This factory function eliminates boilerplate by:
+ * - Handling LLM client validation
+ * - Managing structured output with fallback to text parsing
+ * - Normalizing scores to 0-1 range
+ * - Converting scores to labels using thresholds
+ * - Supporting both per-row and batch evaluation modes
+ * - Providing consistent error handling
+ *
+ * @param config - Metric configuration
+ * @returns A metric function that takes unified records
  *
  * @example
  * ```ts
- * registerMetric("custom-relevance", async ({ outputs, query }) => {
- *   // Custom evaluation logic
- *   return outputs.map(o => ({
- *     id: o.id,
- *     metric: "custom-relevance",
- *     score: evaluateRelevance(o.output, query),
- *   }));
+ * // Create a custom LLM metric
+ * const myMetric = createLLMMetric({
+ *   name: "my-metric",
+ *   inputs: ["output", "reference"],
+ *   prompt: `
+ *     Reference: {reference}
+ *     Output: {output}
+ *
+ *     Evaluate the output against the reference...
+ *   `,
+ *   responseFields: { score: "number", reasoning: "string" },
+ *   labels: [
+ *     { min: 0.7, label: "good" },
+ *     { min: 0.4, label: "fair" },
+ *     { min: 0, label: "poor" },
+ *   ],
  * });
+ *
+ * // Use with unified records
+ * const results = await myMetric([
+ *   { id: "1", output: "answer A", reference: "correct answer" },
+ *   { id: "2", output: "answer B", reference: "expected B" },
+ * ]);
  * ```
  */
-declare function registerMetric(name: string, fn: MetricFn): void;
-/**
- * Gets a registered custom metric
- */
-declare function getMetric(name: string): MetricFn | undefined;
-/**
- * Runs a registered metric
- */
-declare function runMetric(name: string, config: MetricConfig): Promise<MetricOutput[]>;
-/**
- * Lists all registered custom metrics
- */
-declare function listMetrics(): string[];
-/**
- * Unregisters a metric (mainly for testing)
- */
-declare function unregisterMetric(name: string): boolean;
+declare function createLLMMetric(config: LLMMetricConfig): LLMMetric;
 /**
- * Clears all registered metrics (mainly for testing)
+ * Custom metric utilities
+ *
+ * Provides simple pattern-based and keyword-based metrics for non-LLM use cases.
+ * For LLM-based custom metrics, use createLLMMetric() from evalsense/metrics.
  */
-declare function clearMetrics(): void;
 /**
  * Creates a simple string-matching metric
+ *
+ * @example
+ * ```ts
+ * const containsCodeMetric = createPatternMetric("contains-code", [
+ *   /```[\s\S]*?```/,
+ *   /function\s+\w+\s*\(/,
+ *   /const\s+\w+\s*=/,
+ * ]);
+ *
+ * const results = await containsCodeMetric({
+ *   outputs: [{ id: "1", output: "const x = 5" }]
+ * });
+ * ```
  */
 declare function createPatternMetric(name: string, patterns: RegExp[], options?: {
     matchScore?: number;
@@ -50,6 +101,19 @@ declare function createPatternMetric(name: string, patterns: RegExp[], options?:
 }): MetricFn;
 /**
  * Creates a keyword-based metric
+ *
+ * @example
+ * ```ts
+ * const techTermsMetric = createKeywordMetric("tech-terms", [
+ *   "machine learning",
+ *   "neural network",
+ *   "algorithm",
+ * ], { threshold: 0.3 });
+ *
+ * const results = await techTermsMetric({
+ *   outputs: [{ id: "1", output: "This uses a neural network algorithm." }]
+ * });
+ * ```
  */
 declare function createKeywordMetric(name: string, keywords: string[], options?: {
     caseSensitive?: boolean;
@@ -108,6 +172,13 @@ declare function delay(ms: number): Promise<void>;
  * across all LLM-based metrics, with support for per-call overrides.
  */
+/**
+ * Global defaults for LLM metrics
+ */
+interface LLMDefaults {
+    /** Default evaluation mode for all metrics */
+    evaluationMode?: "per-row" | "batch";
+}
 /**
  * Sets the global LLM client for all metrics
  *
@@ -144,6 +215,122 @@ declare function resetLLMClient(): void;
  * @returns The client to use (override or global)
  */
 declare function requireLLMClient(client: LLMClient | undefined, metricName: string): LLMClient;
+/**
+ * Executes a function with a scoped LLM client
+ *
+ * The client is automatically restored after the function completes,
+ * even if an error is thrown. This is ideal for testing scenarios
+ * where you want to use a mock client without affecting other tests.
+ *
+ * @param client - The LLM client to use for this scope
+ * @param fn - The async function to execute with the scoped client
+ * @returns The result of the function
+ *
+ * @example
+ * ```ts
+ * // No need for beforeEach(() => resetLLMClient())
+ * it("test with mock client", async () => {
+ *   const result = await withLLMClient(mockClient, async () => {
+ *     return hallucination([{ id: "1", output: "test", context: "ctx" }]);
+ *   });
+ *   expect(result[0].score).toBe(0.5);
+ * });
+ * ```
+ */
+declare function withLLMClient<T>(client: LLMClient, fn: () => Promise<T>): Promise<T>;
+/**
+ * Sets global defaults for LLM metrics
+ *
+ * @param defaults - Default options to apply to all metrics
+ *
+ * @example
+ * ```ts
+ * // Make all metrics use batch mode by default
+ * setDefaults({ evaluationMode: "batch" });
+ * ```
+ */
+declare function setDefaults(defaults: LLMDefaults): void;
+/**
+ * Gets the current global defaults
+ *
+ * @returns Current global defaults
+ */
+declare function getDefaults(): LLMDefaults;
+/**
+ * Resets global defaults to empty
+ */
+declare function resetDefaults(): void;
+/**
+ * Provider types for configureLLM
+ */
+type LLMProvider = "openai" | "anthropic" | "openrouter" | "custom";
+/**
+ * Options for configureLLM
+ */
+interface ConfigureLLMOptions {
+    /** LLM provider to use */
+    provider: LLMProvider;
+    /** API key (auto-detects from environment if not provided) */
+    apiKey?: string;
+    /** Model to use (provider-specific defaults apply if not set) */
+    model?: string;
+    /** Temperature for generation */
+    temperature?: number;
+    /** Max tokens per completion */
+    maxTokens?: number;
+    /** Custom client (required when provider is "custom") */
+    client?: LLMClient;
+    /** Global defaults to set */
+    defaults?: LLMDefaults;
+}
+/**
+ * Options for auto-detection
+ */
+interface ConfigureLLMAutoOptions {
+    /** Model to use (optional, uses provider default if not set) */
+    model?: string;
+    /** Temperature for generation */
+    temperature?: number;
+    /** Max tokens per completion */
+    maxTokens?: number;
+    /** Global defaults to set */
+    defaults?: LLMDefaults;
+}
+/**
+ * One-step LLM configuration
+ *
+ * Simplifies LLM setup by combining adapter creation and client setting
+ * into a single call with environment variable auto-detection.
+ *
+ * @param options - Configuration options
+ * @returns The configured LLM client
+ *
+ * @example
+ * ```ts
+ * // Explicit provider (API key from environment)
+ * configureLLM({ provider: "openai", model: "gpt-4" });
+ *
+ * // With explicit API key
+ * configureLLM({
+ *   provider: "anthropic",
+ *   apiKey: "sk-ant-...",
+ *   model: "claude-3-5-sonnet-20241022"
+ * });
+ *
+ * // With global defaults
+ * configureLLM({
+ *   provider: "openai",
+ *   defaults: { evaluationMode: "batch" }
+ * });
+ *
+ * // Custom client
+ * configureLLM({ provider: "custom", client: myClient });
+ * ```
+ */
+declare function configureLLM(options: ConfigureLLMOptions): LLMClient;
+declare namespace configureLLM {
+    var auto: (options?: ConfigureLLMAutoOptions) => LLMClient;
+}
 /**
  * Utilities for LLM-based metric evaluation
@@ -547,4 +734,46 @@ interface OpenRouterAdapterOptions {
  */
 declare function createOpenRouterAdapter(apiKey: string, options?: OpenRouterAdapterOptions): LLMClient;
-export { type AnthropicAdapterOptions, BINARY_THRESHOLDS, type OpenAIAdapterOptions, type OpenRouterAdapterOptions, SEVERITY_THRESHOLDS, batch, batchItems, clearMetrics, createAnthropicAdapter, createErrorMockClient, createJSONSchema, createKeywordMetric, createLLMError, createMetricOutput, createMockLLMClient, createOpenAIAdapter, createOpenRouterAdapter, createPatternMetric, createSequentialMockClient, createSpyMockClient, delay, extractScore, fillPrompt, getLLMClient, getMetric, listMetrics, normalizeScore, parseJSONResponse, registerMetric, requireLLMClient, resetLLMClient, runMetric, scoreToLabel, setLLMClient, unregisterMetric, validateResponse, withTimeout };
+/**
+ * Metrics module - entry point for evalsense/metrics
+ *
+ * Provides LLM-based metrics, custom metric utilities, and LLM client management.
+ */
+/**
+ * Testing utilities for LLM metrics
+ *
+ * Provides convenient access to all testing-related functions in one namespace.
+ *
+ * @example
+ * ```ts
+ * import { testing } from "evalsense/metrics";
+ *
+ * describe("My tests", () => {
+ *   beforeEach(testing.reset);
+ *
+ *   it("test with mock", async () => {
+ *     const result = await testing.withClient(
+ *       testing.mock({ response: { score: 0.8 } }),
+ *       async () => hallucination([...])
+ *     );
+ *   });
+ * });
+ * ```
+ */
+declare const testing: {
+    /** Resets global LLM client and defaults */
+    reset: () => void;
+    /** Creates a mock LLM client */
+    mock: typeof createMockLLMClient;
+    /** Executes function with scoped LLM client */
+    withClient: typeof withLLMClient;
+    /** Creates a mock client that returns sequential responses */
+    sequentialMock: typeof createSequentialMockClient;
+    /** Creates a mock client that always errors */
+    errorMock: typeof createErrorMockClient;
+    /** Creates a spy mock client that records all prompts */
+    spyMock: typeof createSpyMockClient;
+};
+export { type AnthropicAdapterOptions, BINARY_THRESHOLDS, type ConfigureLLMAutoOptions, type ConfigureLLMOptions, type LLMDefaults, LLMMetric, LLMMetricConfig, type LLMProvider, type OpenAIAdapterOptions, type OpenRouterAdapterOptions, SEVERITY_THRESHOLDS, batch, batchItems, configureLLM, createAnthropicAdapter, createErrorMockClient, createJSONSchema, createKeywordMetric, createLLMError, createLLMMetric, createMetricOutput, createMockLLMClient, createOpenAIAdapter, createOpenRouterAdapter, createPatternMetric, createSequentialMockClient, createSpyMockClient, delay, extractScore, fillPrompt, getDefaults, getLLMClient, normalizeScore, parseJSONResponse, requireLLMClient, resetDefaults, resetLLMClient, scoreToLabel, setDefaults, setLLMClient, testing, validateResponse, withLLMClient, withTimeout };

package/dist/metrics/index.d.ts CHANGED

@@ -1,48 +1,99 @@
-export { FaithfulnessConfig, HallucinationConfig, RelevanceConfig, ToxicityConfig, faithfulness, hallucination, relevance, toxicity } from './opinionated/index.js';
-import { M as MetricFn, a as MetricConfig, b as MetricOutput, L as LLMClient, J as JSONSchema } from '../types-C71p0wzM.js';
+import { L as LLMMetricConfig, a as LLMMetric } from '../index-CoMpaW-K.js';
+export { E as EvalRecord, I as InputSpec, b as LLMMetricOptions, c as LabelThreshold, R as ResponseFieldType, f as faithfulness, h as hallucination, r as relevance, t as toxicity } from '../index-CoMpaW-K.js';
+import { M as MetricFn, a as MetricOutput, L as LLMClient, J as JSONSchema } from '../types-D0hzfyKm.js';
 /**
- * Custom metric registration
+ * Factory function for creating LLM-based metrics
+ *
+ * Reduces metric definition from 90+ lines to ~15 lines with a declarative API.
+ * Eliminates parallel array matching and provides unified record input.
+ *
+ * @example
+ * ```ts
+ * const answerCorrectness = createLLMMetric({
+ *   name: "answer-correctness",
+ *   inputs: ["output", "reference"],
+ *   prompt: ANSWER_CORRECTNESS_PROMPT,
+ *   responseFields: { score: "number", reasoning: "string" },
+ *   labels: [
+ *     { min: 0.8, label: "correct" },
+ *     { min: 0.5, label: "partial" },
+ *     { min: 0, label: "incorrect" },
+ *   ],
+ * });
+ *
+ * // Usage with unified records
+ * const results = await answerCorrectness([
+ *   { id: "1", output: "Paris", reference: "Paris is the capital" },
+ * ]);
+ * ```
  */
 /**
- * Registers a custom metric
+ * Creates an LLM-based metric function
+ *
+ * This factory function eliminates boilerplate by:
+ * - Handling LLM client validation
+ * - Managing structured output with fallback to text parsing
+ * - Normalizing scores to 0-1 range
+ * - Converting scores to labels using thresholds
+ * - Supporting both per-row and batch evaluation modes
+ * - Providing consistent error handling
+ *
+ * @param config - Metric configuration
+ * @returns A metric function that takes unified records
  *
  * @example
  * ```ts
- * registerMetric("custom-relevance", async ({ outputs, query }) => {
- *   // Custom evaluation logic
- *   return outputs.map(o => ({
- *     id: o.id,
- *     metric: "custom-relevance",
- *     score: evaluateRelevance(o.output, query),
- *   }));
+ * // Create a custom LLM metric
+ * const myMetric = createLLMMetric({
+ *   name: "my-metric",
+ *   inputs: ["output", "reference"],
+ *   prompt: `
+ *     Reference: {reference}
+ *     Output: {output}
+ *
+ *     Evaluate the output against the reference...
+ *   `,
+ *   responseFields: { score: "number", reasoning: "string" },
+ *   labels: [
+ *     { min: 0.7, label: "good" },
+ *     { min: 0.4, label: "fair" },
+ *     { min: 0, label: "poor" },
+ *   ],
  * });
+ *
+ * // Use with unified records
+ * const results = await myMetric([
+ *   { id: "1", output: "answer A", reference: "correct answer" },
+ *   { id: "2", output: "answer B", reference: "expected B" },
+ * ]);
  * ```
  */
-declare function registerMetric(name: string, fn: MetricFn): void;
-/**
- * Gets a registered custom metric
- */
-declare function getMetric(name: string): MetricFn | undefined;
-/**
- * Runs a registered metric
- */
-declare function runMetric(name: string, config: MetricConfig): Promise<MetricOutput[]>;
-/**
- * Lists all registered custom metrics
- */
-declare function listMetrics(): string[];
-/**
- * Unregisters a metric (mainly for testing)
- */
-declare function unregisterMetric(name: string): boolean;
+declare function createLLMMetric(config: LLMMetricConfig): LLMMetric;
 /**
- * Clears all registered metrics (mainly for testing)
+ * Custom metric utilities
+ *
+ * Provides simple pattern-based and keyword-based metrics for non-LLM use cases.
+ * For LLM-based custom metrics, use createLLMMetric() from evalsense/metrics.
  */
-declare function clearMetrics(): void;
 /**
  * Creates a simple string-matching metric
+ *
+ * @example
+ * ```ts
+ * const containsCodeMetric = createPatternMetric("contains-code", [
+ *   /```[\s\S]*?```/,
+ *   /function\s+\w+\s*\(/,
+ *   /const\s+\w+\s*=/,
+ * ]);
+ *
+ * const results = await containsCodeMetric({
+ *   outputs: [{ id: "1", output: "const x = 5" }]
+ * });
+ * ```
  */
 declare function createPatternMetric(name: string, patterns: RegExp[], options?: {
     matchScore?: number;
@@ -50,6 +101,19 @@ declare function createPatternMetric(name: string, patterns: RegExp[], options?:
 }): MetricFn;
 /**
  * Creates a keyword-based metric
+ *
+ * @example
+ * ```ts
+ * const techTermsMetric = createKeywordMetric("tech-terms", [
+ *   "machine learning",
+ *   "neural network",
+ *   "algorithm",
+ * ], { threshold: 0.3 });
+ *
+ * const results = await techTermsMetric({
+ *   outputs: [{ id: "1", output: "This uses a neural network algorithm." }]
+ * });
+ * ```
  */
 declare function createKeywordMetric(name: string, keywords: string[], options?: {
     caseSensitive?: boolean;
@@ -108,6 +172,13 @@ declare function delay(ms: number): Promise<void>;
  * across all LLM-based metrics, with support for per-call overrides.
  */
+/**
+ * Global defaults for LLM metrics
+ */
+interface LLMDefaults {
+    /** Default evaluation mode for all metrics */
+    evaluationMode?: "per-row" | "batch";
+}
 /**
  * Sets the global LLM client for all metrics
  *
@@ -144,6 +215,122 @@ declare function resetLLMClient(): void;
  * @returns The client to use (override or global)
  */
 declare function requireLLMClient(client: LLMClient | undefined, metricName: string): LLMClient;
+/**
+ * Executes a function with a scoped LLM client
+ *
+ * The client is automatically restored after the function completes,
+ * even if an error is thrown. This is ideal for testing scenarios
+ * where you want to use a mock client without affecting other tests.
+ *
+ * @param client - The LLM client to use for this scope
+ * @param fn - The async function to execute with the scoped client
+ * @returns The result of the function
+ *
+ * @example
+ * ```ts
+ * // No need for beforeEach(() => resetLLMClient())
+ * it("test with mock client", async () => {
+ *   const result = await withLLMClient(mockClient, async () => {
+ *     return hallucination([{ id: "1", output: "test", context: "ctx" }]);
+ *   });
+ *   expect(result[0].score).toBe(0.5);
+ * });
+ * ```
+ */
+declare function withLLMClient<T>(client: LLMClient, fn: () => Promise<T>): Promise<T>;
+/**
+ * Sets global defaults for LLM metrics
+ *
+ * @param defaults - Default options to apply to all metrics
+ *
+ * @example
+ * ```ts
+ * // Make all metrics use batch mode by default
+ * setDefaults({ evaluationMode: "batch" });
+ * ```
+ */
+declare function setDefaults(defaults: LLMDefaults): void;
+/**
+ * Gets the current global defaults
+ *
+ * @returns Current global defaults
+ */
+declare function getDefaults(): LLMDefaults;
+/**
+ * Resets global defaults to empty
+ */
+declare function resetDefaults(): void;
+/**
+ * Provider types for configureLLM
+ */
+type LLMProvider = "openai" | "anthropic" | "openrouter" | "custom";
+/**
+ * Options for configureLLM
+ */
+interface ConfigureLLMOptions {
+    /** LLM provider to use */
+    provider: LLMProvider;
+    /** API key (auto-detects from environment if not provided) */
+    apiKey?: string;
+    /** Model to use (provider-specific defaults apply if not set) */
+    model?: string;
+    /** Temperature for generation */
+    temperature?: number;
+    /** Max tokens per completion */
+    maxTokens?: number;
+    /** Custom client (required when provider is "custom") */
+    client?: LLMClient;
+    /** Global defaults to set */
+    defaults?: LLMDefaults;
+}
+/**
+ * Options for auto-detection
+ */
+interface ConfigureLLMAutoOptions {
+    /** Model to use (optional, uses provider default if not set) */
+    model?: string;
+    /** Temperature for generation */
+    temperature?: number;
+    /** Max tokens per completion */
+    maxTokens?: number;
+    /** Global defaults to set */
+    defaults?: LLMDefaults;
+}
+/**
+ * One-step LLM configuration
+ *
+ * Simplifies LLM setup by combining adapter creation and client setting
+ * into a single call with environment variable auto-detection.
+ *
+ * @param options - Configuration options
+ * @returns The configured LLM client
+ *
+ * @example
+ * ```ts
+ * // Explicit provider (API key from environment)
+ * configureLLM({ provider: "openai", model: "gpt-4" });
+ *
+ * // With explicit API key
+ * configureLLM({
+ *   provider: "anthropic",
+ *   apiKey: "sk-ant-...",
+ *   model: "claude-3-5-sonnet-20241022"
+ * });
+ *
+ * // With global defaults
+ * configureLLM({
+ *   provider: "openai",
+ *   defaults: { evaluationMode: "batch" }
+ * });
+ *
+ * // Custom client
+ * configureLLM({ provider: "custom", client: myClient });
+ * ```
+ */
+declare function configureLLM(options: ConfigureLLMOptions): LLMClient;
+declare namespace configureLLM {
+    var auto: (options?: ConfigureLLMAutoOptions) => LLMClient;
+}
 /**
  * Utilities for LLM-based metric evaluation
@@ -547,4 +734,46 @@ interface OpenRouterAdapterOptions {
  */
 declare function createOpenRouterAdapter(apiKey: string, options?: OpenRouterAdapterOptions): LLMClient;
-export { type AnthropicAdapterOptions, BINARY_THRESHOLDS, type OpenAIAdapterOptions, type OpenRouterAdapterOptions, SEVERITY_THRESHOLDS, batch, batchItems, clearMetrics, createAnthropicAdapter, createErrorMockClient, createJSONSchema, createKeywordMetric, createLLMError, createMetricOutput, createMockLLMClient, createOpenAIAdapter, createOpenRouterAdapter, createPatternMetric, createSequentialMockClient, createSpyMockClient, delay, extractScore, fillPrompt, getLLMClient, getMetric, listMetrics, normalizeScore, parseJSONResponse, registerMetric, requireLLMClient, resetLLMClient, runMetric, scoreToLabel, setLLMClient, unregisterMetric, validateResponse, withTimeout };
+/**
+ * Metrics module - entry point for evalsense/metrics
+ *
+ * Provides LLM-based metrics, custom metric utilities, and LLM client management.
+ */
+/**
+ * Testing utilities for LLM metrics
+ *
+ * Provides convenient access to all testing-related functions in one namespace.
+ *
+ * @example
+ * ```ts
+ * import { testing } from "evalsense/metrics";
+ *
+ * describe("My tests", () => {
+ *   beforeEach(testing.reset);
+ *
+ *   it("test with mock", async () => {
+ *     const result = await testing.withClient(
+ *       testing.mock({ response: { score: 0.8 } }),
+ *       async () => hallucination([...])
+ *     );
+ *   });
+ * });
+ * ```
+ */
+declare const testing: {
+    /** Resets global LLM client and defaults */
+    reset: () => void;
+    /** Creates a mock LLM client */
+    mock: typeof createMockLLMClient;
+    /** Executes function with scoped LLM client */
+    withClient: typeof withLLMClient;
+    /** Creates a mock client that returns sequential responses */
+    sequentialMock: typeof createSequentialMockClient;
+    /** Creates a mock client that always errors */
+    errorMock: typeof createErrorMockClient;
+    /** Creates a spy mock client that records all prompts */
+    spyMock: typeof createSpyMockClient;
+};
+export { type AnthropicAdapterOptions, BINARY_THRESHOLDS, type ConfigureLLMAutoOptions, type ConfigureLLMOptions, type LLMDefaults, LLMMetric, LLMMetricConfig, type LLMProvider, type OpenAIAdapterOptions, type OpenRouterAdapterOptions, SEVERITY_THRESHOLDS, batch, batchItems, configureLLM, createAnthropicAdapter, createErrorMockClient, createJSONSchema, createKeywordMetric, createLLMError, createLLMMetric, createMetricOutput, createMockLLMClient, createOpenAIAdapter, createOpenRouterAdapter, createPatternMetric, createSequentialMockClient, createSpyMockClient, delay, extractScore, fillPrompt, getDefaults, getLLMClient, normalizeScore, parseJSONResponse, requireLLMClient, resetDefaults, resetLLMClient, scoreToLabel, setDefaults, setLLMClient, testing, validateResponse, withLLMClient, withTimeout };