npm - evalsense - Versions diffs - 0.2.0 → 0.3.0 - Mend

evalsense 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37)

package/LICENSE +190 -0
package/README.md +99 -82
package/dist/{chunk-HDJID3GC.cjs → chunk-DFC6FRTG.cjs} +8 -26
package/dist/chunk-DFC6FRTG.cjs.map +1 -0
package/dist/chunk-DGUM43GV.js +10 -0
package/dist/chunk-DGUM43GV.js.map +1 -0
package/dist/chunk-JEQ2X3Z6.cjs +12 -0
package/dist/chunk-JEQ2X3Z6.cjs.map +1 -0
package/dist/{chunk-5P7LNNO6.js → chunk-JPVZL45G.js} +8 -26
package/dist/chunk-JPVZL45G.js.map +1 -0
package/dist/{chunk-Y23VHTD3.cjs → chunk-RZFLCWTW.cjs} +2 -2
package/dist/chunk-RZFLCWTW.cjs.map +1 -0
package/dist/{chunk-BRPM6AB6.js → chunk-Z3U6AUWX.js} +2 -2
package/dist/chunk-Z3U6AUWX.js.map +1 -0
package/dist/cli.cjs +39 -36
package/dist/cli.cjs.map +1 -1
package/dist/cli.js +37 -34
package/dist/cli.js.map +1 -1
package/dist/index.cjs +300 -101
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +76 -6
package/dist/index.d.ts +76 -6
package/dist/index.js +222 -23
package/dist/index.js.map +1 -1
package/dist/metrics/index.cjs +257 -17
package/dist/metrics/index.cjs.map +1 -1
package/dist/metrics/index.d.cts +252 -1
package/dist/metrics/index.d.ts +252 -1
package/dist/metrics/index.js +240 -2
package/dist/metrics/index.js.map +1 -1
package/dist/metrics/opinionated/index.cjs +6 -5
package/dist/metrics/opinionated/index.js +2 -1
package/package.json +8 -6
package/dist/chunk-5P7LNNO6.js.map +0 -1
package/dist/chunk-BRPM6AB6.js.map +0 -1
package/dist/chunk-HDJID3GC.cjs.map +0 -1
package/dist/chunk-Y23VHTD3.cjs.map +0 -1

package/dist/metrics/index.d.cts (changed)

@@ -296,4 +296,255 @@ declare function createSpyMockClient(response: string | Record<string, unknown>)
     prompts: string[];
 };
-export { BINARY_THRESHOLDS, SEVERITY_THRESHOLDS, batch, batchItems, clearMetrics, createErrorMockClient, createJSONSchema, createKeywordMetric, createLLMError, createMetricOutput, createMockLLMClient, createPatternMetric, createSequentialMockClient, createSpyMockClient, delay, extractScore, fillPrompt, getLLMClient, getMetric, listMetrics, normalizeScore, parseJSONResponse, registerMetric, requireLLMClient, resetLLMClient, runMetric, scoreToLabel, setLLMClient, unregisterMetric, validateResponse, withTimeout };
+/**
+ * Built-in OpenAI adapter for evalsense
+ *
+ * Provides a simple way to use OpenAI models without writing adapter code.
+ *
+ * @example
+ * ```javascript
+ * import { setLLMClient, createOpenAIAdapter } from 'evalsense/metrics';
+ *
+ * setLLMClient(createOpenAIAdapter(process.env.OPENAI_API_KEY, {
+ *   model: 'gpt-4-turbo-preview',
+ *   temperature: 0
+ * }));
+ * ```
+ */
+interface OpenAIAdapterOptions {
+    /**
+     * OpenAI model to use
+     * @default "gpt-4-turbo-preview"
+     */
+    model?: string;
+    /**
+     * Temperature for generation (0-2)
+     * @default 0
+     */
+    temperature?: number;
+    /**
+     * Maximum tokens per completion
+     * @default 4096
+     */
+    maxTokens?: number;
+    /**
+     * API base URL (for Azure OpenAI or proxies)
+     * @default undefined (uses default OpenAI endpoint)
+     */
+    baseURL?: string;
+    /**
+     * Organization ID (optional)
+     */
+    organization?: string;
+    /**
+     * Request timeout in milliseconds
+     * @default 30000
+     */
+    timeout?: number;
+}
+/**
+ * Creates an LLM client adapter for OpenAI.
+ *
+ * **Setup:**
+ * 1. Install OpenAI SDK: `npm install openai`
+ * 2. Get API key from https://platform.openai.com/api-keys
+ * 3. Set environment variable: `export OPENAI_API_KEY="sk-..."`
+ *
+ * **Model Options:**
+ * - `gpt-4-turbo-preview` - Most capable, expensive
+ * - `gpt-4` - High quality, expensive
+ * - `gpt-3.5-turbo` - Fast and cheap (20x cheaper than GPT-4)
+ *
+ * @param apiKey - Your OpenAI API key
+ * @param options - Configuration options
+ * @returns LLM client for use with evalsense metrics
+ *
+ * @example
+ * ```javascript
+ * // Basic usage
+ * const client = createOpenAIAdapter(process.env.OPENAI_API_KEY);
+ * setLLMClient(client);
+ *
+ * // With custom model
+ * const client = createOpenAIAdapter(process.env.OPENAI_API_KEY, {
+ *   model: 'gpt-3.5-turbo',  // Cheaper model
+ *   temperature: 0.3
+ * });
+ *
+ * // With Azure OpenAI
+ * const client = createOpenAIAdapter(process.env.AZURE_OPENAI_KEY, {
+ *   baseURL: 'https://your-resource.openai.azure.com',
+ *   model: 'gpt-4'
+ * });
+ * ```
+ */
+declare function createOpenAIAdapter(apiKey: string, options?: OpenAIAdapterOptions): LLMClient;
+/**
+ * Built-in Anthropic (Claude) adapter for evalsense
+ *
+ * Provides a simple way to use Claude models without writing adapter code.
+ *
+ * @example
+ * ```javascript
+ * import { setLLMClient, createAnthropicAdapter } from 'evalsense/metrics';
+ *
+ * setLLMClient(createAnthropicAdapter(process.env.ANTHROPIC_API_KEY, {
+ *   model: 'claude-3-5-sonnet-20241022'
+ * }));
+ * ```
+ */
+interface AnthropicAdapterOptions {
+    /**
+     * Anthropic model to use
+     * @default "claude-3-5-sonnet-20241022"
+     */
+    model?: string;
+    /**
+     * Maximum tokens per completion
+     * @default 4096
+     */
+    maxTokens?: number;
+    /**
+     * Temperature for generation (0-1)
+     * Note: Anthropic doesn't support temperature > 1
+     * @default 0
+     */
+    temperature?: number;
+    /**
+     * Request timeout in milliseconds
+     * @default 30000
+     */
+    timeout?: number;
+}
+/**
+ * Creates an LLM client adapter for Anthropic Claude.
+ *
+ * **Setup:**
+ * 1. Install Anthropic SDK: `npm install @anthropic-ai/sdk`
+ * 2. Get API key from https://console.anthropic.com/
+ * 3. Set environment variable: `export ANTHROPIC_API_KEY="sk-ant-..."`
+ *
+ * **Model Options:**
+ * - `claude-3-5-sonnet-20241022` - Latest, most capable (recommended)
+ * - `claude-3-opus-20240229` - Most capable, expensive
+ * - `claude-3-sonnet-20240229` - Balanced performance
+ * - `claude-3-haiku-20240307` - Fast and affordable
+ *
+ * @param apiKey - Your Anthropic API key
+ * @param options - Configuration options
+ * @returns LLM client for use with evalsense metrics
+ *
+ * @example
+ * ```javascript
+ * // Basic usage
+ * const client = createAnthropicAdapter(process.env.ANTHROPIC_API_KEY);
+ * setLLMClient(client);
+ *
+ * // With custom model
+ * const client = createAnthropicAdapter(process.env.ANTHROPIC_API_KEY, {
+ *   model: 'claude-3-haiku-20240307',  // Cheaper, faster model
+ *   maxTokens: 2048
+ * });
+ * ```
+ */
+declare function createAnthropicAdapter(apiKey: string, options?: AnthropicAdapterOptions): LLMClient;
+/**
+ * Built-in OpenRouter adapter for evalsense
+ *
+ * OpenRouter provides access to multiple LLM providers (OpenAI, Anthropic, Google, Meta, etc.)
+ * through a single unified API. Great for comparing models or avoiding vendor lock-in.
+ *
+ * @example
+ * ```javascript
+ * import { setLLMClient, createOpenRouterAdapter } from 'evalsense/metrics';
+ *
+ * setLLMClient(createOpenRouterAdapter(process.env.OPENROUTER_API_KEY, {
+ *   model: 'anthropic/claude-3.5-sonnet'
+ * }));
+ * ```
+ */
+interface OpenRouterAdapterOptions {
+    /**
+     * Model to use (in format: provider/model-name)
+     *
+     * Popular options:
+     * - `anthropic/claude-3.5-sonnet` - Latest Claude
+     * - `openai/gpt-4-turbo` - GPT-4 Turbo
+     * - `openai/gpt-3.5-turbo` - Cheap and fast
+     * - `google/gemini-pro` - Google Gemini
+     * - `meta-llama/llama-3-70b-instruct` - Open source
+     *
+     * See full list: https://openrouter.ai/models
+     *
+     * @default "anthropic/claude-3.5-sonnet"
+     */
+    model?: string;
+    /**
+     * Temperature for generation (0-2)
+     * @default 0
+     */
+    temperature?: number;
+    /**
+     * Maximum tokens per completion
+     * @default 4096
+     */
+    maxTokens?: number;
+    /**
+     * Your app name (for OpenRouter analytics)
+     * @default "evalsense"
+     */
+    appName?: string;
+    /**
+     * Your app URL (for OpenRouter analytics)
+     */
+    siteUrl?: string;
+    /**
+     * Request timeout in milliseconds
+     * @default 30000
+     */
+    timeout?: number;
+}
+/**
+ * Creates an LLM client adapter for OpenRouter.
+ *
+ * **Setup:**
+ * 1. Get API key from https://openrouter.ai/keys
+ * 2. Set environment variable: `export OPENROUTER_API_KEY="sk-or-..."`
+ * 3. No SDK needed - uses fetch API
+ *
+ * **Benefits:**
+ * - Access 100+ models from one API
+ * - Compare different providers easily
+ * - Automatic fallbacks and retries
+ * - Transparent pricing
+ *
+ * @param apiKey - Your OpenRouter API key
+ * @param options - Configuration options
+ * @returns LLM client for use with evalsense metrics
+ *
+ * @example
+ * ```javascript
+ * // Basic usage
+ * const client = createOpenRouterAdapter(process.env.OPENROUTER_API_KEY);
+ * setLLMClient(client);
+ *
+ * // Use different model
+ * const client = createOpenRouterAdapter(process.env.OPENROUTER_API_KEY, {
+ *   model: 'openai/gpt-3.5-turbo',  // Cheaper option
+ *   appName: 'my-eval-system'
+ * });
+ *
+ * // Use free models for testing
+ * const client = createOpenRouterAdapter(process.env.OPENROUTER_API_KEY, {
+ *   model: 'meta-llama/llama-3-8b-instruct:free'
+ * });
+ * ```
+ */
+declare function createOpenRouterAdapter(apiKey: string, options?: OpenRouterAdapterOptions): LLMClient;
+export { type AnthropicAdapterOptions, BINARY_THRESHOLDS, type OpenAIAdapterOptions, type OpenRouterAdapterOptions, SEVERITY_THRESHOLDS, batch, batchItems, clearMetrics, createAnthropicAdapter, createErrorMockClient, createJSONSchema, createKeywordMetric, createLLMError, createMetricOutput, createMockLLMClient, createOpenAIAdapter, createOpenRouterAdapter, createPatternMetric, createSequentialMockClient, createSpyMockClient, delay, extractScore, fillPrompt, getLLMClient, getMetric, listMetrics, normalizeScore, parseJSONResponse, registerMetric, requireLLMClient, resetLLMClient, runMetric, scoreToLabel, setLLMClient, unregisterMetric, validateResponse, withTimeout };

package/dist/metrics/index.d.ts (changed)

@@ -296,4 +296,255 @@ declare function createSpyMockClient(response: string | Record<string, unknown>)
     prompts: string[];
 };
-export { BINARY_THRESHOLDS, SEVERITY_THRESHOLDS, batch, batchItems, clearMetrics, createErrorMockClient, createJSONSchema, createKeywordMetric, createLLMError, createMetricOutput, createMockLLMClient, createPatternMetric, createSequentialMockClient, createSpyMockClient, delay, extractScore, fillPrompt, getLLMClient, getMetric, listMetrics, normalizeScore, parseJSONResponse, registerMetric, requireLLMClient, resetLLMClient, runMetric, scoreToLabel, setLLMClient, unregisterMetric, validateResponse, withTimeout };
+/**
+ * Built-in OpenAI adapter for evalsense
+ *
+ * Provides a simple way to use OpenAI models without writing adapter code.
+ *
+ * @example
+ * ```javascript
+ * import { setLLMClient, createOpenAIAdapter } from 'evalsense/metrics';
+ *
+ * setLLMClient(createOpenAIAdapter(process.env.OPENAI_API_KEY, {
+ *   model: 'gpt-4-turbo-preview',
+ *   temperature: 0
+ * }));
+ * ```
+ */
+interface OpenAIAdapterOptions {
+    /**
+     * OpenAI model to use
+     * @default "gpt-4-turbo-preview"
+     */
+    model?: string;
+    /**
+     * Temperature for generation (0-2)
+     * @default 0
+     */
+    temperature?: number;
+    /**
+     * Maximum tokens per completion
+     * @default 4096
+     */
+    maxTokens?: number;
+    /**
+     * API base URL (for Azure OpenAI or proxies)
+     * @default undefined (uses default OpenAI endpoint)
+     */
+    baseURL?: string;
+    /**
+     * Organization ID (optional)
+     */
+    organization?: string;
+    /**
+     * Request timeout in milliseconds
+     * @default 30000
+     */
+    timeout?: number;
+}
+/**
+ * Creates an LLM client adapter for OpenAI.
+ *
+ * **Setup:**
+ * 1. Install OpenAI SDK: `npm install openai`
+ * 2. Get API key from https://platform.openai.com/api-keys
+ * 3. Set environment variable: `export OPENAI_API_KEY="sk-..."`
+ *
+ * **Model Options:**
+ * - `gpt-4-turbo-preview` - Most capable, expensive
+ * - `gpt-4` - High quality, expensive
+ * - `gpt-3.5-turbo` - Fast and cheap (20x cheaper than GPT-4)
+ *
+ * @param apiKey - Your OpenAI API key
+ * @param options - Configuration options
+ * @returns LLM client for use with evalsense metrics
+ *
+ * @example
+ * ```javascript
+ * // Basic usage
+ * const client = createOpenAIAdapter(process.env.OPENAI_API_KEY);
+ * setLLMClient(client);
+ *
+ * // With custom model
+ * const client = createOpenAIAdapter(process.env.OPENAI_API_KEY, {
+ *   model: 'gpt-3.5-turbo',  // Cheaper model
+ *   temperature: 0.3
+ * });
+ *
+ * // With Azure OpenAI
+ * const client = createOpenAIAdapter(process.env.AZURE_OPENAI_KEY, {
+ *   baseURL: 'https://your-resource.openai.azure.com',
+ *   model: 'gpt-4'
+ * });
+ * ```
+ */
+declare function createOpenAIAdapter(apiKey: string, options?: OpenAIAdapterOptions): LLMClient;
+/**
+ * Built-in Anthropic (Claude) adapter for evalsense
+ *
+ * Provides a simple way to use Claude models without writing adapter code.
+ *
+ * @example
+ * ```javascript
+ * import { setLLMClient, createAnthropicAdapter } from 'evalsense/metrics';
+ *
+ * setLLMClient(createAnthropicAdapter(process.env.ANTHROPIC_API_KEY, {
+ *   model: 'claude-3-5-sonnet-20241022'
+ * }));
+ * ```
+ */
+interface AnthropicAdapterOptions {
+    /**
+     * Anthropic model to use
+     * @default "claude-3-5-sonnet-20241022"
+     */
+    model?: string;
+    /**
+     * Maximum tokens per completion
+     * @default 4096
+     */
+    maxTokens?: number;
+    /**
+     * Temperature for generation (0-1)
+     * Note: Anthropic doesn't support temperature > 1
+     * @default 0
+     */
+    temperature?: number;
+    /**
+     * Request timeout in milliseconds
+     * @default 30000
+     */
+    timeout?: number;
+}
+/**
+ * Creates an LLM client adapter for Anthropic Claude.
+ *
+ * **Setup:**
+ * 1. Install Anthropic SDK: `npm install @anthropic-ai/sdk`
+ * 2. Get API key from https://console.anthropic.com/
+ * 3. Set environment variable: `export ANTHROPIC_API_KEY="sk-ant-..."`
+ *
+ * **Model Options:**
+ * - `claude-3-5-sonnet-20241022` - Latest, most capable (recommended)
+ * - `claude-3-opus-20240229` - Most capable, expensive
+ * - `claude-3-sonnet-20240229` - Balanced performance
+ * - `claude-3-haiku-20240307` - Fast and affordable
+ *
+ * @param apiKey - Your Anthropic API key
+ * @param options - Configuration options
+ * @returns LLM client for use with evalsense metrics
+ *
+ * @example
+ * ```javascript
+ * // Basic usage
+ * const client = createAnthropicAdapter(process.env.ANTHROPIC_API_KEY);
+ * setLLMClient(client);
+ *
+ * // With custom model
+ * const client = createAnthropicAdapter(process.env.ANTHROPIC_API_KEY, {
+ *   model: 'claude-3-haiku-20240307',  // Cheaper, faster model
+ *   maxTokens: 2048
+ * });
+ * ```
+ */
+declare function createAnthropicAdapter(apiKey: string, options?: AnthropicAdapterOptions): LLMClient;
+/**
+ * Built-in OpenRouter adapter for evalsense
+ *
+ * OpenRouter provides access to multiple LLM providers (OpenAI, Anthropic, Google, Meta, etc.)
+ * through a single unified API. Great for comparing models or avoiding vendor lock-in.
+ *
+ * @example
+ * ```javascript
+ * import { setLLMClient, createOpenRouterAdapter } from 'evalsense/metrics';
+ *
+ * setLLMClient(createOpenRouterAdapter(process.env.OPENROUTER_API_KEY, {
+ *   model: 'anthropic/claude-3.5-sonnet'
+ * }));
+ * ```
+ */
+interface OpenRouterAdapterOptions {
+    /**
+     * Model to use (in format: provider/model-name)
+     *
+     * Popular options:
+     * - `anthropic/claude-3.5-sonnet` - Latest Claude
+     * - `openai/gpt-4-turbo` - GPT-4 Turbo
+     * - `openai/gpt-3.5-turbo` - Cheap and fast
+     * - `google/gemini-pro` - Google Gemini
+     * - `meta-llama/llama-3-70b-instruct` - Open source
+     *
+     * See full list: https://openrouter.ai/models
+     *
+     * @default "anthropic/claude-3.5-sonnet"
+     */
+    model?: string;
+    /**
+     * Temperature for generation (0-2)
+     * @default 0
+     */
+    temperature?: number;
+    /**
+     * Maximum tokens per completion
+     * @default 4096
+     */
+    maxTokens?: number;
+    /**
+     * Your app name (for OpenRouter analytics)
+     * @default "evalsense"
+     */
+    appName?: string;
+    /**
+     * Your app URL (for OpenRouter analytics)
+     */
+    siteUrl?: string;
+    /**
+     * Request timeout in milliseconds
+     * @default 30000
+     */
+    timeout?: number;
+}
+/**
+ * Creates an LLM client adapter for OpenRouter.
+ *
+ * **Setup:**
+ * 1. Get API key from https://openrouter.ai/keys
+ * 2. Set environment variable: `export OPENROUTER_API_KEY="sk-or-..."`
+ * 3. No SDK needed - uses fetch API
+ *
+ * **Benefits:**
+ * - Access 100+ models from one API
+ * - Compare different providers easily
+ * - Automatic fallbacks and retries
+ * - Transparent pricing
+ *
+ * @param apiKey - Your OpenRouter API key
+ * @param options - Configuration options
+ * @returns LLM client for use with evalsense metrics
+ *
+ * @example
+ * ```javascript
+ * // Basic usage
+ * const client = createOpenRouterAdapter(process.env.OPENROUTER_API_KEY);
+ * setLLMClient(client);
+ *
+ * // Use different model
+ * const client = createOpenRouterAdapter(process.env.OPENROUTER_API_KEY, {
+ *   model: 'openai/gpt-3.5-turbo',  // Cheaper option
+ *   appName: 'my-eval-system'
+ * });
+ *
+ * // Use free models for testing
+ * const client = createOpenRouterAdapter(process.env.OPENROUTER_API_KEY, {
+ *   model: 'meta-llama/llama-3-8b-instruct:free'
+ * });
+ * ```
+ */
+declare function createOpenRouterAdapter(apiKey: string, options?: OpenRouterAdapterOptions): LLMClient;
+export { type AnthropicAdapterOptions, BINARY_THRESHOLDS, type OpenAIAdapterOptions, type OpenRouterAdapterOptions, SEVERITY_THRESHOLDS, batch, batchItems, clearMetrics, createAnthropicAdapter, createErrorMockClient, createJSONSchema, createKeywordMetric, createLLMError, createMetricOutput, createMockLLMClient, createOpenAIAdapter, createOpenRouterAdapter, createPatternMetric, createSequentialMockClient, createSpyMockClient, delay, extractScore, fillPrompt, getLLMClient, getMetric, listMetrics, normalizeScore, parseJSONResponse, registerMetric, requireLLMClient, resetLLMClient, runMetric, scoreToLabel, setLLMClient, unregisterMetric, validateResponse, withTimeout };