evalsense 0.2.0 → 0.3.0

This diff shows the changes between publicly available versions of a package as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -296,4 +296,255 @@ declare function createSpyMockClient(response: string | Record<string, unknown>)
296
296
  prompts: string[];
297
297
  };
298
298
 
299
- export { BINARY_THRESHOLDS, SEVERITY_THRESHOLDS, batch, batchItems, clearMetrics, createErrorMockClient, createJSONSchema, createKeywordMetric, createLLMError, createMetricOutput, createMockLLMClient, createPatternMetric, createSequentialMockClient, createSpyMockClient, delay, extractScore, fillPrompt, getLLMClient, getMetric, listMetrics, normalizeScore, parseJSONResponse, registerMetric, requireLLMClient, resetLLMClient, runMetric, scoreToLabel, setLLMClient, unregisterMetric, validateResponse, withTimeout };
299
+ /**
300
+ * Built-in OpenAI adapter for evalsense
301
+ *
302
+ * Provides a simple way to use OpenAI models without writing adapter code.
303
+ *
304
+ * @example
305
+ * ```javascript
306
+ * import { setLLMClient, createOpenAIAdapter } from 'evalsense/metrics';
307
+ *
308
+ * setLLMClient(createOpenAIAdapter(process.env.OPENAI_API_KEY, {
309
+ * model: 'gpt-4-turbo-preview',
310
+ * temperature: 0
311
+ * }));
312
+ * ```
313
+ */
314
+
315
+ interface OpenAIAdapterOptions {
316
+ /**
317
+ * OpenAI model to use
318
+ * @default "gpt-4-turbo-preview"
319
+ */
320
+ model?: string;
321
+ /**
322
+ * Temperature for generation (0-2)
323
+ * @default 0
324
+ */
325
+ temperature?: number;
326
+ /**
327
+ * Maximum tokens per completion
328
+ * @default 4096
329
+ */
330
+ maxTokens?: number;
331
+ /**
332
+ * API base URL (for Azure OpenAI or proxies)
333
+ * @default undefined (uses default OpenAI endpoint)
334
+ */
335
+ baseURL?: string;
336
+ /**
337
+ * Organization ID (optional)
338
+ */
339
+ organization?: string;
340
+ /**
341
+ * Request timeout in milliseconds
342
+ * @default 30000
343
+ */
344
+ timeout?: number;
345
+ }
346
+ /**
347
+ * Creates an LLM client adapter for OpenAI.
348
+ *
349
+ * **Setup:**
350
+ * 1. Install OpenAI SDK: `npm install openai`
351
+ * 2. Get API key from https://platform.openai.com/api-keys
352
+ * 3. Set environment variable: `export OPENAI_API_KEY="sk-..."`
353
+ *
354
+ * **Model Options:**
355
+ * - `gpt-4-turbo-preview` - Most capable, expensive
356
+ * - `gpt-4` - High quality, expensive
357
+ * - `gpt-3.5-turbo` - Fast and cheap (20x cheaper than GPT-4)
358
+ *
359
+ * @param apiKey - Your OpenAI API key
360
+ * @param options - Configuration options
361
+ * @returns LLM client for use with evalsense metrics
362
+ *
363
+ * @example
364
+ * ```javascript
365
+ * // Basic usage
366
+ * const client = createOpenAIAdapter(process.env.OPENAI_API_KEY);
367
+ * setLLMClient(client);
368
+ *
369
+ * // With custom model
370
+ * const client = createOpenAIAdapter(process.env.OPENAI_API_KEY, {
371
+ * model: 'gpt-3.5-turbo', // Cheaper model
372
+ * temperature: 0.3
373
+ * });
374
+ *
375
+ * // With Azure OpenAI
376
+ * const client = createOpenAIAdapter(process.env.AZURE_OPENAI_KEY, {
377
+ * baseURL: 'https://your-resource.openai.azure.com',
378
+ * model: 'gpt-4'
379
+ * });
380
+ * ```
381
+ */
382
+ declare function createOpenAIAdapter(apiKey: string, options?: OpenAIAdapterOptions): LLMClient;
383
+
384
+ /**
385
+ * Built-in Anthropic (Claude) adapter for evalsense
386
+ *
387
+ * Provides a simple way to use Claude models without writing adapter code.
388
+ *
389
+ * @example
390
+ * ```javascript
391
+ * import { setLLMClient, createAnthropicAdapter } from 'evalsense/metrics';
392
+ *
393
+ * setLLMClient(createAnthropicAdapter(process.env.ANTHROPIC_API_KEY, {
394
+ * model: 'claude-3-5-sonnet-20241022'
395
+ * }));
396
+ * ```
397
+ */
398
+
399
+ interface AnthropicAdapterOptions {
400
+ /**
401
+ * Anthropic model to use
402
+ * @default "claude-3-5-sonnet-20241022"
403
+ */
404
+ model?: string;
405
+ /**
406
+ * Maximum tokens per completion
407
+ * @default 4096
408
+ */
409
+ maxTokens?: number;
410
+ /**
411
+ * Temperature for generation (0-1)
412
+ * Note: Anthropic doesn't support temperature > 1
413
+ * @default 0
414
+ */
415
+ temperature?: number;
416
+ /**
417
+ * Request timeout in milliseconds
418
+ * @default 30000
419
+ */
420
+ timeout?: number;
421
+ }
422
+ /**
423
+ * Creates an LLM client adapter for Anthropic Claude.
424
+ *
425
+ * **Setup:**
426
+ * 1. Install Anthropic SDK: `npm install @anthropic-ai/sdk`
427
+ * 2. Get API key from https://console.anthropic.com/
428
+ * 3. Set environment variable: `export ANTHROPIC_API_KEY="sk-ant-..."`
429
+ *
430
+ * **Model Options:**
431
+ * - `claude-3-5-sonnet-20241022` - Latest, most capable (recommended)
432
+ * - `claude-3-opus-20240229` - Most capable, expensive
433
+ * - `claude-3-sonnet-20240229` - Balanced performance
434
+ * - `claude-3-haiku-20240307` - Fast and affordable
435
+ *
436
+ * @param apiKey - Your Anthropic API key
437
+ * @param options - Configuration options
438
+ * @returns LLM client for use with evalsense metrics
439
+ *
440
+ * @example
441
+ * ```javascript
442
+ * // Basic usage
443
+ * const client = createAnthropicAdapter(process.env.ANTHROPIC_API_KEY);
444
+ * setLLMClient(client);
445
+ *
446
+ * // With custom model
447
+ * const client = createAnthropicAdapter(process.env.ANTHROPIC_API_KEY, {
448
+ * model: 'claude-3-haiku-20240307', // Cheaper, faster model
449
+ * maxTokens: 2048
450
+ * });
451
+ * ```
452
+ */
453
+ declare function createAnthropicAdapter(apiKey: string, options?: AnthropicAdapterOptions): LLMClient;
454
+
455
+ /**
456
+ * Built-in OpenRouter adapter for evalsense
457
+ *
458
+ * OpenRouter provides access to multiple LLM providers (OpenAI, Anthropic, Google, Meta, etc.)
459
+ * through a single unified API. Great for comparing models or avoiding vendor lock-in.
460
+ *
461
+ * @example
462
+ * ```javascript
463
+ * import { setLLMClient, createOpenRouterAdapter } from 'evalsense/metrics';
464
+ *
465
+ * setLLMClient(createOpenRouterAdapter(process.env.OPENROUTER_API_KEY, {
466
+ * model: 'anthropic/claude-3.5-sonnet'
467
+ * }));
468
+ * ```
469
+ */
470
+
471
+ interface OpenRouterAdapterOptions {
472
+ /**
473
+ * Model to use (in format: provider/model-name)
474
+ *
475
+ * Popular options:
476
+ * - `anthropic/claude-3.5-sonnet` - Latest Claude
477
+ * - `openai/gpt-4-turbo` - GPT-4 Turbo
478
+ * - `openai/gpt-3.5-turbo` - Cheap and fast
479
+ * - `google/gemini-pro` - Google Gemini
480
+ * - `meta-llama/llama-3-70b-instruct` - Open source
481
+ *
482
+ * See full list: https://openrouter.ai/models
483
+ *
484
+ * @default "anthropic/claude-3.5-sonnet"
485
+ */
486
+ model?: string;
487
+ /**
488
+ * Temperature for generation (0-2)
489
+ * @default 0
490
+ */
491
+ temperature?: number;
492
+ /**
493
+ * Maximum tokens per completion
494
+ * @default 4096
495
+ */
496
+ maxTokens?: number;
497
+ /**
498
+ * Your app name (for OpenRouter analytics)
499
+ * @default "evalsense"
500
+ */
501
+ appName?: string;
502
+ /**
503
+ * Your app URL (for OpenRouter analytics)
504
+ */
505
+ siteUrl?: string;
506
+ /**
507
+ * Request timeout in milliseconds
508
+ * @default 30000
509
+ */
510
+ timeout?: number;
511
+ }
512
+ /**
513
+ * Creates an LLM client adapter for OpenRouter.
514
+ *
515
+ * **Setup:**
516
+ * 1. Get API key from https://openrouter.ai/keys
517
+ * 2. Set environment variable: `export OPENROUTER_API_KEY="sk-or-..."`
518
+ * 3. No SDK needed - uses fetch API
519
+ *
520
+ * **Benefits:**
521
+ * - Access 100+ models from one API
522
+ * - Compare different providers easily
523
+ * - Automatic fallbacks and retries
524
+ * - Transparent pricing
525
+ *
526
+ * @param apiKey - Your OpenRouter API key
527
+ * @param options - Configuration options
528
+ * @returns LLM client for use with evalsense metrics
529
+ *
530
+ * @example
531
+ * ```javascript
532
+ * // Basic usage
533
+ * const client = createOpenRouterAdapter(process.env.OPENROUTER_API_KEY);
534
+ * setLLMClient(client);
535
+ *
536
+ * // Use different model
537
+ * const client = createOpenRouterAdapter(process.env.OPENROUTER_API_KEY, {
538
+ * model: 'openai/gpt-3.5-turbo', // Cheaper option
539
+ * appName: 'my-eval-system'
540
+ * });
541
+ *
542
+ * // Use free models for testing
543
+ * const client = createOpenRouterAdapter(process.env.OPENROUTER_API_KEY, {
544
+ * model: 'meta-llama/llama-3-8b-instruct:free'
545
+ * });
546
+ * ```
547
+ */
548
+ declare function createOpenRouterAdapter(apiKey: string, options?: OpenRouterAdapterOptions): LLMClient;
549
+
550
+ export { type AnthropicAdapterOptions, BINARY_THRESHOLDS, type OpenAIAdapterOptions, type OpenRouterAdapterOptions, SEVERITY_THRESHOLDS, batch, batchItems, clearMetrics, createAnthropicAdapter, createErrorMockClient, createJSONSchema, createKeywordMetric, createLLMError, createMetricOutput, createMockLLMClient, createOpenAIAdapter, createOpenRouterAdapter, createPatternMetric, createSequentialMockClient, createSpyMockClient, delay, extractScore, fillPrompt, getLLMClient, getMetric, listMetrics, normalizeScore, parseJSONResponse, registerMetric, requireLLMClient, resetLLMClient, runMetric, scoreToLabel, setLLMClient, unregisterMetric, validateResponse, withTimeout };
@@ -296,4 +296,255 @@ declare function createSpyMockClient(response: string | Record<string, unknown>)
296
296
  prompts: string[];
297
297
  };
298
298
 
299
- export { BINARY_THRESHOLDS, SEVERITY_THRESHOLDS, batch, batchItems, clearMetrics, createErrorMockClient, createJSONSchema, createKeywordMetric, createLLMError, createMetricOutput, createMockLLMClient, createPatternMetric, createSequentialMockClient, createSpyMockClient, delay, extractScore, fillPrompt, getLLMClient, getMetric, listMetrics, normalizeScore, parseJSONResponse, registerMetric, requireLLMClient, resetLLMClient, runMetric, scoreToLabel, setLLMClient, unregisterMetric, validateResponse, withTimeout };
299
+ /**
300
+ * Built-in OpenAI adapter for evalsense
301
+ *
302
+ * Provides a simple way to use OpenAI models without writing adapter code.
303
+ *
304
+ * @example
305
+ * ```javascript
306
+ * import { setLLMClient, createOpenAIAdapter } from 'evalsense/metrics';
307
+ *
308
+ * setLLMClient(createOpenAIAdapter(process.env.OPENAI_API_KEY, {
309
+ * model: 'gpt-4-turbo-preview',
310
+ * temperature: 0
311
+ * }));
312
+ * ```
313
+ */
314
+
315
+ interface OpenAIAdapterOptions {
316
+ /**
317
+ * OpenAI model to use
318
+ * @default "gpt-4-turbo-preview"
319
+ */
320
+ model?: string;
321
+ /**
322
+ * Temperature for generation (0-2)
323
+ * @default 0
324
+ */
325
+ temperature?: number;
326
+ /**
327
+ * Maximum tokens per completion
328
+ * @default 4096
329
+ */
330
+ maxTokens?: number;
331
+ /**
332
+ * API base URL (for Azure OpenAI or proxies)
333
+ * @default undefined (uses default OpenAI endpoint)
334
+ */
335
+ baseURL?: string;
336
+ /**
337
+ * Organization ID (optional)
338
+ */
339
+ organization?: string;
340
+ /**
341
+ * Request timeout in milliseconds
342
+ * @default 30000
343
+ */
344
+ timeout?: number;
345
+ }
346
+ /**
347
+ * Creates an LLM client adapter for OpenAI.
348
+ *
349
+ * **Setup:**
350
+ * 1. Install OpenAI SDK: `npm install openai`
351
+ * 2. Get API key from https://platform.openai.com/api-keys
352
+ * 3. Set environment variable: `export OPENAI_API_KEY="sk-..."`
353
+ *
354
+ * **Model Options:**
355
+ * - `gpt-4-turbo-preview` - Most capable, expensive
356
+ * - `gpt-4` - High quality, expensive
357
+ * - `gpt-3.5-turbo` - Fast and cheap (20x cheaper than GPT-4)
358
+ *
359
+ * @param apiKey - Your OpenAI API key
360
+ * @param options - Configuration options
361
+ * @returns LLM client for use with evalsense metrics
362
+ *
363
+ * @example
364
+ * ```javascript
365
+ * // Basic usage
366
+ * const client = createOpenAIAdapter(process.env.OPENAI_API_KEY);
367
+ * setLLMClient(client);
368
+ *
369
+ * // With custom model
370
+ * const client = createOpenAIAdapter(process.env.OPENAI_API_KEY, {
371
+ * model: 'gpt-3.5-turbo', // Cheaper model
372
+ * temperature: 0.3
373
+ * });
374
+ *
375
+ * // With Azure OpenAI
376
+ * const client = createOpenAIAdapter(process.env.AZURE_OPENAI_KEY, {
377
+ * baseURL: 'https://your-resource.openai.azure.com',
378
+ * model: 'gpt-4'
379
+ * });
380
+ * ```
381
+ */
382
+ declare function createOpenAIAdapter(apiKey: string, options?: OpenAIAdapterOptions): LLMClient;
383
+
384
+ /**
385
+ * Built-in Anthropic (Claude) adapter for evalsense
386
+ *
387
+ * Provides a simple way to use Claude models without writing adapter code.
388
+ *
389
+ * @example
390
+ * ```javascript
391
+ * import { setLLMClient, createAnthropicAdapter } from 'evalsense/metrics';
392
+ *
393
+ * setLLMClient(createAnthropicAdapter(process.env.ANTHROPIC_API_KEY, {
394
+ * model: 'claude-3-5-sonnet-20241022'
395
+ * }));
396
+ * ```
397
+ */
398
+
399
+ interface AnthropicAdapterOptions {
400
+ /**
401
+ * Anthropic model to use
402
+ * @default "claude-3-5-sonnet-20241022"
403
+ */
404
+ model?: string;
405
+ /**
406
+ * Maximum tokens per completion
407
+ * @default 4096
408
+ */
409
+ maxTokens?: number;
410
+ /**
411
+ * Temperature for generation (0-1)
412
+ * Note: Anthropic doesn't support temperature > 1
413
+ * @default 0
414
+ */
415
+ temperature?: number;
416
+ /**
417
+ * Request timeout in milliseconds
418
+ * @default 30000
419
+ */
420
+ timeout?: number;
421
+ }
422
+ /**
423
+ * Creates an LLM client adapter for Anthropic Claude.
424
+ *
425
+ * **Setup:**
426
+ * 1. Install Anthropic SDK: `npm install @anthropic-ai/sdk`
427
+ * 2. Get API key from https://console.anthropic.com/
428
+ * 3. Set environment variable: `export ANTHROPIC_API_KEY="sk-ant-..."`
429
+ *
430
+ * **Model Options:**
431
+ * - `claude-3-5-sonnet-20241022` - Latest, most capable (recommended)
432
+ * - `claude-3-opus-20240229` - Most capable, expensive
433
+ * - `claude-3-sonnet-20240229` - Balanced performance
434
+ * - `claude-3-haiku-20240307` - Fast and affordable
435
+ *
436
+ * @param apiKey - Your Anthropic API key
437
+ * @param options - Configuration options
438
+ * @returns LLM client for use with evalsense metrics
439
+ *
440
+ * @example
441
+ * ```javascript
442
+ * // Basic usage
443
+ * const client = createAnthropicAdapter(process.env.ANTHROPIC_API_KEY);
444
+ * setLLMClient(client);
445
+ *
446
+ * // With custom model
447
+ * const client = createAnthropicAdapter(process.env.ANTHROPIC_API_KEY, {
448
+ * model: 'claude-3-haiku-20240307', // Cheaper, faster model
449
+ * maxTokens: 2048
450
+ * });
451
+ * ```
452
+ */
453
+ declare function createAnthropicAdapter(apiKey: string, options?: AnthropicAdapterOptions): LLMClient;
454
+
455
+ /**
456
+ * Built-in OpenRouter adapter for evalsense
457
+ *
458
+ * OpenRouter provides access to multiple LLM providers (OpenAI, Anthropic, Google, Meta, etc.)
459
+ * through a single unified API. Great for comparing models or avoiding vendor lock-in.
460
+ *
461
+ * @example
462
+ * ```javascript
463
+ * import { setLLMClient, createOpenRouterAdapter } from 'evalsense/metrics';
464
+ *
465
+ * setLLMClient(createOpenRouterAdapter(process.env.OPENROUTER_API_KEY, {
466
+ * model: 'anthropic/claude-3.5-sonnet'
467
+ * }));
468
+ * ```
469
+ */
470
+
471
+ interface OpenRouterAdapterOptions {
472
+ /**
473
+ * Model to use (in format: provider/model-name)
474
+ *
475
+ * Popular options:
476
+ * - `anthropic/claude-3.5-sonnet` - Latest Claude
477
+ * - `openai/gpt-4-turbo` - GPT-4 Turbo
478
+ * - `openai/gpt-3.5-turbo` - Cheap and fast
479
+ * - `google/gemini-pro` - Google Gemini
480
+ * - `meta-llama/llama-3-70b-instruct` - Open source
481
+ *
482
+ * See full list: https://openrouter.ai/models
483
+ *
484
+ * @default "anthropic/claude-3.5-sonnet"
485
+ */
486
+ model?: string;
487
+ /**
488
+ * Temperature for generation (0-2)
489
+ * @default 0
490
+ */
491
+ temperature?: number;
492
+ /**
493
+ * Maximum tokens per completion
494
+ * @default 4096
495
+ */
496
+ maxTokens?: number;
497
+ /**
498
+ * Your app name (for OpenRouter analytics)
499
+ * @default "evalsense"
500
+ */
501
+ appName?: string;
502
+ /**
503
+ * Your app URL (for OpenRouter analytics)
504
+ */
505
+ siteUrl?: string;
506
+ /**
507
+ * Request timeout in milliseconds
508
+ * @default 30000
509
+ */
510
+ timeout?: number;
511
+ }
512
+ /**
513
+ * Creates an LLM client adapter for OpenRouter.
514
+ *
515
+ * **Setup:**
516
+ * 1. Get API key from https://openrouter.ai/keys
517
+ * 2. Set environment variable: `export OPENROUTER_API_KEY="sk-or-..."`
518
+ * 3. No SDK needed - uses fetch API
519
+ *
520
+ * **Benefits:**
521
+ * - Access 100+ models from one API
522
+ * - Compare different providers easily
523
+ * - Automatic fallbacks and retries
524
+ * - Transparent pricing
525
+ *
526
+ * @param apiKey - Your OpenRouter API key
527
+ * @param options - Configuration options
528
+ * @returns LLM client for use with evalsense metrics
529
+ *
530
+ * @example
531
+ * ```javascript
532
+ * // Basic usage
533
+ * const client = createOpenRouterAdapter(process.env.OPENROUTER_API_KEY);
534
+ * setLLMClient(client);
535
+ *
536
+ * // Use different model
537
+ * const client = createOpenRouterAdapter(process.env.OPENROUTER_API_KEY, {
538
+ * model: 'openai/gpt-3.5-turbo', // Cheaper option
539
+ * appName: 'my-eval-system'
540
+ * });
541
+ *
542
+ * // Use free models for testing
543
+ * const client = createOpenRouterAdapter(process.env.OPENROUTER_API_KEY, {
544
+ * model: 'meta-llama/llama-3-8b-instruct:free'
545
+ * });
546
+ * ```
547
+ */
548
+ declare function createOpenRouterAdapter(apiKey: string, options?: OpenRouterAdapterOptions): LLMClient;
549
+
550
+ export { type AnthropicAdapterOptions, BINARY_THRESHOLDS, type OpenAIAdapterOptions, type OpenRouterAdapterOptions, SEVERITY_THRESHOLDS, batch, batchItems, clearMetrics, createAnthropicAdapter, createErrorMockClient, createJSONSchema, createKeywordMetric, createLLMError, createMetricOutput, createMockLLMClient, createOpenAIAdapter, createOpenRouterAdapter, createPatternMetric, createSequentialMockClient, createSpyMockClient, delay, extractScore, fillPrompt, getLLMClient, getMetric, listMetrics, normalizeScore, parseJSONResponse, registerMetric, requireLLMClient, resetLLMClient, runMetric, scoreToLabel, setLLMClient, unregisterMetric, validateResponse, withTimeout };