npm - @eva-llm/eva-judge - Versions diffs - 0.1.0 → 0.1.2 - Mend

@eva-llm/eva-judge 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

package/LICENSE CHANGED Viewed

@@ -1,6 +1,6 @@
 MIT License
-Copyright (c) 2026 [Your Name or Organization]
+Copyright (c) 2026 EVA-LLM
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

package/README.md CHANGED Viewed

@@ -1,10 +1,10 @@
 # Project Inspiration & Attribution
-This project is inspired by [promptfoo](https://github.com/promptfoo/promptfoo), including author's work on the G-Eval framework there. The LLM-as-a-judge prompts are copied from promptfoo and adapted for project-specific issues.
+This project is inspired by [promptfoo](https://github.com/promptfoo/promptfoo), including author's work on the G-Eval framework there. The LLM-as-a-Judge prompts are copied from promptfoo and adapted for project-specific issues.
 # eva-judge
-A TypeScript/Node.js package for evaluating and managing test cases, prompts, and registry logic for AI or code evaluation workflows.
+A TypeScript/Node.js package for evaluating and managing test cases, prompts, and registry logic for AI or code evaluation workflows with LLM-Rubric or G-Eval.
 ## Features
 - Configuration management for evaluation workflows
@@ -12,26 +12,14 @@ A TypeScript/Node.js package for evaluating and managing test cases, prompts, an
 - Registry for test cases and evaluation items
 - Designed for integration with Jest and other test runners
-## Project Structure
-- `src/` — Main source code
-  - `config.ts` — Configuration logic
-  - `prompt.ts` — Prompt utilities
-  - `registry.ts` — Registry management
-  - `index.ts` — Entry point
-- `tests/` — Unit tests for all modules
 ## Getting Started
-### Prerequisites
-- Node.js (>= 16)
-- pnpm (recommended) or npm/yarn
 ### Installation
-Clone the repository and install dependencies:
 ```bash
-pnpm install
+npm install @eva-llm/eva-judge
+# or
+pnpm add @eva-llm/eva-judge
 ```
 ### Running Tests
@@ -45,7 +33,7 @@ pnpm test
 Import and use the modules in your TypeScript/Node.js project:
 ```typescript
-import { llmRubric, gEval } from 'eva-judge';
+import { llmRubric, gEval, bEval } from '@eva-llm/eva-judge';
 ```
 ### llmRubric
@@ -63,9 +51,10 @@ const result = await llmRubric(
 // result: { reason: string, pass: boolean, score: number }
 ```
 ### gEval
-Evaluates a reply against criteria and derived steps using an LLM. Returns a reason and normalized score.
+Evaluates a reply against criteria and derived steps using an LLM. Returns a reason and normalized score (0.0–1.0).
 ```typescript
 const result = await gEval(
@@ -79,6 +68,22 @@ const result = await gEval(
 // result: { reason: string, score: number }
 ```
+### bEval (Binary G-Eval)
+Evaluates a reply against criteria and derived steps using an LLM, but with binary scoring (0 or 1). Returns a reason and a normalized score (0 or 1).
+```typescript
+const result = await bEval(
+  prompt,      // string: the prompt given to the model
+  answer,      // string: the reply to evaluate
+  criteria,    // string: evaluation criteria
+  provider,    // string: LLM provider name
+  model,       // string: LLM model name
+  options      // optional: { temperature, providerOptions }
+);
+// result: { reason: string, score: number } // score will be 0 or 1
+```
 ## Development
 - Source code is in `src/`
 - Tests are in `tests/`
@@ -112,7 +117,7 @@ Specify the provider name and model name in `llmRubric` or `gEval`.
 You can provide hooks to receive notifications about evaluation events (success or error) for logging, monitoring, or custom handling. Hooks can also be used to integrate with observability tools such as OpenTelemetry for tracing and metrics. Set these in the config:
 ```typescript
-import Config from 'eva-judge';
+import Config from '@eva-llm/eva-judge';
 Config.hooks = {
   onSuccess: ({ method, params, result, duration }) => {
@@ -127,7 +132,7 @@ Config.hooks = {
 For advanced use, you can implement your own cache storage for evaluation steps (e.g., using Redis or another backend) by providing a custom cache via `setStepsCache()`:
 ```typescript
-import Config from 'eva-judge';
+import Config from '@eva-llm/eva-judge';
 Config.setStepsCache(RedisCache); // RedisCache must implement IStepsCache
 ```

package/dst/config.d.ts CHANGED Viewed

@@ -1,115 +1,37 @@
 import { LRUCache } from 'lru-cache';
 import { type LanguageModel } from 'ai';
-/**
- * Interface for a cache that stores evaluation steps.
- * Implementations should provide asynchronous set/get methods for storing and retrieving
- * arrays of strings, typically representing evaluation steps for a given key.
- */
+import { type EvalMethod } from './types';
 export interface IStepsCache {
-    /**
-     * Store an array of steps in the cache for a given key.
-     * @param key Unique identifier for the steps (e.g., criteria string).
-     * @param value Array of step strings to cache.
-     * @returns Promise that resolves when the value is set.
-     */
     set(key: string, value: string[]): Promise<void>;
-    /**
-     * Retrieve an array of steps from the cache for a given key.
-     * @param key Unique identifier for the steps (e.g., criteria string).
-     * @returns Promise resolving to the cached array of steps, or undefined if not found.
-     */
     get(key: string): Promise<string[] | undefined>;
 }
-/**
- * Optional hooks for receiving notifications about evaluation events.
- * Can be used to monitor or log success and error events for evaluation functions.
- */
 export interface EvaHooks {
-    /**
-     * Called when an evaluation completes successfully.
-     * @param data Information about the evaluation, including method, params, result, and duration (ms).
-     */
     onSuccess?: (data: {
-        method: 'gEval' | 'llmRubric';
+        method: EvalMethod;
         params: any;
         result: any;
         duration: number;
     }) => void;
-    /**
-     * Called when an evaluation throws an error.
-     * @param data Information about the error, including method, error object, and duration (ms).
-     */
     onError?: (data: {
-        method: 'gEval' | 'llmRubric';
+        method: EvalMethod;
         error: any;
         duration: number;
     }) => void;
 }
-/**
- * Global configuration and cache management for evaluation operations.
- * Provides options for enabling/disabling model and steps caching, and allows
- * customization of cache implementations and event hooks.
- */
 declare const _default: {
-    /**
-     * Maximum score for evaluation (used for normalization).
-     */
     gevalMaxScore: number;
-    /**
-     * Whether model caching is enabled (for LLM instances).
-     */
     isModelCached: boolean;
-    /**
-     * Whether steps caching is enabled (for evaluation steps).
-     */
     isStepsCached: boolean;
-    /**
-     * LRU cache for language model instances.
-     */
     modelCache: LRUCache<string, LanguageModel, unknown>;
-    /**
-     * Cache for evaluation steps (criteria → steps).
-     */
     stepsCache: IStepsCache;
-    /**
-     * Restart the model cache with a new maximum size.
-     * @param size The new cache size (default: 100).
-     */
     restartModelCache(size?: number): void;
-    /**
-     * Restart the steps cache with a new maximum size.
-     * @param size The new cache size (default: 500).
-     */
     restartStepsCache(size?: number): void;
-    /**
-     * Set a custom steps cache implementation.
-     * @param cache The new IStepsCache implementation to use.
-     */
     setStepsCache(cache: IStepsCache): void;
-    /**
-     * Enable model caching (LLM instances).
-     */
     enableModelCache(): void;
-    /**
-     * Disable model caching (LLM instances).
-     */
     disableModelCache(): void;
-    /**
-     * Enable steps caching (criteria → steps).
-     */
     enableStepsCache(): void;
-    /**
-     * Disable steps caching (criteria → steps).
-     */
     disableStepsCache(): void;
-    /**
-     * Hooks for evaluation events (success/error notifications).
-     */
     hooks: EvaHooks;
-    /**
-     * Set the hooks for evaluation events.
-     * @param hooks The hooks object implementing EvaHooks.
-     */
     setHooks(hooks: EvaHooks): void;
 };
 export default _default;

package/dst/config.js CHANGED Viewed

@@ -1,113 +1,46 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
 const lru_cache_1 = require("lru-cache");
-/**
- * In-memory implementation of IStepsCache using an LRU (Least Recently Used) cache.
- * Useful for fast, ephemeral caching of evaluation steps during runtime.
- */
 class StepsMemoryAdapter {
     cache;
-    /**
-     * Construct a new StepsMemoryAdapter.
-     * @param size Maximum number of items to store in the cache.
-     */
     constructor(size) {
         this.cache = new lru_cache_1.LRUCache({ max: size });
     }
-    /**
-     * Store an array of steps in the cache for a given key.
-     * @inheritdoc
-     */
     async set(key, value) {
         this.cache.set(key, value);
     }
-    /**
-     * Retrieve an array of steps from the cache for a given key.
-     * @inheritdoc
-     */
     async get(key) {
         return this.cache.get(key);
     }
 }
-/**
- * Global configuration and cache management for evaluation operations.
- * Provides options for enabling/disabling model and steps caching, and allows
- * customization of cache implementations and event hooks.
- */
 exports.default = {
-    /**
-     * Maximum score for evaluation (used for normalization).
-     */
     gevalMaxScore: 10,
-    /**
-     * Whether model caching is enabled (for LLM instances).
-     */
     isModelCached: true,
-    /**
-     * Whether steps caching is enabled (for evaluation steps).
-     */
     isStepsCached: true,
-    /**
-     * LRU cache for language model instances.
-     */
     modelCache: new lru_cache_1.LRUCache({ max: 100 }),
-    /**
-     * Cache for evaluation steps (criteria → steps).
-     */
     stepsCache: new StepsMemoryAdapter(500),
-    /**
-     * Restart the model cache with a new maximum size.
-     * @param size The new cache size (default: 100).
-     */
     restartModelCache(size = 100) {
         this.modelCache = new lru_cache_1.LRUCache({ max: size });
     },
-    /**
-     * Restart the steps cache with a new maximum size.
-     * @param size The new cache size (default: 500).
-     */
     restartStepsCache(size = 500) {
         this.stepsCache = new StepsMemoryAdapter(size);
     },
-    /**
-     * Set a custom steps cache implementation.
-     * @param cache The new IStepsCache implementation to use.
-     */
     setStepsCache(cache) {
         this.stepsCache = cache;
     },
-    /**
-     * Enable model caching (LLM instances).
-     */
     enableModelCache() {
         this.isModelCached = true;
     },
-    /**
-     * Disable model caching (LLM instances).
-     */
     disableModelCache() {
         this.isModelCached = false;
     },
-    /**
-     * Enable steps caching (criteria → steps).
-     */
     enableStepsCache() {
         this.isStepsCached = true;
     },
-    /**
-     * Disable steps caching (criteria → steps).
-     */
     disableStepsCache() {
         this.isStepsCached = false;
     },
-    /**
-     * Hooks for evaluation events (success/error notifications).
-     */
     hooks: {},
-    /**
-     * Set the hooks for evaluation events.
-     * @param hooks The hooks object implementing EvaHooks.
-     */
     setHooks(hooks) {
         this.hooks = hooks;
     }

package/dst/config.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"config.js","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":";;AAAA,yCAAqC;~~AA2BrC;;;GAGG;AACH~~,MAAM,kBAAkB;IACd,KAAK,CAA6B;~~IAE1C;;;OAGG;IACH~~,YAAY,IAAY;QACtB,IAAI,CAAC,KAAK,GAAG,IAAI,oBAAQ,CAAC,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC;IAC3C,CAAC;~~IAED;;;OAGG;IACH~~,KAAK,CAAC,GAAG,CAAC,GAAW,EAAE,KAAe;QACpC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;IAC7B,CAAC;~~IAED;;;OAGG;IACH~~,KAAK,CAAC,GAAG,CAAC,GAAW;QACnB,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IAC7B,CAAC;CACF;~~AA8BD;;;;GAIG;AACH~~,kBAAe;~~IACb;;OAEG;IACH~~,aAAa,EAAE,EAAE;~~IACjB;;OAEG;IACH~~,aAAa,EAAE,IAAI;~~IACnB;;OAEG;IACH~~,aAAa,EAAE,IAAI;~~IACnB;;OAEG;IACH~~,UAAU,EAAE,IAAI,oBAAQ,CAAwB,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC;~~IAC7D;;OAEG;IACH~~,UAAU,EAAE,IAAI,kBAAkB,CAAC,GAAG,CAAgB;~~IAEtD;;;OAGG;IACH~~,iBAAiB,CAAC,OAAe,GAAG;QAClC,IAAI,CAAC,UAAU,GAAG,IAAI,oBAAQ,CAAwB,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC;IACvE,CAAC;~~IAED;;;OAGG;IACH~~,iBAAiB,CAAC,OAAe,GAAG;QAClC,IAAI,CAAC,UAAU,GAAG,IAAI,kBAAkB,CAAC,IAAI,CAAgB,CAAC;IAChE,CAAC;~~IAED;;;OAGG;IACH~~,aAAa,CAAC,KAAkB;QAC9B,IAAI,CAAC,UAAU,GAAG,KAAK,CAAC;IAC1B,CAAC;~~IAED;;OAEG;IACH~~,gBAAgB;QACd,IAAI,CAAC,aAAa,GAAG,IAAI,CAAC;IAC5B,CAAC;~~IAED;;OAEG;IACH~~,iBAAiB;QACf,IAAI,CAAC,aAAa,GAAG,KAAK,CAAC;IAC7B,CAAC;~~IAED;;OAEG;IACH~~,gBAAgB;QACd,IAAI,CAAC,aAAa,GAAG,IAAI,CAAC;IAC5B,CAAC;~~IAED;;OAEG;IACH~~,iBAAiB;QACf,IAAI,CAAC,aAAa,GAAG,KAAK,CAAC;IAC7B,CAAC;~~IAED;;OAEG;IACH~~,KAAK,EAAE,EAAc;~~IAErB;;;OAGG;IACH~~,QAAQ,CAAC,KAAe;QACtB,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;IACrB,CAAC;CACF,CAAC"}
1	+ {"version":3,"file":"config.js","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":";;AAAA,yCAAqC;AA8BrC,MAAM,kBAAkB;IACd,KAAK,CAA6B;IAM1C,YAAY,IAAY;QACtB,IAAI,CAAC,KAAK,GAAG,IAAI,oBAAQ,CAAC,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC;IAC3C,CAAC;IAMD,KAAK,CAAC,GAAG,CAAC,GAAW,EAAE,KAAe;QACpC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;IAC7B,CAAC;IAMD,KAAK,CAAC,GAAG,CAAC,GAAW;QACnB,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IAC7B,CAAC;CACF;AAiCD,kBAAe;IAIb,aAAa,EAAE,EAAE;IAIjB,aAAa,EAAE,IAAI;IAInB,aAAa,EAAE,IAAI;IAInB,UAAU,EAAE,IAAI,oBAAQ,CAAwB,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC;IAI7D,UAAU,EAAE,IAAI,kBAAkB,CAAC,GAAG,CAAgB;IAKtD,iBAAiB,CAAC,OAAe,GAAG;QAClC,IAAI,CAAC,UAAU,GAAG,IAAI,oBAAQ,CAAwB,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC;IACvE,CAAC;IAKD,iBAAiB,CAAC,OAAe,GAAG;QAClC,IAAI,CAAC,UAAU,GAAG,IAAI,kBAAkB,CAAC,IAAI,CAAgB,CAAC;IAChE,CAAC;IAKD,aAAa,CAAC,KAAkB;QAC9B,IAAI,CAAC,UAAU,GAAG,KAAK,CAAC;IAC1B,CAAC;IAID,gBAAgB;QACd,IAAI,CAAC,aAAa,GAAG,IAAI,CAAC;IAC5B,CAAC;IAID,iBAAiB;QACf,IAAI,CAAC,aAAa,GAAG,KAAK,CAAC;IAC7B,CAAC;IAID,gBAAgB;QACd,IAAI,CAAC,aAAa,GAAG,IAAI,CAAC;IAC5B,CAAC;IAID,iBAAiB;QACf,IAAI,CAAC,aAAa,GAAG,KAAK,CAAC;IAC7B,CAAC;IAID,KAAK,EAAE,EAAc;IAKrB,QAAQ,CAAC,KAAe;QACtB,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;IACrB,CAAC;CACF,CAAC"}

package/dst/index.d.ts CHANGED Viewed

@@ -1,76 +1,25 @@
 import z from 'zod';
 export * from './config';
 export { default } from './config';
-/**
- * Options for evaluation functions.
- * Allows customization of LLM generation parameters and provider-specific options.
- */
 export interface EvalOptions {
-    /**
-     * Temperature for model generation (controls randomness).
-     */
     temperature?: number;
-    /**
-     * Additional provider-specific options (passed to the LLM provider).
-     */
     providerOptions?: Record<string, any>;
 }
-/**
- * Zod schema for rubric result.
- * Describes the structure of the result returned by rubric-based evaluation.
- */
 export declare const RubricResultSchema: z.ZodObject<{
     reason: z.ZodString;
     pass: z.ZodBoolean;
     score: z.ZodNumber;
 }, z.core.$strip>;
-/**
- * Type for rubric result (inferred from RubricResultSchema).
- */
 export type RubricResult = z.infer<typeof RubricResultSchema>;
-/**
- * Zod schema for evaluation steps result.
- * Describes the structure of the result containing evaluation steps derived from criteria.
- */
 export declare const GevalStepsResultSchema: z.ZodObject<{
     steps: z.ZodArray<z.ZodString>;
 }, z.core.$strip>;
-/**
- * Type for evaluation steps result (inferred from GevalStepsResultSchema).
- */
 export type GevalStepsResult = z.infer<typeof GevalStepsResultSchema>;
-/**
- * Zod schema for evaluation result.
- * Describes the structure of the result returned by the main evaluation function.
- */
 export declare const GevalEvaluateResultSchema: z.ZodObject<{
     reason: z.ZodString;
     score: z.ZodNumber;
 }, z.core.$strip>;
-/**
- * Type for evaluation result (inferred from GevalEvaluateResultSchema).
- */
 export type GevalEvaluateResult = z.infer<typeof GevalEvaluateResultSchema>;
-/**
- * Evaluate output against a rubric using an LLM.
- * Uses a system and user prompt to instruct the LLM to grade the output according to the rubric.
- * @param output The output to grade.
- * @param rubric The rubric to use for grading.
- * @param providerName The provider name for the LLM.
- * @param modelName The model name for the LLM.
- * @param options Optional evaluation options (temperature, providerOptions, etc).
- * @returns The rubric result (reason, pass, score).
- */
 export declare const llmRubric: (output: string, rubric: string, providerName: string, modelName: string, options?: EvalOptions) => Promise<RubricResult>;
-/**
- * Evaluate a reply against criteria and steps using an LLM.
- * If steps for the criteria are not cached, generates them first, then evaluates the answer.
- * @param prompt The prompt given to the model.
- * @param answer The reply to evaluate.
- * @param criteria The evaluation criteria (used to derive steps).
- * @param providerName The provider name for the LLM.
- * @param modelName The model name for the LLM.
- * @param options Optional evaluation options (temperature, providerOptions, etc).
- * @returns The evaluation result with normalized score (reason, score).
- */
 export declare const gEval: (prompt: string, answer: string, criteria: string, providerName: string, modelName: string, options?: EvalOptions) => Promise<GevalEvaluateResult>;
+export declare const bEval: (prompt: string, answer: string, criteria: string, providerName: string, modelName: string, options?: EvalOptions) => Promise<GevalEvaluateResult>;

package/dst/index.js CHANGED Viewed

@@ -39,7 +39,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
     return (mod && mod.__esModule) ? mod : { "default": mod };
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.gEval = exports.llmRubric = exports.GevalEvaluateResultSchema = exports.GevalStepsResultSchema = exports.RubricResultSchema = exports.default = void 0;
+exports.bEval = exports.gEval = exports.llmRubric = exports.GevalEvaluateResultSchema = exports.GevalStepsResultSchema = exports.RubricResultSchema = exports.default = void 0;
 const ai_1 = require("ai");
 const Mustache = __importStar(require("mustache"));
 const zod_1 = __importDefault(require("zod"));
@@ -49,46 +49,18 @@ const config_1 = __importDefault(require("./config"));
 __exportStar(require("./config"), exports);
 var config_2 = require("./config");
 Object.defineProperty(exports, "default", { enumerable: true, get: function () { return __importDefault(config_2).default; } });
-/**
- * Zod schema for rubric result.
- * Describes the structure of the result returned by rubric-based evaluation.
- */
 exports.RubricResultSchema = zod_1.default.object({
-    /** Detailed explanation of the score based on the rubric. */
     reason: zod_1.default.string().describe('Detailed explanation of the score based on the rubric'),
-    /** Whether the output satisfies the minimum requirements. */
     pass: zod_1.default.boolean().describe('Whether the output satisfies the minimum requirements'),
-    /** Numeric representation of quality (0-1). */
     score: zod_1.default.number().min(0).max(1).describe('Numeric representation of quality'),
 });
-/**
- * Zod schema for evaluation steps result.
- * Describes the structure of the result containing evaluation steps derived from criteria.
- */
 exports.GevalStepsResultSchema = zod_1.default.object({
-    /** List of concise evaluation steps derived from the criteria. */
     steps: zod_1.default.array(zod_1.default.string()).describe('List of concise evaluation steps derived from the criteria'),
 });
-/**
- * Zod schema for evaluation result.
- * Describes the structure of the result returned by the main evaluation function.
- */
 exports.GevalEvaluateResultSchema = zod_1.default.object({
-    /** Detailed explanation of the score based on the rubric. */
     reason: zod_1.default.string().describe('Detailed explanation of the score based on the rubric'),
-    /** Numeric representation of quality (normalized score, 0-1). */
     score: zod_1.default.number().min(0).describe('Numeric representation of quality'),
 });
-/**
- * Evaluate output against a rubric using an LLM.
- * Uses a system and user prompt to instruct the LLM to grade the output according to the rubric.
- * @param output The output to grade.
- * @param rubric The rubric to use for grading.
- * @param providerName The provider name for the LLM.
- * @param modelName The model name for the LLM.
- * @param options Optional evaluation options (temperature, providerOptions, etc).
- * @returns The rubric result (reason, pass, score).
- */
 const llmRubric = async (output, rubric, providerName, modelName, options = {}) => {
     const start = Date.now();
     try {
@@ -120,18 +92,7 @@ const llmRubric = async (output, rubric, providerName, modelName, options = {})
     }
 };
 exports.llmRubric = llmRubric;
-/**
- * Evaluate a reply against criteria and steps using an LLM.
- * If steps for the criteria are not cached, generates them first, then evaluates the answer.
- * @param prompt The prompt given to the model.
- * @param answer The reply to evaluate.
- * @param criteria The evaluation criteria (used to derive steps).
- * @param providerName The provider name for the LLM.
- * @param modelName The model name for the LLM.
- * @param options Optional evaluation options (temperature, providerOptions, etc).
- * @returns The evaluation result with normalized score (reason, score).
- */
-const gEval = async (prompt, answer, criteria, providerName, modelName, options = {}) => {
+const _gEval = async (prompt, answer, criteria, providerName, modelName, maxScore, methodName, options = {}) => {
     const start = Date.now();
     try {
         const model = (0, registry_1.getModel)(providerName, modelName);
@@ -147,14 +108,14 @@ const gEval = async (prompt, answer, criteria, providerName, modelName, options
                 ...options,
             });
             steps = stepsResult.steps;
-            (0, registry_1.setSteps)(criteria, stepsResult.steps); // NOTE: cache asynchronously, without awaiting
+            (0, registry_1.setSteps)(criteria, stepsResult.steps);
         }
         const evaluationPrompt = Mustache.render(prompt_1.GEVAL_EVALUATE_PROMPT, {
             criteria,
             steps: steps.join('\n- '),
             input: prompt,
             output: answer,
-            maxScore: config_1.default.gevalMaxScore,
+            maxScore,
         });
         const { output: evalResult } = await (0, ai_1.generateText)({
             model,
@@ -166,10 +127,10 @@ const gEval = async (prompt, answer, criteria, providerName, modelName, options
         });
         const result = {
             reason: evalResult.reason,
-            score: evalResult.score / config_1.default.gevalMaxScore,
+            score: evalResult.score / maxScore,
         };
         config_1.default.hooks.onSuccess?.({
-            method: 'gEval',
+            method: methodName,
             params: { prompt, answer, criteria, providerName, modelName, options },
             result,
             duration: Date.now() - start,
@@ -178,12 +139,15 @@ const gEval = async (prompt, answer, criteria, providerName, modelName, options
     }
     catch (error) {
         config_1.default.hooks.onError?.({
-            method: 'gEval',
+            method: methodName,
             error,
             duration: Date.now() - start,
         });
         throw error;
     }
 };
+const gEval = async (prompt, answer, criteria, providerName, modelName, options = {}) => _gEval(prompt, answer, criteria, providerName, modelName, config_1.default.gevalMaxScore, 'gEval', options);
 exports.gEval = gEval;
+const bEval = async (prompt, answer, criteria, providerName, modelName, options = {}) => _gEval(prompt, answer, criteria, providerName, modelName, 1, 'bEval', options);
+exports.bEval = bEval;
 //# sourceMappingURL=index.js.map

package/dst/index.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,2BAA0C;AAC1C,mDAAqC;AACrC,8CAAoB;AAEpB,qCAKkB;AAClB,yCAA0D;AAC1D,sDAA4B;AAG5B,2CAAyB;AACzB,mCAAmC;AAA1B,kHAAA,OAAO,OAAA;~~AAmBhB;;;GAGG;AACU~~,QAAA,kBAAkB,GAAG,aAAC,CAAC,MAAM,CAAC;~~IACzC~~,~~6DAA6D;IAC7D,~~MAAM,EAAE,aAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,uDAAuD,CAAC;~~IACpF~~,~~6DAA6D;IAC7D,~~IAAI,EAAE,aAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,CAAC,uDAAuD,CAAC;~~IACnF,+CAA+C;IAC/C~~,KAAK,EAAE,aAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,mCAAmC,CAAC;CAC9E,CAAC,CAAC;~~AAQH;;;GAGG;AACU~~,QAAA,sBAAsB,GAAG,aAAC,CAAC,MAAM,CAAC;~~IAC7C~~,~~kEAAkE;IAClE,~~KAAK,EAAE,aAAC,CAAC,KAAK,CAAC,aAAC,CAAC,MAAM,EAAE,CAAC,CAAC,QAAQ,CAAC,4DAA4D,CAAC;CAClG,CAAC,CAAC;~~AAQH;;;GAGG;AACU~~,QAAA,yBAAyB,GAAG,aAAC,CAAC,MAAM,CAAC;~~IAChD~~,~~6DAA6D;IAC7D,~~MAAM,EAAE,aAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,uDAAuD,CAAC;~~IACpF~~,~~iEAAiE;IACjE,~~KAAK,EAAE,aAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,mCAAmC,CAAC;CACvE,CAAC,CAAC;~~AAOH;;;;;;;;;GASG;AACI~~,MAAM,SAAS,GAAG,KAAK,EAC5B,MAAc,EACd,MAAc,EACd,YAAoB,EACpB,SAAiB,EACjB,UAAuB,EAAE,EACF,EAAE;IACzB,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACzB,IAAI,CAAC;QACH,MAAM,UAAU,GAAG,QAAQ,CAAC,MAAM,CAAC,+BAAsB,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC,CAAC;QAE/E,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,IAAA,iBAAY,EAAC;YAC5C,KAAK,EAAE,IAAA,mBAAQ,EAAC,YAAY,EAAE,SAAS,CAAC;YACxC,MAAM,EAAE,iCAAwB;YAChC,MAAM,EAAE,UAAU;YAClB,MAAM,EAAE,WAAM,CAAC,MAAM,CAAC;gBACpB,MAAM,EAAE,0BAAkB;aAC3B,CAAC;YACF,GAAG,OAAO;SACX,CAAC,CAAC;QAEH,gBAAI,CAAC,KAAK,CAAC,SAAS,EAAE,CAAC;YACrB,MAAM,EAAE,WAAW;YACnB,MAAM,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,YAAY,EAAE,SAAS,EAAE,OAAO,EAAE;YAC5D,MAAM;YACN,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;SAC7B,CAAC,CAAC;QAEH,OAAO,MAAM,CAAC;IAChB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QAEf,gBAAI,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC;YACnB,MAAM,EAAE,WAAW;YACnB,KAAK;YACL,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;
SAC7B,CAAC,CAAC;QAEH,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC,CAAA;AAvCY,QAAA,SAAS,aAuCrB;AAED~~;;;;;;;;;;GAUG;AACI~~,MAAM,~~KAAK~~,GAAG,KAAK,~~EACxB~~,MAAc,EACd,MAAc,EACd,QAAgB,EAChB,YAAoB,EACpB,SAAiB,EACjB,UAAuB,EAAE,EACK,EAAE;IAChC,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAEzB,IAAI,CAAC;QACH,MAAM,KAAK,GAAG,IAAA,mBAAQ,EAAC,YAAY,EAAE,SAAS,CAAC,CAAC;QAChD,IAAI,KAAK,GAAG,MAAM,IAAA,mBAAQ,EAAC,QAAQ,CAAC,CAAC;QAErC,IAAI,CAAC,KAAK,EAAE,CAAC;YACX,MAAM,WAAW,GAAG,QAAQ,CAAC,MAAM,CAAC,2BAAkB,EAAE,EAAE,QAAQ,EAAE,CAAC,CAAC;YAEtE,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,IAAA,iBAAY,EAAC;gBACjD,KAAK;gBACL,MAAM,EAAE,WAAW;gBACnB,MAAM,EAAE,WAAM,CAAC,MAAM,CAAC;oBACpB,MAAM,EAAE,8BAAsB;iBAC/B,CAAC;gBACF,GAAG,OAAO;aACX,CAAC,CAAC;YAEH,KAAK,GAAG,WAAW,CAAC,KAAK,CAAC;YAE1B,IAAA,mBAAQ,EAAC,QAAQ,EAAE,WAAW,CAAC,KAAK,CAAC,CAAC~~,CAAC,+CAA+C~~;~~QACxF~~,CAAC;QAED,MAAM,gBAAgB,GAAG,QAAQ,CAAC,MAAM,CAAC,8BAAqB,EAAE;YAC9D,QAAQ;YACR,KAAK,EAAE,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC;YACzB,KAAK,EAAE,MAAM;YACb,MAAM,EAAE,MAAM;YACd,QAAQ~~,EAAE,gBAAI,CAAC,aAAa~~;~~SAC7B~~,CAAC,CAAC;QAEH,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,GAAG,MAAM,IAAA,iBAAY,EAAC;YAChD,KAAK;YACL,MAAM,EAAE,gBAAgB;YACxB,MAAM,EAAE,WAAM,CAAC,MAAM,CAAC;gBACpB,MAAM,EAAE,iCAAyB;aAClC,CAAC;YACF,GAAG,OAAO;SACX,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG;YACb,MAAM,EAAE,UAAU,CAAC,MAAM;YACzB,KAAK,EAAE,UAAU,CAAC,KAAK,GAAG,~~gBAAI,CAAC,aAAa~~;~~SAC7C~~,CAAC;QAEF,gBAAI,CAAC,KAAK,CAAC,SAAS,EAAE,CAAC;YACrB,MAAM,EAAE,~~OAAO~~;~~YACf~~,MAAM,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,YAAY,EAAE,SAAS,EAAE,OAAO,EAAE;YACtE,MAAM;YACN,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;SAC7B,CAAC,CAAC;QAEH,OAAO,MAAM,CAAC;IAChB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QAEf,gBAAI,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC;YACnB,MAAM,EAAE,~~OAAO~~;~~YACf~~,KAAK;YACL,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;SAC7B,CAAC,CAAC;QAEH,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC,CAAA;~~AAvEY~~,QAAA,KAAK,~~SAuEjB~~"}
1	+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,2BAA0C;AAC1C,mDAAqC;AACrC,8CAAoB;AAEpB,qCAKkB;AAClB,yCAA0D;AAC1D,sDAA4B;AAG5B,2CAAyB;AACzB,mCAAmC;AAA1B,kHAAA,OAAO,OAAA;AAqBH,QAAA,kBAAkB,GAAG,aAAC,CAAC,MAAM,CAAC;IAEzC,MAAM,EAAE,aAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,uDAAuD,CAAC;IAEpF,IAAI,EAAE,aAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,CAAC,uDAAuD,CAAC;IAEnF,KAAK,EAAE,aAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,mCAAmC,CAAC;CAC9E,CAAC,CAAC;AAUU,QAAA,sBAAsB,GAAG,aAAC,CAAC,MAAM,CAAC;IAE7C,KAAK,EAAE,aAAC,CAAC,KAAK,CAAC,aAAC,CAAC,MAAM,EAAE,CAAC,CAAC,QAAQ,CAAC,4DAA4D,CAAC;CAClG,CAAC,CAAC;AAWU,QAAA,yBAAyB,GAAG,aAAC,CAAC,MAAM,CAAC;IAEhD,MAAM,EAAE,aAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,uDAAuD,CAAC;IAEpF,KAAK,EAAE,aAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,mCAAmC,CAAC;CACvE,CAAC,CAAC;AAgBI,MAAM,SAAS,GAAG,KAAK,EAC5B,MAAc,EACd,MAAc,EACd,YAAoB,EACpB,SAAiB,EACjB,UAAuB,EAAE,EACF,EAAE;IACzB,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACzB,IAAI,CAAC;QACH,MAAM,UAAU,GAAG,QAAQ,CAAC,MAAM,CAAC,+BAAsB,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC,CAAC;QAE/E,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,IAAA,iBAAY,EAAC;YAC5C,KAAK,EAAE,IAAA,mBAAQ,EAAC,YAAY,EAAE,SAAS,CAAC;YACxC,MAAM,EAAE,iCAAwB;YAChC,MAAM,EAAE,UAAU;YAClB,MAAM,EAAE,WAAM,CAAC,MAAM,CAAC;gBACpB,MAAM,EAAE,0BAAkB;aAC3B,CAAC;YACF,GAAG,OAAO;SACX,CAAC,CAAC;QAEH,gBAAI,CAAC,KAAK,CAAC,SAAS,EAAE,CAAC;YACrB,MAAM,EAAE,WAAW;YACnB,MAAM,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,YAAY,EAAE,SAAS,EAAE,OAAO,EAAE;YAC5D,MAAM;YACN,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;SAC7B,CAAC,CAAC;QAEH,OAAO,MAAM,CAAC;IAChB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QAEf,gBAAI,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC;YACnB,MAAM,EAAE,WAAW;YACnB,KAAK;YACL,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;SAC7B,CAAC,CAAC;QAEH,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC,CAAA;AAvCY,QAAA,SAAS,aAuCrB;AAED,MAAM,MAAM,GAAG,KAAK,EAClB,MAAc,EACd,MAAc,EACd,QAAgB,EAChB,YAAoB,EACpB,SAAiB,EACjB,QAAgB,EAChB,UAAsB,EAC
tB,UAAuB,EAAE,EACK,EAAE;IAChC,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAEzB,IAAI,CAAC;QACH,MAAM,KAAK,GAAG,IAAA,mBAAQ,EAAC,YAAY,EAAE,SAAS,CAAC,CAAC;QAChD,IAAI,KAAK,GAAG,MAAM,IAAA,mBAAQ,EAAC,QAAQ,CAAC,CAAC;QAErC,IAAI,CAAC,KAAK,EAAE,CAAC;YACX,MAAM,WAAW,GAAG,QAAQ,CAAC,MAAM,CAAC,2BAAkB,EAAE,EAAE,QAAQ,EAAE,CAAC,CAAC;YAEtE,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,IAAA,iBAAY,EAAC;gBACjD,KAAK;gBACL,MAAM,EAAE,WAAW;gBACnB,MAAM,EAAE,WAAM,CAAC,MAAM,CAAC;oBACpB,MAAM,EAAE,8BAAsB;iBAC/B,CAAC;gBACF,GAAG,OAAO;aACX,CAAC,CAAC;YAEH,KAAK,GAAG,WAAW,CAAC,KAAK,CAAC;YAE1B,IAAA,mBAAQ,EAAC,QAAQ,EAAE,WAAW,CAAC,KAAK,CAAC,CAAC;QACxC,CAAC;QAED,MAAM,gBAAgB,GAAG,QAAQ,CAAC,MAAM,CAAC,8BAAqB,EAAE;YAC9D,QAAQ;YACR,KAAK,EAAE,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC;YACzB,KAAK,EAAE,MAAM;YACb,MAAM,EAAE,MAAM;YACd,QAAQ;SACT,CAAC,CAAC;QAEH,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,GAAG,MAAM,IAAA,iBAAY,EAAC;YAChD,KAAK;YACL,MAAM,EAAE,gBAAgB;YACxB,MAAM,EAAE,WAAM,CAAC,MAAM,CAAC;gBACpB,MAAM,EAAE,iCAAyB;aAClC,CAAC;YACF,GAAG,OAAO;SACX,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG;YACb,MAAM,EAAE,UAAU,CAAC,MAAM;YACzB,KAAK,EAAE,UAAU,CAAC,KAAK,GAAG,QAAQ;SACnC,CAAC;QAEF,gBAAI,CAAC,KAAK,CAAC,SAAS,EAAE,CAAC;YACrB,MAAM,EAAE,UAAU;YAClB,MAAM,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,YAAY,EAAE,SAAS,EAAE,OAAO,EAAE;YACtE,MAAM;YACN,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;SAC7B,CAAC,CAAC;QAEH,OAAO,MAAM,CAAC;IAChB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QAEf,gBAAI,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC;YACnB,MAAM,EAAE,UAAU;YAClB,KAAK;YACL,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;SAC7B,CAAC,CAAC;QAEH,MAAM,KAAK,CAAC;IACd,CAAC;AACH,CAAC,CAAA;AAaM,MAAM,KAAK,GAAG,KAAK,EACxB,MAAc,EACd,MAAc,EACd,QAAgB,EAChB,YAAoB,EACpB,SAAiB,EACjB,UAAuB,EAAE,EACK,EAAE,CAAC,MAAM,CACvC,MAAM,EACN,MAAM,EACN,QAAQ,EACR,YAAY,EACZ,SAAS,EACT,gBAAI,CAAC,aAAa,EAClB,OAAO,EACP,OAAO,CACR,CAAC;AAhBW,QAAA,KAAK,SAgBhB;AAaK,MAAM,KAAK,GAAG,KAAK,EACxB,MAAc,EACd,MAAc,EACd,QAAgB,EAChB,YAAoB,EACpB,SAAiB,EACjB,UAAuB,EAAE,EACK,EAAE,CAAC,MAAM,CACvC,MAAM,EACN,MAAM,EACN,QAAQ,EACR,YAAY,EACZ,SAAS,EACT,CAAC,EACD,OAAO,EACP,OAAO,CACR,CAAC;AAhBW,QAAA
,KAAK,SAgBhB"}

package/dst/prompt.d.ts CHANGED Viewed

@@ -1,16 +1,8 @@
-/**
- * System prompt for LLM rubric-based grading. Guides the LLM to grade output according to a rubric and respond with a JSON object.
+/*!
+ * Portions of this code are based on Promptfoo (MIT License)
+ * Copyright (c) 2025 Promptfoo
  */
 export declare const LLM_RUBRIC_SYSTEM_PROMPT = "You are grading output according to a user-specified rubric. If the statement in the rubric is true, then the output passes the test. You respond with a JSON object with this structure: {reason: string, pass: boolean, score: number}\n\nExamples:\n\n<Output>Hello world</Output>\n<Rubric>Content contains a greeting</Rubric>\n{\"reason\": \"the content contains the word 'Hello'\", \"pass\": true, \"score\": 1.0}\n\n<Output>Avast ye swabs, repel the invaders!</Output>\n<Rubric>Does not speak like a pirate</Rubric>\n{\"reason\": \"'avast ye' is a common pirate term\", \"pass\": false, \"score\": 0.0}\n";
-/**
- * User prompt template for rubric-based grading. Used to inject output and rubric into the prompt.
- */
 export declare const LLM_RUBRIC_USER_PROMPT = "<Output>\n{{output}}\n</Output>\n<Rubric>\n{{rubric}}\n</Rubric>";
-/**
- * System prompt for generating evaluation steps from criteria. Guides the LLM to output a minified JSON array of steps.
- */
 export declare const GEVAL_STEPS_PROMPT = "\nGiven an evaluation criteria which outlines how you should judge a piece of text, generate 3-4 concise evaluation steps applicable to any text based on the criteria below and designed to confirm the criteria.\n\n**EVALUATION CRITERIA**\n{{criteria}}\n\n**OUTPUT FORMAT**\nIMPORTANT:\n- Return output ONLY as a minified JSON object (no code fences).\n- The JSON object must contain a single key, \"steps\", whose value is a list of strings.\n- Each string must represent one evaluation step.\n- Do NOT include any explanations, commentary, extra text, or additional formatting.\n\nFormat:\n{\"steps\": <list_of_strings>}\n\nExample:\n{\"steps\":[\"<Evaluation Step 1>\",\"<Evaluation Step 2>\",\"<Evaluation Step 3>\",\"<Evaluation Step 4>\"]}\n\nHere are the 3-4 concise evaluation steps, formatted as required in a minified JSON:\nJSON:\n";
-/**
- * System prompt for evaluating a reply against criteria and steps. Guides the LLM to return a JSON with score and reason.
- */
 export declare const GEVAL_EVALUATE_PROMPT = "\nYou will be given one Reply for a Prompt below. Your task is to rate the Reply on one metric.\nPlease make sure you read and understand these instructions carefully. Please keep this document open while reviewing, and refer to it as needed.\n\n**Evaluation Criteria**\n{{criteria}}\n\n**Evaluation Steps**\n- {{steps}}\nGiven the evaluation steps, return a JSON with two keys: \n  1) a \"score\" key that MUST be an integer from 0 to {{maxScore}}, where {{maxScore}} indicates that the Evaluation Criteria is fully and clearly present in the Reply according to the Evaluation Steps, and 0 indicates the total absence of the Evaluation Criteria;\n  2) a \"reason\" key, a reason for the given score, but DO NOT QUOTE THE SCORE in your reason. Please mention specific information from Prompt and Reply in your reason, but be very concise with it!\n\n**Prompt**\n{{input}}\n\n**Reply**\n{{output}}\n\n**OUTPUT FORMAT**\nIMPORTANT: \n- Return output ONLY as a minified JSON object (no code fences).\n- The JSON object must contain exactly two keys: \"score\" and \"reason\".\n- No additional words, explanations, or formatting are needed.\n- Absolutely no additional text, explanations, line breaks, or formatting outside the JSON object are allowed.\n\nExample JSON:\n{\"score\":0,\"reason\":\"The text of reply does not follow the evaluation criteria provided.\"}\n\nHere is the final evaluation in the required minified JSON format:\nJSON:\n";

package/dst/prompt.js CHANGED Viewed

@@ -1,9 +1,10 @@
 "use strict";
+/*!
+ * Portions of this code are based on Promptfoo (MIT License)
+ * Copyright (c) 2025 Promptfoo
+ */
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.GEVAL_EVALUATE_PROMPT = exports.GEVAL_STEPS_PROMPT = exports.LLM_RUBRIC_USER_PROMPT = exports.LLM_RUBRIC_SYSTEM_PROMPT = void 0;
-/**
- * System prompt for LLM rubric-based grading. Guides the LLM to grade output according to a rubric and respond with a JSON object.
- */
 exports.LLM_RUBRIC_SYSTEM_PROMPT = `You are grading output according to a user-specified rubric. If the statement in the rubric is true, then the output passes the test. You respond with a JSON object with this structure: {reason: string, pass: boolean, score: number}
 Examples:
@@ -16,13 +17,7 @@ Examples:
 <Rubric>Does not speak like a pirate</Rubric>
 {"reason": "'avast ye' is a common pirate term", "pass": false, "score": 0.0}
 `;
-/**
- * User prompt template for rubric-based grading. Used to inject output and rubric into the prompt.
- */
 exports.LLM_RUBRIC_USER_PROMPT = '<Output>\n{{output}}\n</Output>\n<Rubric>\n{{rubric}}\n</Rubric>';
-/**
- * System prompt for generating evaluation steps from criteria. Guides the LLM to output a minified JSON array of steps.
- */
 exports.GEVAL_STEPS_PROMPT = `
 Given an evaluation criteria which outlines how you should judge a piece of text, generate 3-4 concise evaluation steps applicable to any text based on the criteria below and designed to confirm the criteria.
@@ -45,9 +40,6 @@ Example:
 Here are the 3-4 concise evaluation steps, formatted as required in a minified JSON:
 JSON:
 `;
-/**
- * System prompt for evaluating a reply against criteria and steps. Guides the LLM to return a JSON with score and reason.
- */
 exports.GEVAL_EVALUATE_PROMPT = `
 You will be given one Reply for a Prompt below. Your task is to rate the Reply on one metric.
 Please make sure you read and understand these instructions carefully. Please keep this document open while reviewing, and refer to it as needed.

package/dst/prompt.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"prompt.js","sourceRoot":"","sources":["../src/prompt.ts"],"names":[],"mappings":";;;~~AAAA;;GAEG;AACU~~,QAAA,wBAAwB,GAAG;;;;;;;;;;;CAWvC,CAAC;~~AAEF;;GAEG;AACU~~,QAAA,sBAAsB,GAAG,kEAAkE,CAAC;~~AAEzG;;GAEG;AACU~~,QAAA,kBAAkB,GAAG;;;;;;;;;;;;;;;;;;;;;CAqBjC,CAAC;~~AAEF;;GAEG;AACU~~,QAAA,qBAAqB,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA+BpC,CAAC"}
1	+ {"version":3,"file":"prompt.js","sourceRoot":"","sources":["../src/prompt.ts"],"names":[],"mappings":";AAAA;;;GAGG;;;AAKU,QAAA,wBAAwB,GAAG;;;;;;;;;;;CAWvC,CAAC;AAKW,QAAA,sBAAsB,GAAG,kEAAkE,CAAC;AAK5F,QAAA,kBAAkB,GAAG;;;;;;;;;;;;;;;;;;;;;CAqBjC,CAAC;AAKW,QAAA,qBAAqB,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA+BpC,CAAC"}

package/dst/registry.d.ts CHANGED Viewed

@@ -1,21 +1,4 @@
 import { type LanguageModel } from 'ai';
-/**
- * Get a language model instance from the provider and model name, using cache if enabled.
- * @param providerName The provider name (e.g., 'openai').
- * @param modelName The model name.
- * @returns The language model instance.
- */
 export declare const getModel: (providerName: string, modelName: string) => LanguageModel;
-/**
- * Get cached evaluation steps for a criteria, if caching is enabled.
- * @param criteria The evaluation criteria string.
- * @returns Promise resolving to the cached steps or undefined.
- */
 export declare const getSteps: (criteria: string) => Promise<string[] | undefined>;
-/**
- * Set evaluation steps for a criteria in the cache, if caching is enabled.
- * @param criteria The evaluation criteria string.
- * @param steps The steps to cache.
- * @returns Promise that resolves when the steps are set.
- */
 export declare const setSteps: (criteria: string, steps: string[]) => Promise<void>;

package/dst/registry.js CHANGED Viewed

@@ -49,9 +49,6 @@ const groq_1 = require("@ai-sdk/groq");
 const perplexity_1 = require("@ai-sdk/perplexity");
 const xai_1 = require("@ai-sdk/xai");
 const config_1 = __importDefault(require("./config"));
-/**
- * Map of provider names to provider functions.
- */
 const PROVIDERS = {
     openai: openai_1.openai,
     anthropic: anthropic_1.anthropic,
@@ -64,12 +61,6 @@ const PROVIDERS = {
     perplexity: perplexity_1.perplexity,
     xai: xai_1.xai,
 };
-/**
- * Get a language model instance from the provider and model name, using cache if enabled.
- * @param providerName The provider name (e.g., 'openai').
- * @param modelName The model name.
- * @returns The language model instance.
- */
 const getModel = (providerName, modelName) => {
     const cacheKey = `${providerName}:${modelName}`;
     let model = config_1.default.isModelCached ? config_1.default.modelCache.get(cacheKey) : undefined;
@@ -86,29 +77,13 @@ const getModel = (providerName, modelName) => {
     return model;
 };
 exports.getModel = getModel;
-/**
- * Compute the MD5 hash of a string.
- * @param str The input string.
- * @returns The MD5 hash as a hex string.
- */
 const md5 = (str) => {
     return crypto.createHash('md5').update(str).digest('hex');
 };
-/**
- * Get cached evaluation steps for a criteria, if caching is enabled.
- * @param criteria The evaluation criteria string.
- * @returns Promise resolving to the cached steps or undefined.
- */
 const getSteps = (criteria) => {
     return config_1.default.isStepsCached ? config_1.default.stepsCache.get(md5(criteria)) : Promise.resolve(undefined);
 };
 exports.getSteps = getSteps;
-/**
- * Set evaluation steps for a criteria in the cache, if caching is enabled.
- * @param criteria The evaluation criteria string.
- * @param steps The steps to cache.
- * @returns Promise that resolves when the steps are set.
- */
 const setSteps = (criteria, steps) => {
     if (config_1.default.isStepsCached) {
         return config_1.default.stepsCache.set(md5(criteria), steps);

package/dst/registry.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"registry.js","sourceRoot":"","sources":["../src/registry.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,oDAAsC;AAEtC,2CAAwC;AACxC,iDAA8C;AAC9C,2CAAwC;AACxC,6CAA0C;AAC1C,2DAAiD;AACjD,yCAAsC;AACtC,+CAA4C;AAC5C,uCAAoC;AACpC,mDAAgD;AAChD,qCAAkC;AAElC,sDAA4B;~~AAG5B;;GAEG;AACH~~,MAAM,SAAS,GAA6B;IAC1C,MAAM,EAAN,eAAM;IACN,SAAS,EAAT,qBAAS;IACT,MAAM,EAAN,eAAM;IACN,OAAO,EAAP,iBAAO;IACP,OAAO,EAAP,wBAAO;IACP,KAAK,EAAL,aAAK;IACL,QAAQ,EAAR,mBAAQ;IACR,IAAI,EAAJ,WAAI;IACJ,UAAU,EAAV,uBAAU;IACV,GAAG,EAAH,SAAG;CACJ,CAAC;~~AAEF;;;;;GAKG;AACI~~,MAAM,QAAQ,GAAG,CAAC,YAAoB,EAAE,SAAiB,EAAiB,EAAE;IACjF,MAAM,QAAQ,GAAG,GAAG,YAAY,IAAI,SAAS,EAAE,CAAC;IAEhD,IAAI,KAAK,GAAG,gBAAI,CAAC,aAAa,CAAC,CAAC,CAAC,gBAAI,CAAC,UAAU,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;IAE3E,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,QAAQ,GAAG,SAAS,CAAC,YAAY,CAAC,CAAC;QAEzC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACd,MAAM,IAAI,KAAK,CAAC,sBAAsB,YAAY,2BAA2B,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACpH,CAAC;QAED,KAAK,GAAG,QAAQ,CAAC,SAAS,CAAC,CAAC;QAE5B,IAAI,gBAAI,CAAC,aAAa,EAAE,CAAC;YACvB,gBAAI,CAAC,UAAU,CAAC,GAAG,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;QACvC,CAAC;IACH,CAAC;IAED,OAAO,KAAM,CAAC;AAChB,CAAC,CAAA;AApBY,QAAA,QAAQ,YAoBpB;~~AAED;;;;GAIG;AACH~~,MAAM,GAAG,GAAG,CAAC,GAAW,EAAU,EAAE;IAClC,OAAO,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;AAC5D,CAAC,CAAA;~~AAED;;;;GAIG;AACI~~,MAAM,QAAQ,GAAG,CAAC,QAAgB,EAAiC,EAAE;IAC1E,OAAO,gBAAI,CAAC,aAAa,CAAC,CAAC,CAAC,gBAAI,CAAC,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;AAC9F,CAAC,CAAA;AAFY,QAAA,QAAQ,YAEpB;~~AAED;;;;;GAKG;AACI~~,MAAM,QAAQ,GAAG,CAAC,QAAgB,EAAE,KAAe,EAAiB,EAAE;IAC3E,IAAI,gBAAI,CAAC,aAAa,EAAE,CAAC;QACvB,OAAO,gBAAI,CAAC,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,KAAK,CAAC,CAAC;IACnD,CAAC;IAED,OAAO,OAAO,CAAC,OAAO,EAAE,CAAC;AAC3B,CAAC,CAAA;AANY,QAAA,QAAQ,YAMpB"}
1	+ {"version":3,"file":"registry.js","sourceRoot":"","sources":["../src/registry.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,oDAAsC;AAEtC,2CAAwC;AACxC,iDAA8C;AAC9C,2CAAwC;AACxC,6CAA0C;AAC1C,2DAAiD;AACjD,yCAAsC;AACtC,+CAA4C;AAC5C,uCAAoC;AACpC,mDAAgD;AAChD,qCAAkC;AAElC,sDAA4B;AAM5B,MAAM,SAAS,GAA6B;IAC1C,MAAM,EAAN,eAAM;IACN,SAAS,EAAT,qBAAS;IACT,MAAM,EAAN,eAAM;IACN,OAAO,EAAP,iBAAO;IACP,OAAO,EAAP,wBAAO;IACP,KAAK,EAAL,aAAK;IACL,QAAQ,EAAR,mBAAQ;IACR,IAAI,EAAJ,WAAI;IACJ,UAAU,EAAV,uBAAU;IACV,GAAG,EAAH,SAAG;CACJ,CAAC;AAQK,MAAM,QAAQ,GAAG,CAAC,YAAoB,EAAE,SAAiB,EAAiB,EAAE;IACjF,MAAM,QAAQ,GAAG,GAAG,YAAY,IAAI,SAAS,EAAE,CAAC;IAEhD,IAAI,KAAK,GAAG,gBAAI,CAAC,aAAa,CAAC,CAAC,CAAC,gBAAI,CAAC,UAAU,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;IAE3E,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,MAAM,QAAQ,GAAG,SAAS,CAAC,YAAY,CAAC,CAAC;QAEzC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACd,MAAM,IAAI,KAAK,CAAC,sBAAsB,YAAY,2BAA2B,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACpH,CAAC;QAED,KAAK,GAAG,QAAQ,CAAC,SAAS,CAAC,CAAC;QAE5B,IAAI,gBAAI,CAAC,aAAa,EAAE,CAAC;YACvB,gBAAI,CAAC,UAAU,CAAC,GAAG,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;QACvC,CAAC;IACH,CAAC;IAED,OAAO,KAAM,CAAC;AAChB,CAAC,CAAA;AApBY,QAAA,QAAQ,YAoBpB;AAOD,MAAM,GAAG,GAAG,CAAC,GAAW,EAAU,EAAE;IAClC,OAAO,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;AAC5D,CAAC,CAAA;AAOM,MAAM,QAAQ,GAAG,CAAC,QAAgB,EAAiC,EAAE;IAC1E,OAAO,gBAAI,CAAC,aAAa,CAAC,CAAC,CAAC,gBAAI,CAAC,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;AAC9F,CAAC,CAAA;AAFY,QAAA,QAAQ,YAEpB;AAQM,MAAM,QAAQ,GAAG,CAAC,QAAgB,EAAE,KAAe,EAAiB,EAAE;IAC3E,IAAI,gBAAI,CAAC,aAAa,EAAE,CAAC;QACvB,OAAO,gBAAI,CAAC,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,KAAK,CAAC,CAAC;IACnD,CAAC;IAED,OAAO,OAAO,CAAC,OAAO,EAAE,CAAC;AAC3B,CAAC,CAAA;AANY,QAAA,QAAQ,YAMpB"}

package/dst/types.d.ts ADDED Viewed

	@@ -0,0 +1 @@
1	+ export type EvalMethod = 'bEval' | 'gEval' | 'llmRubric';

package/dst/types.js ADDED Viewed

@@ -0,0 +1,3 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+//# sourceMappingURL=types.js.map

package/dst/types.js.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":""}

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@eva-llm/eva-judge",
-  "version": "0.1.0",
-  "description": "LLM-as-a-judge abstraction layer using ai-sdk and plugins",
+  "version": "0.1.2",
+  "description": "LLM-as-a-Judge abstraction layer using ai-sdk and plugins",
   "main": "dst/index.js",
   "types": "dst/index.d.ts",
   "engines": {