npm - judgeval - Versions diffs - 0.2.4 → 0.2.5 - Mend

judgeval 0.2.4 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (109) hide show

package/dist/types/scorers/metrics/hallucination/prompts.d.ts ADDED Viewed

@@ -0,0 +1,63 @@
+import { z } from 'zod';
+/**
+ * Schema for hallucination verdict
+ */
+export declare const HallucinationVerdictSchema: z.ZodObject<{
+    verdict: z.ZodString;
+    reason: z.ZodString;
+}, "strip", z.ZodTypeAny, {
+    reason: string;
+    verdict: string;
+}, {
+    reason: string;
+    verdict: string;
+}>;
+export type HallucinationVerdict = z.infer<typeof HallucinationVerdictSchema>;
+/**
+ * Schema for verdicts
+ */
+export declare const VerdictsSchema: z.ZodObject<{
+    verdicts: z.ZodArray<z.ZodObject<{
+        verdict: z.ZodString;
+        reason: z.ZodString;
+    }, "strip", z.ZodTypeAny, {
+        reason: string;
+        verdict: string;
+    }, {
+        reason: string;
+        verdict: string;
+    }>, "many">;
+}, "strip", z.ZodTypeAny, {
+    verdicts: {
+        reason: string;
+        verdict: string;
+    }[];
+}, {
+    verdicts: {
+        reason: string;
+        verdict: string;
+    }[];
+}>;
+/**
+ * Schema for reason
+ */
+export declare const ReasonSchema: z.ZodObject<{
+    reason: z.ZodString;
+}, "strip", z.ZodTypeAny, {
+    reason: string;
+}, {
+    reason: string;
+}>;
+/**
+ * Templates for hallucination scorer prompts
+ */
+export declare class HallucinationTemplate {
+    /**
+     * Generate a prompt to evaluate hallucinations in the actual output
+     */
+    static generateVerdicts(actualOutput: string, contexts: string[]): string;
+    /**
+     * Generate a prompt to create a reason for the hallucination score
+     */
+    static generateReason(actualOutput: string, contexts: string[]): string;
+}

package/dist/types/scorers/metrics/instruction-adherence/index.d.ts ADDED Viewed

@@ -0,0 +1,2 @@
+export * from './instruction-adherence.js';
+export * from './prompts.js';

package/dist/types/scorers/metrics/instruction-adherence/instruction-adherence.d.ts ADDED Viewed

@@ -0,0 +1,67 @@
+import { Example } from '../../../data/example.js';
+import { ScorerData } from '../../../data/result.js';
+import { JudgevalScorer } from '../../base-scorer.js';
+import { Judge } from '../../../judges/index.js';
+/**
+ * InstructionAdherenceScorer evaluates how well an LLM follows instructions
+ * by extracting instructions from the input and checking if they are followed in the output.
+ *
+ * The score is the average of scores for each instruction (1 = followed, 0.5 = partially followed, 0 = not followed).
+ */
+export declare class InstructionAdherenceScorer extends JudgevalScorer {
+    private model;
+    private using_native_model;
+    private _instructions;
+    private _verdicts;
+    /**
+     * Create a new InstructionAdherenceScorer
+     *
+     * @param threshold - Success threshold (default: 0.5)
+     * @param model - Model to use for evaluation (default: DefaultJudge)
+     * @param include_reason - Whether to include a reason for the score (default: true)
+     * @param async_mode - Whether to use async mode (default: false)
+     * @param strict_mode - Whether to use strict mode (default: false)
+     * @param verbose_mode - Whether to include verbose logs (default: false)
+     */
+    constructor(threshold?: number, model?: string | Judge | undefined, include_reason?: boolean, async_mode?: boolean, strict_mode?: boolean, verbose_mode?: boolean);
+    /**
+     * Extract instructions from input text
+     */
+    private _aGetInstructions;
+    /**
+     * Extract instructions from input text (synchronous)
+     */
+    private _getInstructions;
+    /**
+     * Generate verdicts for each instruction
+     */
+    private _aGetVerdicts;
+    /**
+     * Generate verdicts for each instruction (synchronous)
+     */
+    private _getVerdicts;
+    /**
+     * Calculate the instruction adherence score
+     */
+    private _computeScore;
+    /**
+     * Create verbose logs for debugging
+     */
+    private _createVerboseLogs;
+    /**
+     * Check if example has required parameters
+     */
+    private _checkExampleParams;
+    /**
+     * Score an example synchronously
+     */
+    syncScoreExample(example: Example): ScorerData;
+    /**
+     * Score an example asynchronously
+     */
+    scoreExample(example: Example): Promise<ScorerData>;
+    /**
+     * Get the name of the scorer
+     */
+    get name(): string;
+}

package/dist/types/scorers/metrics/instruction-adherence/prompts.d.ts ADDED Viewed

@@ -0,0 +1,78 @@
+import { z } from 'zod';
+/**
+ * Schema for a single instruction adherence verdict
+ */
+export declare const VerdictSchema: z.ZodObject<{
+    instruction: z.ZodString;
+    score: z.ZodNumber;
+    reason: z.ZodString;
+}, "strip", z.ZodTypeAny, {
+    score: number;
+    reason: string;
+    instruction: string;
+}, {
+    score: number;
+    reason: string;
+    instruction: string;
+}>;
+/**
+ * Schema for a list of verdicts
+ */
+export declare const VerdictsSchema: z.ZodObject<{
+    verdicts: z.ZodArray<z.ZodObject<{
+        instruction: z.ZodString;
+        score: z.ZodNumber;
+        reason: z.ZodString;
+    }, "strip", z.ZodTypeAny, {
+        score: number;
+        reason: string;
+        instruction: string;
+    }, {
+        score: number;
+        reason: string;
+        instruction: string;
+    }>, "many">;
+}, "strip", z.ZodTypeAny, {
+    verdicts: {
+        score: number;
+        reason: string;
+        instruction: string;
+    }[];
+}, {
+    verdicts: {
+        score: number;
+        reason: string;
+        instruction: string;
+    }[];
+}>;
+/**
+ * Schema for a list of instructions
+ */
+export declare const InstructionsSchema: z.ZodObject<{
+    instructions: z.ZodArray<z.ZodString, "many">;
+}, "strip", z.ZodTypeAny, {
+    instructions: string[];
+}, {
+    instructions: string[];
+}>;
+/**
+ * Type for a single verdict
+ */
+export type InstructionAdherenceVerdict = z.infer<typeof VerdictSchema>;
+/**
+ * Type for a list of instructions
+ */
+export type InstructionList = z.infer<typeof InstructionsSchema>;
+/**
+ * Templates for prompts used in the InstructionAdherenceScorer
+ */
+export declare class InstructionAdherenceTemplate {
+    /**
+     * Generate a prompt to extract instructions from input text
+     */
+    static getInstructions(input: string): string;
+    /**
+     * Generate a prompt to evaluate adherence to instructions
+     */
+    static generateVerdicts(instructions: string[], actualOutput: string): string;
+}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "judgeval",
-  "version": "0.2.4",
+  "version": "0.2.5",
   "description": "Judgment SDK for TypeScript/JavaScript",
   "main": "./dist/cjs/index.js",
   "module": "./dist/esm/index.js",
@@ -22,46 +22,61 @@
     "@langchain/core": "^0.3.44",
     "@langchain/langgraph": "^0.2.63",
     "@langchain/openai": "^0.5.5",
-    "@supabase/supabase-js": "^2.43.4",
+    "@opentelemetry/api": "^1.8.0",
+    "@opentelemetry/core": "^1.30.1",
+    "@opentelemetry/sdk-trace-base": "^1.25.1",
+    "@supabase/supabase-js": "^2.42.5",
     "@types/node-fetch": "^2.6.12",
     "@types/uuid": "^10.0.0",
+    "ansi-colors": "^4.1.3",
     "axios": "^1.7.2",
     "chalk": "4.1.2",
+    "cli-progress": "^3.12.0",
     "csv-writer": "^1.6.0",
     "dotenv": "^16.4.7",
     "eventsource-parser": "^1.1.2",
     "json-schema-to-ts": "^3.1.0",
     "langchain": "^0.3.21",
     "node-fetch": "^2.7.0",
+    "openai": "^4.0.0",
     "papaparse": "^5.4.1",
+    "together-ai": "^0.11.1",
     "undici-types": "^6.21.0",
-    "uuid": "^11.1.0",
-    "winston": "^3.17.0",
-    "ws": "^8.18.1",
-    "cli-progress": "^3.12.0",
-    "ansi-colors": "^4.1.3"
+    "uuid": "^10.0.0",
+    "winston": "^3.13.1",
+    "ws": "^8.18.1"
   },
   "peerDependencies": {
     "@anthropic-ai/sdk": "^0.22.0",
     "openai": "^4.0.0",
-    "together-ai": "^0.7.0"
+    "react": "^18.2.0"
   },
   "devDependencies": {
+    "@ai-sdk/openai": "^1.3.16",
+    "@ai-sdk/provider": "0.0.26",
+    "@jest/globals": "^29.7.0",
+    "@opentelemetry/sdk-node": "^0.52.0",
+    "@opentelemetry/sdk-trace-node": "^1.8.0",
+    "@types/cli-progress": "^3.11.6",
     "@types/jest": "^29.5.12",
-    "@types/node": "^20.12.12",
+    "@types/node": "^20.14.10",
+    "@types/papaparse": "^5.3.15",
+    "@types/uuid": "^10.0.0",
     "@typescript-eslint/eslint-plugin": "^7.10.0",
     "@typescript-eslint/parser": "^7.10.0",
+    "ai": "^3.2.16",
     "cross-env": "^7.0.3",
     "eslint": "^8.57.0",
     "eslint-config-prettier": "^9.1.0",
     "eslint-plugin-prettier": "^5.1.3",
     "jest": "^29.7.0",
+    "nodemon": "^3.1.4",
     "prettier": "^3.2.5",
-    "ts-jest": "^29.1.2",
-    "typedoc": "^0.25.13",
-    "typescript": "^5.4.5",
-    "@types/cli-progress": "^3.11.6",
-    "@types/papaparse": "^5.3.15"
+    "ts-jest": "^29.1.5",
+    "ts-node": "^10.9.2",
+    "tsx": "^4.16.2",
+    "typedoc": "^0.26.0",
+    "typescript": "^5.5.3"
   },
   "scripts": {
     "build": "rm -rf dist && tsc -p tsconfig.cjs.json && tsc -p tsconfig.esm.json",
@@ -103,5 +118,8 @@
       "import": "./dist/esm/index.js",
       "require": "./dist/cjs/index.js"
     }
+  },
+  "overrides": {
+    "@ai-sdk/provider": "0.0.26"
   }
 }