npm - @arizeai/phoenix-evals - Versions diffs - 0.2.1 → 0.3.0 - Mend

@arizeai/phoenix-evals 0.2.1 → 0.3.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (67) hide show

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@arizeai/phoenix-evals",
-  "version": "0.2.1",
+  "version": "0.3.0",
   "description": "A library for running evaluations for AI use cases",
   "main": "dist/src/index.js",
   "module": "dist/esm/index.js",
@@ -34,23 +34,25 @@
   ],
   "keywords": [
     "evals",
+    "arize",
     "phoenix",
     "llm",
     "evaluation"
   ],
   "author": "oss@arize.com",
-  "license": "ELv2",
+  "license": "Apache-2.0",
   "devDependencies": {
     "@ai-sdk/openai": "^2.0.0",
     "@arizeai/openinference-instrumentation-openai": "^2.3.0",
     "@types/mustache": "^4.2.6",
     "@types/node": "^24.0.10",
+    "msw": "^2.11.6",
     "nock": "^14.0.5",
     "tsx": "^4.19.3",
     "typedoc": "^0.27.9",
     "typescript": "^5.8.2",
     "vitest": "^2.1.9",
-    "@arizeai/phoenix-client": "4.0.3"
+    "@arizeai/phoenix-client": "5.2.0"
   },
   "engines": {
     "node": ">=18"
@@ -65,7 +67,7 @@
     "clean": "rimraf dist",
     "prebuild": "pnpm run clean",
     "build": "tsc --build tsconfig.json tsconfig.esm.json && tsc-alias -p tsconfig.esm.json",
-    "postbuild": "echo '{\"type\": \"module\"}' > ./dist/esm/package.json && rimraf dist/test dist/examples",
+    "postbuild": "echo '{\"type\": \"module\"}' > ./dist/esm/package.json",
     "type:check": "tsc --noEmit",
     "test": "vitest --typecheck",
     "docs": "typedoc",

package/src/llm/ClassificationEvaluator.ts CHANGED Viewed

@@ -1,11 +1,12 @@
+import { getTemplateVariables } from "../template";
 import {
   CreateClassificationEvaluatorArgs,
   EvaluatorFn,
   Template,
 } from "../types";
 import { createClassifierFn } from "./createClassifierFn";
 import { LLMEvaluator } from "./LLMEvaluator";
-import { getTemplateVariables } from "../template";
 /**
  * An LLM evaluator that performs evaluation via classification

package/src/llm/LLMEvaluator.ts CHANGED Viewed

@@ -1,8 +1,8 @@
 import {
+  CreateEvaluatorArgs,
   EvaluationResult,
   Evaluator,
   OptimizationDirection,
-  CreateEvaluatorArgs,
 } from "../types";
 /**
@@ -12,7 +12,7 @@ export abstract class LLMEvaluator<RecordType extends Record<string, unknown>>
   implements Evaluator<RecordType>
 {
   readonly name: string;
-  readonly source = "LLM" as const;
+  readonly kind = "LLM" as const;
   readonly optimizationDirection?: OptimizationDirection;
   constructor({ name, optimizationDirection }: CreateEvaluatorArgs) {
     this.name = name;

package/src/llm/createClassificationEvaluator.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 import { CreateClassificationEvaluatorArgs } from "../types/evals";
 import { ClassificationEvaluator } from "./ClassificationEvaluator";
 export function createClassificationEvaluator<

package/src/llm/createClassifierFn.ts CHANGED Viewed

@@ -1,11 +1,12 @@
+import { formatTemplate } from "../template";
 import {
   ClassificationChoicesMap,
-  EvaluationResult,
   CreateClassifierArgs,
+  EvaluationResult,
   EvaluatorFn,
 } from "../types/evals";
 import { generateClassification } from "./generateClassification";
-import { formatTemplate } from "../template";
 /**
  * Convert a mapping of choices to labels

package/src/llm/createDocumentRelevancyEvaluator.ts CHANGED Viewed

@@ -1,8 +1,9 @@
-import { CreateClassificationEvaluatorArgs, Evaluator } from "../types/evals";
 import {
-  DOCUMENT_RELEVANCY_TEMPLATE,
   DOCUMENT_RELEVANCY_CHOICES,
+  DOCUMENT_RELEVANCY_TEMPLATE,
 } from "../default_templates/DOCUMENT_RELEVANCY_TEMPLATE";
+import { CreateClassificationEvaluatorArgs, Evaluator } from "../types/evals";
 import { createClassificationEvaluator } from "./createClassificationEvaluator";
 export interface DocumentRelevancyEvaluatorArgs

package/src/llm/createHallucinationEvaluator.ts CHANGED Viewed

@@ -1,10 +1,11 @@
-import { CreateClassificationEvaluatorArgs } from "../types/evals";
 import {
-  HALLUCINATION_TEMPLATE,
   HALLUCINATION_CHOICES,
+  HALLUCINATION_TEMPLATE,
 } from "../default_templates/HALLUCINATION_TEMPLATE";
-import { createClassificationEvaluator } from "./createClassificationEvaluator";
+import { CreateClassificationEvaluatorArgs } from "../types/evals";
 import { ClassificationEvaluator } from "./ClassificationEvaluator";
+import { createClassificationEvaluator } from "./createClassificationEvaluator";
 export interface HallucinationEvaluatorArgs
   extends Omit<

package/src/llm/generateClassification.ts CHANGED Viewed

@@ -1,9 +1,10 @@
+import { tracer } from "../telemetry";
 import { ClassificationResult, WithLLM } from "../types/evals";
 import { WithTelemetry } from "../types/otel";
 import type { WithPrompt } from "../types/prompts";
 import { generateObject } from "ai";
 import { z } from "zod";
-import { tracer } from "../telemetry";
 export type ClassifyArgs = WithLLM &
   WithTelemetry &
   WithPrompt & {

package/src/template/applyTemplate.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 import { Template } from "../types/templating";
 import Mustache from "mustache";
 /**

package/src/template/getTemplateVariables.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 import { Template } from "../types/templating";
 import Mustache from "mustache";
 type GetTemplateVariableArgs = {

package/src/types/evals.ts CHANGED Viewed

@@ -1,6 +1,7 @@
-import { LanguageModel } from "ai";
 import { WithTelemetry } from "./otel";
+import { LanguageModel } from "ai";
 /**
  * A specific AI example that is under evaluation
  */
@@ -101,9 +102,9 @@ export type EvaluatorFn<ExampleType extends Record<string, unknown>> = (
 ) => Promise<EvaluationResult>;
 /**
- * The source of the evaluation
+ * The kind of the evaluation
  */
-export type EvaluationSource = "LLM" | "CODE";
+export type EvaluationKind = "LLM" | "CODE";
 /**
  * The direction to optimize the numeric evaluation score
@@ -120,9 +121,9 @@ interface EvaluatorDescription {
    */
   name: string;
   /**
-   * The source of the evaluation. Also known as the "kind" of evaluator.
+   * The kind of the evaluation (e.g. "LLM" or "CODE").
    */
-  source: EvaluationSource;
+  kind: EvaluationKind;
   /**
    * The direction to optimize the numeric evaluation score
    * E.g. "MAXIMIZE" means that the higher the score, the better the evaluation