@arizeai/phoenix-evals 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +205 -0
- package/README.md +10 -0
- package/dist/esm/llm/ClassificationEvaluator.d.ts.map +1 -1
- package/dist/esm/llm/ClassificationEvaluator.js +1 -1
- package/dist/esm/llm/ClassificationEvaluator.js.map +1 -1
- package/dist/esm/llm/LLMEvaluator.d.ts +2 -2
- package/dist/esm/llm/LLMEvaluator.d.ts.map +1 -1
- package/dist/esm/llm/LLMEvaluator.js +1 -1
- package/dist/esm/llm/LLMEvaluator.js.map +1 -1
- package/dist/esm/llm/createClassificationEvaluator.d.ts.map +1 -1
- package/dist/esm/llm/createClassificationEvaluator.js.map +1 -1
- package/dist/esm/llm/createClassifierFn.d.ts.map +1 -1
- package/dist/esm/llm/createClassifierFn.js +1 -1
- package/dist/esm/llm/createClassifierFn.js.map +1 -1
- package/dist/esm/llm/createDocumentRelevancyEvaluator.d.ts.map +1 -1
- package/dist/esm/llm/createDocumentRelevancyEvaluator.js +1 -1
- package/dist/esm/llm/createDocumentRelevancyEvaluator.js.map +1 -1
- package/dist/esm/llm/createHallucinationEvaluator.d.ts.map +1 -1
- package/dist/esm/llm/createHallucinationEvaluator.js +1 -1
- package/dist/esm/llm/createHallucinationEvaluator.js.map +1 -1
- package/dist/esm/llm/generateClassification.d.ts.map +1 -1
- package/dist/esm/llm/generateClassification.js +1 -1
- package/dist/esm/llm/generateClassification.js.map +1 -1
- package/dist/esm/template/applyTemplate.d.ts.map +1 -1
- package/dist/esm/template/applyTemplate.js.map +1 -1
- package/dist/esm/template/getTemplateVariables.d.ts.map +1 -1
- package/dist/esm/template/getTemplateVariables.js.map +1 -1
- package/dist/esm/tsconfig.esm.tsbuildinfo +1 -1
- package/dist/esm/types/evals.d.ts +5 -5
- package/dist/esm/types/evals.d.ts.map +1 -1
- package/dist/src/llm/ClassificationEvaluator.d.ts.map +1 -1
- package/dist/src/llm/ClassificationEvaluator.js +1 -1
- package/dist/src/llm/ClassificationEvaluator.js.map +1 -1
- package/dist/src/llm/LLMEvaluator.d.ts +2 -2
- package/dist/src/llm/LLMEvaluator.d.ts.map +1 -1
- package/dist/src/llm/LLMEvaluator.js +1 -1
- package/dist/src/llm/LLMEvaluator.js.map +1 -1
- package/dist/src/llm/createClassificationEvaluator.d.ts.map +1 -1
- package/dist/src/llm/createClassificationEvaluator.js.map +1 -1
- package/dist/src/llm/createClassifierFn.d.ts.map +1 -1
- package/dist/src/llm/createClassifierFn.js +1 -1
- package/dist/src/llm/createClassifierFn.js.map +1 -1
- package/dist/src/llm/createDocumentRelevancyEvaluator.d.ts.map +1 -1
- package/dist/src/llm/createDocumentRelevancyEvaluator.js.map +1 -1
- package/dist/src/llm/createHallucinationEvaluator.d.ts.map +1 -1
- package/dist/src/llm/createHallucinationEvaluator.js.map +1 -1
- package/dist/src/llm/generateClassification.d.ts.map +1 -1
- package/dist/src/llm/generateClassification.js +1 -1
- package/dist/src/llm/generateClassification.js.map +1 -1
- package/dist/src/template/applyTemplate.d.ts.map +1 -1
- package/dist/src/template/applyTemplate.js.map +1 -1
- package/dist/src/template/getTemplateVariables.d.ts.map +1 -1
- package/dist/src/template/getTemplateVariables.js.map +1 -1
- package/dist/src/types/evals.d.ts +5 -5
- package/dist/src/types/evals.d.ts.map +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +6 -4
- package/src/llm/ClassificationEvaluator.ts +2 -1
- package/src/llm/LLMEvaluator.ts +2 -2
- package/src/llm/createClassificationEvaluator.ts +1 -0
- package/src/llm/createClassifierFn.ts +3 -2
- package/src/llm/createDocumentRelevancyEvaluator.ts +3 -2
- package/src/llm/createHallucinationEvaluator.ts +4 -3
- package/src/llm/generateClassification.ts +2 -1
- package/src/template/applyTemplate.ts +1 -0
- package/src/template/getTemplateVariables.ts +1 -0
- package/src/types/evals.ts +6 -5
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@arizeai/phoenix-evals",
|
|
3
|
-
"version": "0.2.1",
|
|
3
|
+
"version": "0.3.0",
|
|
4
4
|
"description": "A library for running evaluations for AI use cases",
|
|
5
5
|
"main": "dist/src/index.js",
|
|
6
6
|
"module": "dist/esm/index.js",
|
|
@@ -34,23 +34,25 @@
|
|
|
34
34
|
],
|
|
35
35
|
"keywords": [
|
|
36
36
|
"evals",
|
|
37
|
+
"arize",
|
|
37
38
|
"phoenix",
|
|
38
39
|
"llm",
|
|
39
40
|
"evaluation"
|
|
40
41
|
],
|
|
41
42
|
"author": "oss@arize.com",
|
|
42
|
-
"license": "
|
|
43
|
+
"license": "Apache-2.0",
|
|
43
44
|
"devDependencies": {
|
|
44
45
|
"@ai-sdk/openai": "^2.0.0",
|
|
45
46
|
"@arizeai/openinference-instrumentation-openai": "^2.3.0",
|
|
46
47
|
"@types/mustache": "^4.2.6",
|
|
47
48
|
"@types/node": "^24.0.10",
|
|
49
|
+
"msw": "^2.11.6",
|
|
48
50
|
"nock": "^14.0.5",
|
|
49
51
|
"tsx": "^4.19.3",
|
|
50
52
|
"typedoc": "^0.27.9",
|
|
51
53
|
"typescript": "^5.8.2",
|
|
52
54
|
"vitest": "^2.1.9",
|
|
53
|
-
"@arizeai/phoenix-client": "
|
|
55
|
+
"@arizeai/phoenix-client": "5.2.0"
|
|
54
56
|
},
|
|
55
57
|
"engines": {
|
|
56
58
|
"node": ">=18"
|
|
@@ -65,7 +67,7 @@
|
|
|
65
67
|
"clean": "rimraf dist",
|
|
66
68
|
"prebuild": "pnpm run clean",
|
|
67
69
|
"build": "tsc --build tsconfig.json tsconfig.esm.json && tsc-alias -p tsconfig.esm.json",
|
|
68
|
-
"postbuild": "echo '{\"type\": \"module\"}' > ./dist/esm/package.json
|
|
70
|
+
"postbuild": "echo '{\"type\": \"module\"}' > ./dist/esm/package.json",
|
|
69
71
|
"type:check": "tsc --noEmit",
|
|
70
72
|
"test": "vitest --typecheck",
|
|
71
73
|
"docs": "typedoc",
|
|
@@ -1,11 +1,12 @@
|
|
|
1
|
+
import { getTemplateVariables } from "../template";
|
|
1
2
|
import {
|
|
2
3
|
CreateClassificationEvaluatorArgs,
|
|
3
4
|
EvaluatorFn,
|
|
4
5
|
Template,
|
|
5
6
|
} from "../types";
|
|
7
|
+
|
|
6
8
|
import { createClassifierFn } from "./createClassifierFn";
|
|
7
9
|
import { LLMEvaluator } from "./LLMEvaluator";
|
|
8
|
-
import { getTemplateVariables } from "../template";
|
|
9
10
|
|
|
10
11
|
/**
|
|
11
12
|
* An LLM evaluator that performs evaluation via classification
|
package/src/llm/LLMEvaluator.ts
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import {
|
|
2
|
+
CreateEvaluatorArgs,
|
|
2
3
|
EvaluationResult,
|
|
3
4
|
Evaluator,
|
|
4
5
|
OptimizationDirection,
|
|
5
|
-
CreateEvaluatorArgs,
|
|
6
6
|
} from "../types";
|
|
7
7
|
|
|
8
8
|
/**
|
|
@@ -12,7 +12,7 @@ export abstract class LLMEvaluator<RecordType extends Record<string, unknown>>
|
|
|
12
12
|
implements Evaluator<RecordType>
|
|
13
13
|
{
|
|
14
14
|
readonly name: string;
|
|
15
|
-
readonly
|
|
15
|
+
readonly kind = "LLM" as const;
|
|
16
16
|
readonly optimizationDirection?: OptimizationDirection;
|
|
17
17
|
constructor({ name, optimizationDirection }: CreateEvaluatorArgs) {
|
|
18
18
|
this.name = name;
|
|
@@ -1,11 +1,12 @@
|
|
|
1
|
+
import { formatTemplate } from "../template";
|
|
1
2
|
import {
|
|
2
3
|
ClassificationChoicesMap,
|
|
3
|
-
EvaluationResult,
|
|
4
4
|
CreateClassifierArgs,
|
|
5
|
+
EvaluationResult,
|
|
5
6
|
EvaluatorFn,
|
|
6
7
|
} from "../types/evals";
|
|
8
|
+
|
|
7
9
|
import { generateClassification } from "./generateClassification";
|
|
8
|
-
import { formatTemplate } from "../template";
|
|
9
10
|
|
|
10
11
|
/**
|
|
11
12
|
* Convert a mapping of choices to labels
|
|
@@ -1,8 +1,9 @@
|
|
|
1
|
-
import { CreateClassificationEvaluatorArgs, Evaluator } from "../types/evals";
|
|
2
1
|
import {
|
|
3
|
-
DOCUMENT_RELEVANCY_TEMPLATE,
|
|
4
2
|
DOCUMENT_RELEVANCY_CHOICES,
|
|
3
|
+
DOCUMENT_RELEVANCY_TEMPLATE,
|
|
5
4
|
} from "../default_templates/DOCUMENT_RELEVANCY_TEMPLATE";
|
|
5
|
+
import { CreateClassificationEvaluatorArgs, Evaluator } from "../types/evals";
|
|
6
|
+
|
|
6
7
|
import { createClassificationEvaluator } from "./createClassificationEvaluator";
|
|
7
8
|
|
|
8
9
|
export interface DocumentRelevancyEvaluatorArgs
|
|
@@ -1,10 +1,11 @@
|
|
|
1
|
-
import { CreateClassificationEvaluatorArgs } from "../types/evals";
|
|
2
1
|
import {
|
|
3
|
-
HALLUCINATION_TEMPLATE,
|
|
4
2
|
HALLUCINATION_CHOICES,
|
|
3
|
+
HALLUCINATION_TEMPLATE,
|
|
5
4
|
} from "../default_templates/HALLUCINATION_TEMPLATE";
|
|
6
|
-
import {
|
|
5
|
+
import { CreateClassificationEvaluatorArgs } from "../types/evals";
|
|
6
|
+
|
|
7
7
|
import { ClassificationEvaluator } from "./ClassificationEvaluator";
|
|
8
|
+
import { createClassificationEvaluator } from "./createClassificationEvaluator";
|
|
8
9
|
|
|
9
10
|
export interface HallucinationEvaluatorArgs
|
|
10
11
|
extends Omit<
|
|
@@ -1,9 +1,10 @@
|
|
|
1
|
+
import { tracer } from "../telemetry";
|
|
1
2
|
import { ClassificationResult, WithLLM } from "../types/evals";
|
|
2
3
|
import { WithTelemetry } from "../types/otel";
|
|
3
4
|
import type { WithPrompt } from "../types/prompts";
|
|
5
|
+
|
|
4
6
|
import { generateObject } from "ai";
|
|
5
7
|
import { z } from "zod";
|
|
6
|
-
import { tracer } from "../telemetry";
|
|
7
8
|
export type ClassifyArgs = WithLLM &
|
|
8
9
|
WithTelemetry &
|
|
9
10
|
WithPrompt & {
|
package/src/types/evals.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
import { LanguageModel } from "ai";
|
|
2
1
|
import { WithTelemetry } from "./otel";
|
|
3
2
|
|
|
3
|
+
import { LanguageModel } from "ai";
|
|
4
|
+
|
|
4
5
|
/**
|
|
5
6
|
* A specific AI example that is under evaluation
|
|
6
7
|
*/
|
|
@@ -101,9 +102,9 @@ export type EvaluatorFn<ExampleType extends Record<string, unknown>> = (
|
|
|
101
102
|
) => Promise<EvaluationResult>;
|
|
102
103
|
|
|
103
104
|
/**
|
|
104
|
-
* The
|
|
105
|
+
* The kind of the evaluation
|
|
105
106
|
*/
|
|
106
|
-
export type
|
|
107
|
+
export type EvaluationKind = "LLM" | "CODE";
|
|
107
108
|
|
|
108
109
|
/**
|
|
109
110
|
* The direction to optimize the numeric evaluation score
|
|
@@ -120,9 +121,9 @@ interface EvaluatorDescription {
|
|
|
120
121
|
*/
|
|
121
122
|
name: string;
|
|
122
123
|
/**
|
|
123
|
-
* The
|
|
124
|
+
* The kind of the evaluation. Also known as the "kind" of evaluator.
|
|
124
125
|
*/
|
|
125
|
-
|
|
126
|
+
kind: EvaluationKind;
|
|
126
127
|
/**
|
|
127
128
|
* The direction to optimize the numeric evaluation score
|
|
128
129
|
* E.x. "MAXIMIZE" means that the higher the score, the better the evaluation
|