@arizeai/phoenix-evals 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. package/LICENSE +205 -0
  2. package/README.md +10 -0
  3. package/dist/esm/llm/ClassificationEvaluator.d.ts.map +1 -1
  4. package/dist/esm/llm/ClassificationEvaluator.js +1 -1
  5. package/dist/esm/llm/ClassificationEvaluator.js.map +1 -1
  6. package/dist/esm/llm/LLMEvaluator.d.ts +2 -2
  7. package/dist/esm/llm/LLMEvaluator.d.ts.map +1 -1
  8. package/dist/esm/llm/LLMEvaluator.js +1 -1
  9. package/dist/esm/llm/LLMEvaluator.js.map +1 -1
  10. package/dist/esm/llm/createClassificationEvaluator.d.ts.map +1 -1
  11. package/dist/esm/llm/createClassificationEvaluator.js.map +1 -1
  12. package/dist/esm/llm/createClassifierFn.d.ts.map +1 -1
  13. package/dist/esm/llm/createClassifierFn.js +1 -1
  14. package/dist/esm/llm/createClassifierFn.js.map +1 -1
  15. package/dist/esm/llm/createDocumentRelevancyEvaluator.d.ts.map +1 -1
  16. package/dist/esm/llm/createDocumentRelevancyEvaluator.js +1 -1
  17. package/dist/esm/llm/createDocumentRelevancyEvaluator.js.map +1 -1
  18. package/dist/esm/llm/createHallucinationEvaluator.d.ts.map +1 -1
  19. package/dist/esm/llm/createHallucinationEvaluator.js +1 -1
  20. package/dist/esm/llm/createHallucinationEvaluator.js.map +1 -1
  21. package/dist/esm/llm/generateClassification.d.ts.map +1 -1
  22. package/dist/esm/llm/generateClassification.js +1 -1
  23. package/dist/esm/llm/generateClassification.js.map +1 -1
  24. package/dist/esm/template/applyTemplate.d.ts.map +1 -1
  25. package/dist/esm/template/applyTemplate.js.map +1 -1
  26. package/dist/esm/template/getTemplateVariables.d.ts.map +1 -1
  27. package/dist/esm/template/getTemplateVariables.js.map +1 -1
  28. package/dist/esm/tsconfig.esm.tsbuildinfo +1 -1
  29. package/dist/esm/types/evals.d.ts +5 -5
  30. package/dist/esm/types/evals.d.ts.map +1 -1
  31. package/dist/src/llm/ClassificationEvaluator.d.ts.map +1 -1
  32. package/dist/src/llm/ClassificationEvaluator.js +1 -1
  33. package/dist/src/llm/ClassificationEvaluator.js.map +1 -1
  34. package/dist/src/llm/LLMEvaluator.d.ts +2 -2
  35. package/dist/src/llm/LLMEvaluator.d.ts.map +1 -1
  36. package/dist/src/llm/LLMEvaluator.js +1 -1
  37. package/dist/src/llm/LLMEvaluator.js.map +1 -1
  38. package/dist/src/llm/createClassificationEvaluator.d.ts.map +1 -1
  39. package/dist/src/llm/createClassificationEvaluator.js.map +1 -1
  40. package/dist/src/llm/createClassifierFn.d.ts.map +1 -1
  41. package/dist/src/llm/createClassifierFn.js +1 -1
  42. package/dist/src/llm/createClassifierFn.js.map +1 -1
  43. package/dist/src/llm/createDocumentRelevancyEvaluator.d.ts.map +1 -1
  44. package/dist/src/llm/createDocumentRelevancyEvaluator.js.map +1 -1
  45. package/dist/src/llm/createHallucinationEvaluator.d.ts.map +1 -1
  46. package/dist/src/llm/createHallucinationEvaluator.js.map +1 -1
  47. package/dist/src/llm/generateClassification.d.ts.map +1 -1
  48. package/dist/src/llm/generateClassification.js +1 -1
  49. package/dist/src/llm/generateClassification.js.map +1 -1
  50. package/dist/src/template/applyTemplate.d.ts.map +1 -1
  51. package/dist/src/template/applyTemplate.js.map +1 -1
  52. package/dist/src/template/getTemplateVariables.d.ts.map +1 -1
  53. package/dist/src/template/getTemplateVariables.js.map +1 -1
  54. package/dist/src/types/evals.d.ts +5 -5
  55. package/dist/src/types/evals.d.ts.map +1 -1
  56. package/dist/tsconfig.tsbuildinfo +1 -1
  57. package/package.json +6 -4
  58. package/src/llm/ClassificationEvaluator.ts +2 -1
  59. package/src/llm/LLMEvaluator.ts +2 -2
  60. package/src/llm/createClassificationEvaluator.ts +1 -0
  61. package/src/llm/createClassifierFn.ts +3 -2
  62. package/src/llm/createDocumentRelevancyEvaluator.ts +3 -2
  63. package/src/llm/createHallucinationEvaluator.ts +4 -3
  64. package/src/llm/generateClassification.ts +2 -1
  65. package/src/template/applyTemplate.ts +1 -0
  66. package/src/template/getTemplateVariables.ts +1 -0
  67. package/src/types/evals.ts +6 -5
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@arizeai/phoenix-evals",
3
- "version": "0.2.1",
3
+ "version": "0.3.0",
4
4
  "description": "A library for running evaluations for AI use cases",
5
5
  "main": "dist/src/index.js",
6
6
  "module": "dist/esm/index.js",
@@ -34,23 +34,25 @@
34
34
  ],
35
35
  "keywords": [
36
36
  "evals",
37
+ "arize",
37
38
  "phoenix",
38
39
  "llm",
39
40
  "evaluation"
40
41
  ],
41
42
  "author": "oss@arize.com",
42
- "license": "ELv2",
43
+ "license": "Apache-2.0",
43
44
  "devDependencies": {
44
45
  "@ai-sdk/openai": "^2.0.0",
45
46
  "@arizeai/openinference-instrumentation-openai": "^2.3.0",
46
47
  "@types/mustache": "^4.2.6",
47
48
  "@types/node": "^24.0.10",
49
+ "msw": "^2.11.6",
48
50
  "nock": "^14.0.5",
49
51
  "tsx": "^4.19.3",
50
52
  "typedoc": "^0.27.9",
51
53
  "typescript": "^5.8.2",
52
54
  "vitest": "^2.1.9",
53
- "@arizeai/phoenix-client": "4.0.3"
55
+ "@arizeai/phoenix-client": "5.2.0"
54
56
  },
55
57
  "engines": {
56
58
  "node": ">=18"
@@ -65,7 +67,7 @@
65
67
  "clean": "rimraf dist",
66
68
  "prebuild": "pnpm run clean",
67
69
  "build": "tsc --build tsconfig.json tsconfig.esm.json && tsc-alias -p tsconfig.esm.json",
68
- "postbuild": "echo '{\"type\": \"module\"}' > ./dist/esm/package.json && rimraf dist/test dist/examples",
70
+ "postbuild": "echo '{\"type\": \"module\"}' > ./dist/esm/package.json",
69
71
  "type:check": "tsc --noEmit",
70
72
  "test": "vitest --typecheck",
71
73
  "docs": "typedoc",
package/src/llm/ClassificationEvaluator.ts CHANGED
@@ -1,11 +1,12 @@
1
+ import { getTemplateVariables } from "../template";
1
2
  import {
2
3
  CreateClassificationEvaluatorArgs,
3
4
  EvaluatorFn,
4
5
  Template,
5
6
  } from "../types";
7
+
6
8
  import { createClassifierFn } from "./createClassifierFn";
7
9
  import { LLMEvaluator } from "./LLMEvaluator";
8
- import { getTemplateVariables } from "../template";
9
10
 
10
11
  /**
11
12
  * An LLM evaluator that performs evaluation via classification
package/src/llm/LLMEvaluator.ts CHANGED
@@ -1,8 +1,8 @@
1
1
  import {
2
+ CreateEvaluatorArgs,
2
3
  EvaluationResult,
3
4
  Evaluator,
4
5
  OptimizationDirection,
5
- CreateEvaluatorArgs,
6
6
  } from "../types";
7
7
 
8
8
  /**
@@ -12,7 +12,7 @@ export abstract class LLMEvaluator<RecordType extends Record<string, unknown>>
12
12
  implements Evaluator<RecordType>
13
13
  {
14
14
  readonly name: string;
15
- readonly source = "LLM" as const;
15
+ readonly kind = "LLM" as const;
16
16
  readonly optimizationDirection?: OptimizationDirection;
17
17
  constructor({ name, optimizationDirection }: CreateEvaluatorArgs) {
18
18
  this.name = name;
package/src/llm/createClassificationEvaluator.ts CHANGED
@@ -1,4 +1,5 @@
1
1
  import { CreateClassificationEvaluatorArgs } from "../types/evals";
2
+
2
3
  import { ClassificationEvaluator } from "./ClassificationEvaluator";
3
4
 
4
5
  export function createClassificationEvaluator<
package/src/llm/createClassifierFn.ts CHANGED
@@ -1,11 +1,12 @@
1
+ import { formatTemplate } from "../template";
1
2
  import {
2
3
  ClassificationChoicesMap,
3
- EvaluationResult,
4
4
  CreateClassifierArgs,
5
+ EvaluationResult,
5
6
  EvaluatorFn,
6
7
  } from "../types/evals";
8
+
7
9
  import { generateClassification } from "./generateClassification";
8
- import { formatTemplate } from "../template";
9
10
 
10
11
  /**
11
12
  * Convert a mapping of choices to labels
package/src/llm/createDocumentRelevancyEvaluator.ts CHANGED
@@ -1,8 +1,9 @@
1
- import { CreateClassificationEvaluatorArgs, Evaluator } from "../types/evals";
2
1
  import {
3
- DOCUMENT_RELEVANCY_TEMPLATE,
4
2
  DOCUMENT_RELEVANCY_CHOICES,
3
+ DOCUMENT_RELEVANCY_TEMPLATE,
5
4
  } from "../default_templates/DOCUMENT_RELEVANCY_TEMPLATE";
5
+ import { CreateClassificationEvaluatorArgs, Evaluator } from "../types/evals";
6
+
6
7
  import { createClassificationEvaluator } from "./createClassificationEvaluator";
7
8
 
8
9
  export interface DocumentRelevancyEvaluatorArgs
package/src/llm/createHallucinationEvaluator.ts CHANGED
@@ -1,10 +1,11 @@
1
- import { CreateClassificationEvaluatorArgs } from "../types/evals";
2
1
  import {
3
- HALLUCINATION_TEMPLATE,
4
2
  HALLUCINATION_CHOICES,
3
+ HALLUCINATION_TEMPLATE,
5
4
  } from "../default_templates/HALLUCINATION_TEMPLATE";
6
- import { createClassificationEvaluator } from "./createClassificationEvaluator";
5
+ import { CreateClassificationEvaluatorArgs } from "../types/evals";
6
+
7
7
  import { ClassificationEvaluator } from "./ClassificationEvaluator";
8
+ import { createClassificationEvaluator } from "./createClassificationEvaluator";
8
9
 
9
10
  export interface HallucinationEvaluatorArgs
10
11
  extends Omit<
package/src/llm/generateClassification.ts CHANGED
@@ -1,9 +1,10 @@
1
+ import { tracer } from "../telemetry";
1
2
  import { ClassificationResult, WithLLM } from "../types/evals";
2
3
  import { WithTelemetry } from "../types/otel";
3
4
  import type { WithPrompt } from "../types/prompts";
5
+
4
6
  import { generateObject } from "ai";
5
7
  import { z } from "zod";
6
- import { tracer } from "../telemetry";
7
8
  export type ClassifyArgs = WithLLM &
8
9
  WithTelemetry &
9
10
  WithPrompt & {
package/src/template/applyTemplate.ts CHANGED
@@ -1,4 +1,5 @@
1
1
  import { Template } from "../types/templating";
2
+
2
3
  import Mustache from "mustache";
3
4
 
4
5
  /**
package/src/template/getTemplateVariables.ts CHANGED
@@ -1,4 +1,5 @@
1
1
  import { Template } from "../types/templating";
2
+
2
3
  import Mustache from "mustache";
3
4
 
4
5
  type GetTemplateVariableArgs = {
package/src/types/evals.ts CHANGED
@@ -1,6 +1,7 @@
1
- import { LanguageModel } from "ai";
2
1
  import { WithTelemetry } from "./otel";
3
2
 
3
+ import { LanguageModel } from "ai";
4
+
4
5
  /**
5
6
  * A specific AI example that is under evaluation
6
7
  */
@@ -101,9 +102,9 @@ export type EvaluatorFn<ExampleType extends Record<string, unknown>> = (
101
102
  ) => Promise<EvaluationResult>;
102
103
 
103
104
  /**
104
- * The source of the evaluation
105
+ * The kind of the evaluation
105
106
  */
106
- export type EvaluationSource = "LLM" | "CODE";
107
+ export type EvaluationKind = "LLM" | "CODE";
107
108
 
108
109
  /**
109
110
  * The direction to optimize the numeric evaluation score
@@ -120,9 +121,9 @@ interface EvaluatorDescription {
120
121
  */
121
122
  name: string;
122
123
  /**
123
- * The source of the evaluation. Also known as the "kind" of evaluator.
124
+ * The kind of the evaluation. Also known as the "kind" of evaluator.
124
125
  */
125
- source: EvaluationSource;
126
+ kind: EvaluationKind;
126
127
  /**
127
128
  * The direction to optimize the numeric evaluation score
128
129
  * E.x. "MAXIMIZE" means that the higher the score, the better the evaluation