@arizeai/phoenix-evals 0.0.8 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/dist/esm/default_templates/HALLUCINATION_TEMPLATE.d.ts +1 -1
  2. package/dist/esm/default_templates/HALLUCINATION_TEMPLATE.js +2 -2
  3. package/dist/esm/default_templates/HALLUCINATION_TEMPLATE.js.map +1 -1
  4. package/dist/esm/llm/createClassificationEvaluator.d.ts +3 -0
  5. package/dist/esm/llm/createClassificationEvaluator.d.ts.map +1 -0
  6. package/dist/esm/llm/createClassificationEvaluator.js +10 -0
  7. package/dist/esm/llm/createClassificationEvaluator.js.map +1 -0
  8. package/dist/esm/llm/createClassifierFn.d.ts +6 -0
  9. package/dist/esm/llm/createClassifierFn.d.ts.map +1 -0
  10. package/dist/esm/llm/{createClassifier.js → createClassifierFn.js} +3 -3
  11. package/dist/esm/llm/createClassifierFn.js.map +1 -0
  12. package/dist/esm/llm/createDocumentRelevancyEvaluator.d.ts +8 -6
  13. package/dist/esm/llm/createDocumentRelevancyEvaluator.d.ts.map +1 -1
  14. package/dist/esm/llm/createDocumentRelevancyEvaluator.js +6 -5
  15. package/dist/esm/llm/createDocumentRelevancyEvaluator.js.map +1 -1
  16. package/dist/esm/llm/createHallucinationEvaluator.d.ts +7 -5
  17. package/dist/esm/llm/createHallucinationEvaluator.d.ts.map +1 -1
  18. package/dist/esm/llm/createHallucinationEvaluator.js +5 -4
  19. package/dist/esm/llm/createHallucinationEvaluator.js.map +1 -1
  20. package/dist/esm/llm/index.d.ts +2 -1
  21. package/dist/esm/llm/index.d.ts.map +1 -1
  22. package/dist/esm/llm/index.js +2 -1
  23. package/dist/esm/llm/index.js.map +1 -1
  24. package/dist/esm/tsconfig.esm.tsbuildinfo +1 -1
  25. package/dist/esm/types/evals.d.ts +45 -0
  26. package/dist/esm/types/evals.d.ts.map +1 -1
  27. package/dist/src/default_templates/HALLUCINATION_TEMPLATE.d.ts +1 -1
  28. package/dist/src/default_templates/HALLUCINATION_TEMPLATE.js +2 -2
  29. package/dist/src/default_templates/HALLUCINATION_TEMPLATE.js.map +1 -1
  30. package/dist/src/llm/createClassificationEvaluator.d.ts +3 -0
  31. package/dist/src/llm/createClassificationEvaluator.d.ts.map +1 -0
  32. package/dist/src/llm/createClassificationEvaluator.js +13 -0
  33. package/dist/src/llm/createClassificationEvaluator.js.map +1 -0
  34. package/dist/src/llm/createClassifierFn.d.ts +6 -0
  35. package/dist/src/llm/createClassifierFn.d.ts.map +1 -0
  36. package/dist/src/llm/{createClassifier.js → createClassifierFn.js} +4 -4
  37. package/dist/src/llm/createClassifierFn.js.map +1 -0
  38. package/dist/src/llm/createDocumentRelevancyEvaluator.d.ts +8 -6
  39. package/dist/src/llm/createDocumentRelevancyEvaluator.d.ts.map +1 -1
  40. package/dist/src/llm/createDocumentRelevancyEvaluator.js +7 -6
  41. package/dist/src/llm/createDocumentRelevancyEvaluator.js.map +1 -1
  42. package/dist/src/llm/createHallucinationEvaluator.d.ts +7 -5
  43. package/dist/src/llm/createHallucinationEvaluator.d.ts.map +1 -1
  44. package/dist/src/llm/createHallucinationEvaluator.js +6 -5
  45. package/dist/src/llm/createHallucinationEvaluator.js.map +1 -1
  46. package/dist/src/llm/index.d.ts +2 -1
  47. package/dist/src/llm/index.d.ts.map +1 -1
  48. package/dist/src/llm/index.js +2 -1
  49. package/dist/src/llm/index.js.map +1 -1
  50. package/dist/src/types/evals.d.ts +45 -0
  51. package/dist/src/types/evals.d.ts.map +1 -1
  52. package/dist/tsconfig.tsbuildinfo +1 -1
  53. package/package.json +2 -2
  54. package/src/default_templates/HALLUCINATION_TEMPLATE.ts +2 -2
  55. package/src/llm/createClassificationEvaluator.ts +13 -0
  56. package/src/llm/{createClassifier.ts → createClassifierFn.ts} +2 -2
  57. package/src/llm/createDocumentRelevancyEvaluator.ts +23 -15
  58. package/src/llm/createHallucinationEvaluator.ts +16 -8
  59. package/src/llm/index.ts +2 -1
  60. package/src/types/evals.ts +49 -0
  61. package/dist/esm/llm/createClassifier.d.ts +0 -6
  62. package/dist/esm/llm/createClassifier.d.ts.map +0 -1
  63. package/dist/esm/llm/createClassifier.js.map +0 -1
  64. package/dist/src/llm/createClassifier.d.ts +0 -6
  65. package/dist/src/llm/createClassifier.d.ts.map +0 -1
  66. package/dist/src/llm/createClassifier.js.map +0 -1
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@arizeai/phoenix-evals",
3
- "version": "0.0.8",
3
+ "version": "0.2.0",
4
4
  "description": "A library for running evaluations for AI use cases",
5
5
  "main": "dist/src/index.js",
6
6
  "module": "dist/esm/index.js",
@@ -50,7 +50,7 @@
50
50
  "typedoc": "^0.27.9",
51
51
  "typescript": "^5.8.2",
52
52
  "vitest": "^2.1.9",
53
- "@arizeai/phoenix-client": "2.4.0"
53
+ "@arizeai/phoenix-client": "3.0.0"
54
54
  },
55
55
  "engines": {
56
56
  "node": ">=18"
@@ -26,6 +26,6 @@ Is the answer above factual or hallucinated based on the query and reference tex
26
26
  `;
27
27
 
28
28
  export const HALLUCINATION_CHOICES = {
29
- factual: 1,
30
- hallucinated: 0,
29
+ hallucinated: 1,
30
+ factual: 0,
31
31
  };
@@ -0,0 +1,13 @@
1
+ import { CreateClassificationEvaluatorArgs, Evaluator } from "../types/evals";
2
+ import { createClassifierFn } from "./createClassifierFn";
3
+
4
+ export function createClassificationEvaluator<
5
+ ExampleType extends Record<string, unknown>,
6
+ >(args: CreateClassificationEvaluatorArgs): Evaluator<ExampleType> {
7
+ return {
8
+ name: args.name,
9
+ source: "LLM",
10
+ optimizationDirection: args.optimizationDirection,
11
+ evaluate: createClassifierFn(args),
12
+ };
13
+ }
@@ -22,9 +22,9 @@ function choicesToLabels(
22
22
  }
23
23
 
24
24
  /**
25
- * A function that serves as a factory that will output a classification evaluator
25
+ * A function that serves as a factory that will output a classification evaluator function
26
26
  */
27
- export function createClassifier<ExampleType extends Record<string, unknown>>(
27
+ export function createClassifierFn<ExampleType extends Record<string, unknown>>(
28
28
  args: CreateClassifierArgs
29
29
  ): EvaluatorFn<ExampleType> {
30
30
  const { model, choices, promptTemplate, ...rest } = args;
@@ -1,14 +1,19 @@
1
- import { createClassifier } from "./createClassifier";
2
- import { CreateClassifierArgs, EvaluatorFn } from "../types/evals";
1
+ import { CreateClassificationEvaluatorArgs, Evaluator } from "../types/evals";
3
2
  import {
4
3
  DOCUMENT_RELEVANCY_TEMPLATE,
5
4
  DOCUMENT_RELEVANCY_CHOICES,
6
5
  } from "../default_templates/DOCUMENT_RELEVANCY_TEMPLATE";
6
+ import { createClassificationEvaluator } from "./createClassificationEvaluator";
7
7
 
8
8
  export interface DocumentRelevancyEvaluatorArgs
9
- extends Omit<CreateClassifierArgs, "promptTemplate" | "choices"> {
10
- choices?: CreateClassifierArgs["choices"];
11
- promptTemplate?: CreateClassifierArgs["promptTemplate"];
9
+ extends Omit<
10
+ CreateClassificationEvaluatorArgs,
11
+ "promptTemplate" | "choices" | "optimizationDirection" | "name"
12
+ > {
13
+ optimizationDirection?: CreateClassificationEvaluatorArgs["optimizationDirection"];
14
+ name?: CreateClassificationEvaluatorArgs["name"];
15
+ choices?: CreateClassificationEvaluatorArgs["choices"];
16
+ promptTemplate?: CreateClassificationEvaluatorArgs["promptTemplate"];
12
17
  }
13
18
 
14
19
  /**
@@ -38,7 +43,7 @@ export type DocumentRelevancyExample = {
38
43
  * @example
39
44
  * ```ts
40
45
  * const evaluator = createDocumentRelevancyEvaluator({ model: openai("gpt-4o-mini") });
41
- * const result = await evaluator({
46
+ * const result = await evaluator.evaluate({
42
47
  * input: "What is the capital of France?",
43
48
  * documentText: "Paris is the capital and most populous city of France.",
44
49
  * });
@@ -47,18 +52,21 @@ export type DocumentRelevancyExample = {
47
52
  */
48
53
  export function createDocumentRelevancyEvaluator(
49
54
  args: DocumentRelevancyEvaluatorArgs
50
- ): EvaluatorFn<DocumentRelevancyExample> {
55
+ ): Evaluator<DocumentRelevancyExample> {
51
56
  const {
52
57
  choices = DOCUMENT_RELEVANCY_CHOICES,
53
58
  promptTemplate = DOCUMENT_RELEVANCY_TEMPLATE,
59
+ optimizationDirection = "MAXIMIZE",
60
+ name = "document_relevancy",
54
61
  ...rest
55
62
  } = args;
56
- const documentRelevancyEvaluatorFn =
57
- createClassifier<DocumentRelevancyExample>({
58
- ...args,
59
- promptTemplate,
60
- choices,
61
- ...rest,
62
- });
63
- return documentRelevancyEvaluatorFn;
63
+
64
+ return createClassificationEvaluator<DocumentRelevancyExample>({
65
+ ...args,
66
+ promptTemplate,
67
+ choices,
68
+ optimizationDirection,
69
+ name,
70
+ ...rest,
71
+ });
64
72
  }
@@ -1,14 +1,19 @@
1
- import { createClassifier } from "./createClassifier";
2
- import { CreateClassifierArgs, EvaluatorFn } from "../types/evals";
1
+ import { CreateClassificationEvaluatorArgs, Evaluator } from "../types/evals";
3
2
  import {
4
3
  HALLUCINATION_TEMPLATE,
5
4
  HALLUCINATION_CHOICES,
6
5
  } from "../default_templates/HALLUCINATION_TEMPLATE";
6
+ import { createClassificationEvaluator } from "./createClassificationEvaluator";
7
7
 
8
8
  export interface HallucinationEvaluatorArgs
9
- extends Omit<CreateClassifierArgs, "promptTemplate" | "choices"> {
10
- choices?: CreateClassifierArgs["choices"];
11
- promptTemplate?: CreateClassifierArgs["promptTemplate"];
9
+ extends Omit<
10
+ CreateClassificationEvaluatorArgs,
11
+ "promptTemplate" | "choices" | "optimizationDirection" | "name"
12
+ > {
13
+ optimizationDirection?: CreateClassificationEvaluatorArgs["optimizationDirection"];
14
+ name?: CreateClassificationEvaluatorArgs["name"];
15
+ choices?: CreateClassificationEvaluatorArgs["choices"];
16
+ promptTemplate?: CreateClassificationEvaluatorArgs["promptTemplate"];
12
17
  }
13
18
 
14
19
  /**
@@ -28,17 +33,20 @@ export type HallucinationExample = {
28
33
  */
29
34
  export function createHallucinationEvaluator(
30
35
  args: HallucinationEvaluatorArgs
31
- ): EvaluatorFn<HallucinationExample> {
36
+ ): Evaluator<HallucinationExample> {
32
37
  const {
33
38
  choices = HALLUCINATION_CHOICES,
34
39
  promptTemplate = HALLUCINATION_TEMPLATE,
40
+ optimizationDirection = "MINIMIZE",
41
+ name = "hallucination",
35
42
  ...rest
36
43
  } = args;
37
- const hallucinationEvaluatorFn = createClassifier<HallucinationExample>({
44
+ return createClassificationEvaluator<HallucinationExample>({
38
45
  ...args,
39
46
  promptTemplate,
40
47
  choices,
48
+ optimizationDirection,
49
+ name,
41
50
  ...rest,
42
51
  });
43
- return hallucinationEvaluatorFn;
44
52
  }
package/src/llm/index.ts CHANGED
@@ -1,4 +1,5 @@
1
1
  export * from "./generateClassification";
2
- export * from "./createClassifier";
2
+ export * from "./createClassifierFn";
3
+ export * from "./createClassificationEvaluator";
3
4
  export * from "./createHallucinationEvaluator";
4
5
  export * from "./createDocumentRelevancyEvaluator";
@@ -78,6 +78,55 @@ export interface CreateClassifierArgs extends WithTelemetry {
78
78
  promptTemplate: string;
79
79
  }
80
80
 
81
+ export interface CreateClassificationEvaluatorArgs
82
+ extends CreateClassifierArgs {
83
+ /**
84
+ * The name of the metric that the evaluator produces
85
+ * E.x. "correctness"
86
+ */
87
+ name: string;
88
+ /**
89
+ * If present, represents the direction in which you want the metric to be optimized
90
+ * E.x. "MAXIMIZE" means you want the number to be higher.
91
+ */
92
+ optimizationDirection?: OptimizationDirection;
93
+ }
94
+
81
95
  export type EvaluatorFn<ExampleType extends Record<string, unknown>> = (
82
96
  args: ExampleType
83
97
  ) => Promise<EvaluationResult>;
98
+
99
+ /**
100
+ * The source of the evaluation
101
+ */
102
+ type EvaluationSource = "LLM" | "CODE";
103
+
104
+ /**
105
+ * The direction to optimize the numeric evaluation score
106
+ * E.x. "MAXIMIZE" means that the higher the score, the better the evaluation
107
+ */
108
+ type OptimizationDirection = "MAXIMIZE" | "MINIMIZE";
109
+
110
+ /**
111
+ * The Base Evaluator interface
112
+ * This is the interface that all evaluators must implement
113
+ */
114
+ export interface Evaluator<ExampleType extends Record<string, unknown>> {
115
+ /**
116
+ * The name of the evaluator / the metric that it measures
117
+ */
118
+ name: string;
119
+ /**
120
+ * The source of the evaluation. Also known as the "kind" of evaluator.
121
+ */
122
+ source: EvaluationSource;
123
+ /**
124
+ * The direction to optimize the numeric evaluation score
125
+ * E.x. "MAXIMIZE" means that the higher the score, the better the evaluation
126
+ */
127
+ optimizationDirection?: OptimizationDirection;
128
+ /**
129
+ * The function that evaluates the example
130
+ */
131
+ evaluate: EvaluatorFn<ExampleType>;
132
+ }
@@ -1,6 +0,0 @@
1
- import { CreateClassifierArgs, EvaluatorFn } from "../types/evals.js";
2
- /**
3
- * A function that serves as a factory that will output a classification evaluator
4
- */
5
- export declare function createClassifier<ExampleType extends Record<string, unknown>>(args: CreateClassifierArgs): EvaluatorFn<ExampleType>;
6
- //# sourceMappingURL=createClassifier.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"createClassifier.d.ts","sourceRoot":"","sources":["../../../src/llm/createClassifier.ts"],"names":[],"mappings":"AAAA,OAAO,EAGL,oBAAoB,EACpB,WAAW,EACZ,MAAM,gBAAgB,CAAC;AAkBxB;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,WAAW,SAAS,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAC1E,IAAI,EAAE,oBAAoB,GACzB,WAAW,CAAC,WAAW,CAAC,CA4B1B"}
@@ -1 +0,0 @@
1
- {"version":3,"file":"createClassifier.js","sourceRoot":"","sources":["../../../src/llm/createClassifier.ts"],"names":[],"mappings":"AAMA,OAAO,EAAE,sBAAsB,EAAE,MAAM,0BAA0B,CAAC;AAClE,OAAO,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAE7C;;;GAGG;AACH,SAAS,eAAe,CACtB,OAAiC;IAEjC,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACpC,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtB,MAAM,IAAI,KAAK,CAAC,qBAAqB,CAAC,CAAC;IACzC,CAAC;IACD,OAAO,MAA+B,CAAC;AACzC,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,gBAAgB,CAC9B,IAA0B;IAE1B,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,cAAc,EAAE,GAAG,IAAI,EAAE,GAAG,IAAI,CAAC;IAEzD,OAAO,KAAK,EAAE,IAAiB,EAA6B,EAAE;QAC5D,MAAM,iBAAiB,GAAG;YACxB,GAAG,IAAI;SACR,CAAC;QAEF,MAAM,MAAM,GAAG,cAAc,CAAC;YAC5B,QAAQ,EAAE,cAAc;YACxB,SAAS,EAAE,iBAAiB;SAC7B,CAAC,CAAC;QAEH,MAAM,cAAc,GAAG,MAAM,sBAAsB,CAAC;YAClD,KAAK;YACL,MAAM,EAAE,eAAe,CAAC,OAAO,CAAC;YAChC,MAAM;YACN,GAAG,IAAI;SACR,CAAC,CAAC;QAEH,mEAAmE;QACnE,MAAM,KAAK,GAAG,OAAO,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC;QAE5C,OAAO;YACL,KAAK;YACL,GAAG,cAAc;SAClB,CAAC;IACJ,CAAC,CAAC;AACJ,CAAC"}
@@ -1,6 +0,0 @@
1
- import { CreateClassifierArgs, EvaluatorFn } from "../types/evals";
2
- /**
3
- * A function that serves as a factory that will output a classification evaluator
4
- */
5
- export declare function createClassifier<ExampleType extends Record<string, unknown>>(args: CreateClassifierArgs): EvaluatorFn<ExampleType>;
6
- //# sourceMappingURL=createClassifier.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"createClassifier.d.ts","sourceRoot":"","sources":["../../../src/llm/createClassifier.ts"],"names":[],"mappings":"AAAA,OAAO,EAGL,oBAAoB,EACpB,WAAW,EACZ,MAAM,gBAAgB,CAAC;AAkBxB;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,WAAW,SAAS,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAC1E,IAAI,EAAE,oBAAoB,GACzB,WAAW,CAAC,WAAW,CAAC,CA4B1B"}
@@ -1 +0,0 @@
1
- {"version":3,"file":"createClassifier.js","sourceRoot":"","sources":["../../../src/llm/createClassifier.ts"],"names":[],"mappings":";;;;;;;;;;;;;AA0BA,4CA8BC;AAlDD,qEAAkE;AAClE,0CAA6C;AAE7C;;;GAGG;AACH,SAAS,eAAe,CACtB,OAAiC;IAEjC,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACpC,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtB,MAAM,IAAI,KAAK,CAAC,qBAAqB,CAAC,CAAC;IACzC,CAAC;IACD,OAAO,MAA+B,CAAC;AACzC,CAAC;AAED;;GAEG;AACH,SAAgB,gBAAgB,CAC9B,IAA0B;IAE1B,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,cAAc,KAAc,IAAI,EAAb,IAAI,UAAK,IAAI,EAAlD,sCAA2C,CAAO,CAAC;IAEzD,OAAO,KAAK,EAAE,IAAiB,EAA6B,EAAE;QAC5D,MAAM,iBAAiB,qBAClB,IAAI,CACR,CAAC;QAEF,MAAM,MAAM,GAAG,IAAA,yBAAc,EAAC;YAC5B,QAAQ,EAAE,cAAc;YACxB,SAAS,EAAE,iBAAiB;SAC7B,CAAC,CAAC;QAEH,MAAM,cAAc,GAAG,MAAM,IAAA,+CAAsB,kBACjD,KAAK,EACL,MAAM,EAAE,eAAe,CAAC,OAAO,CAAC,EAChC,MAAM,IACH,IAAI,EACP,CAAC;QAEH,mEAAmE;QACnE,MAAM,KAAK,GAAG,OAAO,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC;QAE5C,uBACE,KAAK,IACF,cAAc,EACjB;IACJ,CAAC,CAAC;AACJ,CAAC"}