@arizeai/phoenix-evals 0.5.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. package/README.md +42 -0
  2. package/dist/esm/core/EvaluatorBase.d.ts +19 -0
  3. package/dist/esm/core/EvaluatorBase.d.ts.map +1 -0
  4. package/dist/esm/core/EvaluatorBase.js +18 -0
  5. package/dist/esm/core/EvaluatorBase.js.map +1 -0
  6. package/dist/esm/index.d.ts +1 -0
  7. package/dist/esm/index.d.ts.map +1 -1
  8. package/dist/esm/index.js +1 -0
  9. package/dist/esm/index.js.map +1 -1
  10. package/dist/esm/llm/ClassificationEvaluator.d.ts +19 -2
  11. package/dist/esm/llm/ClassificationEvaluator.d.ts.map +1 -1
  12. package/dist/esm/llm/ClassificationEvaluator.js +29 -2
  13. package/dist/esm/llm/ClassificationEvaluator.js.map +1 -1
  14. package/dist/esm/llm/LLMEvaluator.d.ts +4 -7
  15. package/dist/esm/llm/LLMEvaluator.d.ts.map +1 -1
  16. package/dist/esm/llm/LLMEvaluator.js +4 -7
  17. package/dist/esm/llm/LLMEvaluator.js.map +1 -1
  18. package/dist/esm/llm/createClassificationEvaluator.d.ts +1 -1
  19. package/dist/esm/llm/createClassificationEvaluator.d.ts.map +1 -1
  20. package/dist/esm/llm/createClassificationEvaluator.js.map +1 -1
  21. package/dist/esm/llm/createDocumentRelevancyEvaluator.d.ts +8 -7
  22. package/dist/esm/llm/createDocumentRelevancyEvaluator.d.ts.map +1 -1
  23. package/dist/esm/llm/createDocumentRelevancyEvaluator.js +1 -2
  24. package/dist/esm/llm/createDocumentRelevancyEvaluator.js.map +1 -1
  25. package/dist/esm/llm/createHallucinationEvaluator.d.ts +6 -6
  26. package/dist/esm/llm/createHallucinationEvaluator.d.ts.map +1 -1
  27. package/dist/esm/llm/createHallucinationEvaluator.js +1 -2
  28. package/dist/esm/llm/createHallucinationEvaluator.js.map +1 -1
  29. package/dist/esm/tsconfig.esm.tsbuildinfo +1 -1
  30. package/dist/esm/types/data.d.ts +194 -0
  31. package/dist/esm/types/data.d.ts.map +1 -0
  32. package/dist/esm/types/data.js +2 -0
  33. package/dist/esm/types/data.js.map +1 -0
  34. package/dist/esm/types/evals.d.ts +17 -3
  35. package/dist/esm/types/evals.d.ts.map +1 -1
  36. package/dist/esm/types/index.d.ts +1 -0
  37. package/dist/esm/types/index.d.ts.map +1 -1
  38. package/dist/esm/types/index.js +1 -0
  39. package/dist/esm/types/index.js.map +1 -1
  40. package/dist/esm/types/otel.d.ts +14 -13
  41. package/dist/esm/types/otel.d.ts.map +1 -1
  42. package/dist/esm/utils/bindEvaluator.d.ts +219 -0
  43. package/dist/esm/utils/bindEvaluator.d.ts.map +1 -0
  44. package/dist/esm/utils/bindEvaluator.js +163 -0
  45. package/dist/esm/utils/bindEvaluator.js.map +1 -0
  46. package/dist/esm/utils/index.d.ts +2 -0
  47. package/dist/esm/utils/index.d.ts.map +1 -0
  48. package/dist/esm/utils/index.js +2 -0
  49. package/dist/esm/utils/index.js.map +1 -0
  50. package/dist/esm/utils/objectMappingUtils.d.ts +166 -0
  51. package/dist/esm/utils/objectMappingUtils.d.ts.map +1 -0
  52. package/dist/esm/utils/objectMappingUtils.js +191 -0
  53. package/dist/esm/utils/objectMappingUtils.js.map +1 -0
  54. package/dist/src/core/EvaluatorBase.d.ts +19 -0
  55. package/dist/src/core/EvaluatorBase.d.ts.map +1 -0
  56. package/dist/src/core/EvaluatorBase.js +17 -0
  57. package/dist/src/core/EvaluatorBase.js.map +1 -0
  58. package/dist/src/index.d.ts +1 -0
  59. package/dist/src/index.d.ts.map +1 -1
  60. package/dist/src/index.js +1 -0
  61. package/dist/src/index.js.map +1 -1
  62. package/dist/src/llm/ClassificationEvaluator.d.ts +19 -2
  63. package/dist/src/llm/ClassificationEvaluator.d.ts.map +1 -1
  64. package/dist/src/llm/ClassificationEvaluator.js +13 -2
  65. package/dist/src/llm/ClassificationEvaluator.js.map +1 -1
  66. package/dist/src/llm/LLMEvaluator.d.ts +4 -7
  67. package/dist/src/llm/LLMEvaluator.d.ts.map +1 -1
  68. package/dist/src/llm/LLMEvaluator.js +16 -5
  69. package/dist/src/llm/LLMEvaluator.js.map +1 -1
  70. package/dist/src/llm/createClassificationEvaluator.d.ts +1 -1
  71. package/dist/src/llm/createClassificationEvaluator.d.ts.map +1 -1
  72. package/dist/src/llm/createClassificationEvaluator.js.map +1 -1
  73. package/dist/src/llm/createDocumentRelevancyEvaluator.d.ts +8 -7
  74. package/dist/src/llm/createDocumentRelevancyEvaluator.d.ts.map +1 -1
  75. package/dist/src/llm/createDocumentRelevancyEvaluator.js +2 -2
  76. package/dist/src/llm/createDocumentRelevancyEvaluator.js.map +1 -1
  77. package/dist/src/llm/createHallucinationEvaluator.d.ts +6 -6
  78. package/dist/src/llm/createHallucinationEvaluator.d.ts.map +1 -1
  79. package/dist/src/llm/createHallucinationEvaluator.js +2 -2
  80. package/dist/src/llm/createHallucinationEvaluator.js.map +1 -1
  81. package/dist/src/types/data.d.ts +194 -0
  82. package/dist/src/types/data.d.ts.map +1 -0
  83. package/dist/src/types/data.js +3 -0
  84. package/dist/src/types/data.js.map +1 -0
  85. package/dist/src/types/evals.d.ts +17 -3
  86. package/dist/src/types/evals.d.ts.map +1 -1
  87. package/dist/src/types/index.d.ts +1 -0
  88. package/dist/src/types/index.d.ts.map +1 -1
  89. package/dist/src/types/index.js +1 -0
  90. package/dist/src/types/index.js.map +1 -1
  91. package/dist/src/types/otel.d.ts +14 -13
  92. package/dist/src/types/otel.d.ts.map +1 -1
  93. package/dist/src/utils/bindEvaluator.d.ts +219 -0
  94. package/dist/src/utils/bindEvaluator.d.ts.map +1 -0
  95. package/dist/src/utils/bindEvaluator.js +166 -0
  96. package/dist/src/utils/bindEvaluator.js.map +1 -0
  97. package/dist/src/utils/index.d.ts +2 -0
  98. package/dist/src/utils/index.d.ts.map +1 -0
  99. package/dist/src/utils/index.js +18 -0
  100. package/dist/src/utils/index.js.map +1 -0
  101. package/dist/src/utils/objectMappingUtils.d.ts +166 -0
  102. package/dist/src/utils/objectMappingUtils.d.ts.map +1 -0
  103. package/dist/src/utils/objectMappingUtils.js +191 -0
  104. package/dist/src/utils/objectMappingUtils.js.map +1 -0
  105. package/dist/tsconfig.tsbuildinfo +1 -1
  106. package/package.json +2 -1
  107. package/src/core/EvaluatorBase.ts +43 -0
  108. package/src/index.ts +1 -0
  109. package/src/llm/ClassificationEvaluator.ts +39 -3
  110. package/src/llm/LLMEvaluator.ts +7 -16
  111. package/src/llm/createClassificationEvaluator.ts +1 -1
  112. package/src/llm/createDocumentRelevancyEvaluator.ts +17 -12
  113. package/src/llm/createHallucinationEvaluator.ts +12 -10
  114. package/src/types/data.ts +200 -0
  115. package/src/types/evals.ts +25 -5
  116. package/src/types/index.ts +1 -0
  117. package/src/types/otel.ts +15 -13
  118. package/src/utils/bindEvaluator.ts +229 -0
  119. package/src/utils/index.ts +1 -0
  120. package/src/utils/objectMappingUtils.ts +202 -0
@@ -1,14 +1,19 @@
1
1
  import { getTemplateVariables } from "../template";
2
2
  import {
3
+ ClassificationChoicesMap,
3
4
  CreateClassificationEvaluatorArgs,
4
5
  EvaluatorFn,
5
6
  PromptTemplate,
6
7
  WithPromptTemplate,
7
8
  } from "../types";
9
+ import { ObjectMapping } from "../types/data";
10
+ import { remapObject } from "../utils/objectMappingUtils";
8
11
 
9
12
  import { createClassifierFn } from "./createClassifierFn";
10
13
  import { LLMEvaluator } from "./LLMEvaluator";
11
14
 
15
+ import { LanguageModel } from "ai";
16
+
12
17
  /**
13
18
  * An LLM evaluator that performs evaluation via classification
14
19
  */
@@ -18,14 +23,34 @@ export class ClassificationEvaluator<RecordType extends Record<string, unknown>>
18
23
  {
19
24
  readonly evaluatorFn: EvaluatorFn<RecordType>;
20
25
  readonly promptTemplate: PromptTemplate;
26
+ /**
27
+ * A dynamically computed set of prompt template variables
28
+ */
21
29
  private _promptTemplateVariables: string[] | undefined;
22
- constructor(args: CreateClassificationEvaluatorArgs) {
30
+ /**
31
+ * The model to use for classification
32
+ */
33
+ readonly model: LanguageModel;
34
+ /**
35
+ * The choices to classify the example into
36
+ */
37
+ readonly choices: ClassificationChoicesMap;
38
+
39
+ constructor(args: CreateClassificationEvaluatorArgs<RecordType>) {
23
40
  super(args);
24
41
  this.promptTemplate = args.promptTemplate;
25
- this.evaluatorFn = createClassifierFn<RecordType>(args);
42
+ this.model = args.model;
43
+ this.choices = args.choices;
44
+ this.evaluatorFn = createClassifierFn<RecordType>({
45
+ ...args,
46
+ });
26
47
  }
27
48
  evaluate = (example: RecordType) => {
28
- return this.evaluatorFn(example);
49
+ return this.evaluatorFn(
50
+ this.inputMapping
51
+ ? remapObject<RecordType>(example, this.inputMapping)
52
+ : example
53
+ );
29
54
  };
30
55
  /**
31
56
  * List out the prompt template variables needed to perform evaluation
@@ -40,4 +65,15 @@ export class ClassificationEvaluator<RecordType extends Record<string, unknown>>
40
65
  // Give a copy of the variables
41
66
  return [...this._promptTemplateVariables];
42
67
  }
68
+ /**
69
+ * Binds the input mapping to the evaluator. It makes a copy of the evaluator and returns it.
70
+ */
71
+ bindInputMapping(
72
+ inputMapping: ObjectMapping<RecordType>
73
+ ): ClassificationEvaluator<RecordType> {
74
+ return new ClassificationEvaluator({
75
+ ...this,
76
+ inputMapping,
77
+ });
78
+ }
43
79
  }
@@ -1,22 +1,13 @@
1
- import {
2
- CreateEvaluatorArgs,
3
- EvaluationResult,
4
- Evaluator,
5
- OptimizationDirection,
6
- } from "../types";
1
+ import { EvaluatorBase } from "../core/EvaluatorBase";
2
+ import { CreateLLMEvaluatorArgs } from "../types";
7
3
 
8
4
  /**
9
5
  * Base class for llm evaluation metrics / scores
10
6
  */
11
- export abstract class LLMEvaluator<RecordType extends Record<string, unknown>>
12
- implements Evaluator<RecordType>
13
- {
14
- readonly name: string;
15
- readonly kind = "LLM" as const;
16
- readonly optimizationDirection?: OptimizationDirection;
17
- constructor({ name, optimizationDirection }: CreateEvaluatorArgs) {
18
- this.name = name;
19
- this.optimizationDirection = optimizationDirection;
7
+ export abstract class LLMEvaluator<
8
+ RecordType extends Record<string, unknown>,
9
+ > extends EvaluatorBase<RecordType> {
10
+ constructor({ ...args }: CreateLLMEvaluatorArgs<RecordType>) {
11
+ super({ kind: "LLM", ...args });
20
12
  }
21
- abstract evaluate(_example: RecordType): Promise<EvaluationResult>;
22
13
  }
@@ -5,7 +5,7 @@ import { ClassificationEvaluator } from "./ClassificationEvaluator";
5
5
  export function createClassificationEvaluator<
6
6
  RecordType extends Record<string, unknown>,
7
7
  >(
8
- args: CreateClassificationEvaluatorArgs
8
+ args: CreateClassificationEvaluatorArgs<RecordType>
9
9
  ): ClassificationEvaluator<RecordType> {
10
10
  return new ClassificationEvaluator<RecordType>(args);
11
11
  }
@@ -2,19 +2,24 @@ import {
2
2
  DOCUMENT_RELEVANCY_CHOICES,
3
3
  DOCUMENT_RELEVANCY_TEMPLATE,
4
4
  } from "../default_templates/DOCUMENT_RELEVANCY_TEMPLATE";
5
- import { CreateClassificationEvaluatorArgs, Evaluator } from "../types/evals";
5
+ import { CreateClassificationEvaluatorArgs } from "../types/evals";
6
6
 
7
+ import { ClassificationEvaluator } from "./ClassificationEvaluator";
7
8
  import { createClassificationEvaluator } from "./createClassificationEvaluator";
8
9
 
9
- export interface DocumentRelevancyEvaluatorArgs
10
- extends Omit<
11
- CreateClassificationEvaluatorArgs,
10
+ export interface DocumentRelevancyEvaluatorArgs<
11
+ RecordType extends Record<
12
+ string,
13
+ unknown
14
+ > = DocumentRelevancyEvaluationRecord,
15
+ > extends Omit<
16
+ CreateClassificationEvaluatorArgs<RecordType>,
12
17
  "promptTemplate" | "choices" | "optimizationDirection" | "name"
13
18
  > {
14
- optimizationDirection?: CreateClassificationEvaluatorArgs["optimizationDirection"];
15
- name?: CreateClassificationEvaluatorArgs["name"];
16
- choices?: CreateClassificationEvaluatorArgs["choices"];
17
- promptTemplate?: CreateClassificationEvaluatorArgs["promptTemplate"];
19
+ optimizationDirection?: CreateClassificationEvaluatorArgs<RecordType>["optimizationDirection"];
20
+ name?: CreateClassificationEvaluatorArgs<RecordType>["name"];
21
+ choices?: CreateClassificationEvaluatorArgs<RecordType>["choices"];
22
+ promptTemplate?: CreateClassificationEvaluatorArgs<RecordType>["promptTemplate"];
18
23
  }
19
24
 
20
25
  /**
@@ -57,7 +62,9 @@ export function createDocumentRelevancyEvaluator<
57
62
  string,
58
63
  unknown
59
64
  > = DocumentRelevancyEvaluationRecord,
60
- >(args: DocumentRelevancyEvaluatorArgs): Evaluator<RecordType> {
65
+ >(
66
+ args: DocumentRelevancyEvaluatorArgs<RecordType>
67
+ ): ClassificationEvaluator<RecordType> {
61
68
  const {
62
69
  choices = DOCUMENT_RELEVANCY_CHOICES,
63
70
  promptTemplate = DOCUMENT_RELEVANCY_TEMPLATE,
@@ -65,13 +72,11 @@ export function createDocumentRelevancyEvaluator<
65
72
  name = "document_relevancy",
66
73
  ...rest
67
74
  } = args;
68
-
69
75
  return createClassificationEvaluator<RecordType>({
70
- ...args,
76
+ ...rest,
71
77
  promptTemplate,
72
78
  choices,
73
79
  optimizationDirection,
74
80
  name,
75
- ...rest,
76
81
  });
77
82
  }
@@ -7,15 +7,16 @@ import { CreateClassificationEvaluatorArgs } from "../types/evals";
7
7
  import { ClassificationEvaluator } from "./ClassificationEvaluator";
8
8
  import { createClassificationEvaluator } from "./createClassificationEvaluator";
9
9
 
10
- export interface HallucinationEvaluatorArgs
11
- extends Omit<
12
- CreateClassificationEvaluatorArgs,
10
+ export interface HallucinationEvaluatorArgs<
11
+ RecordType extends Record<string, unknown> = HallucinationEvaluationRecord,
12
+ > extends Omit<
13
+ CreateClassificationEvaluatorArgs<RecordType>,
13
14
  "promptTemplate" | "choices" | "optimizationDirection" | "name"
14
15
  > {
15
- optimizationDirection?: CreateClassificationEvaluatorArgs["optimizationDirection"];
16
- name?: CreateClassificationEvaluatorArgs["name"];
17
- choices?: CreateClassificationEvaluatorArgs["choices"];
18
- promptTemplate?: CreateClassificationEvaluatorArgs["promptTemplate"];
16
+ optimizationDirection?: CreateClassificationEvaluatorArgs<RecordType>["optimizationDirection"];
17
+ name?: CreateClassificationEvaluatorArgs<RecordType>["name"];
18
+ choices?: CreateClassificationEvaluatorArgs<RecordType>["choices"];
19
+ promptTemplate?: CreateClassificationEvaluatorArgs<RecordType>["promptTemplate"];
19
20
  }
20
21
 
21
22
  /**
@@ -35,7 +36,9 @@ export type HallucinationEvaluationRecord = {
35
36
  */
36
37
  export function createHallucinationEvaluator<
37
38
  RecordType extends Record<string, unknown> = HallucinationEvaluationRecord,
38
- >(args: HallucinationEvaluatorArgs): ClassificationEvaluator<RecordType> {
39
+ >(
40
+ args: HallucinationEvaluatorArgs<RecordType>
41
+ ): ClassificationEvaluator<RecordType> {
39
42
  const {
40
43
  choices = HALLUCINATION_CHOICES,
41
44
  promptTemplate = HALLUCINATION_TEMPLATE,
@@ -44,11 +47,10 @@ export function createHallucinationEvaluator<
44
47
  ...rest
45
48
  } = args;
46
49
  return createClassificationEvaluator<RecordType>({
47
- ...args,
50
+ ...rest,
48
51
  promptTemplate,
49
52
  choices,
50
53
  optimizationDirection,
51
54
  name,
52
- ...rest,
53
55
  });
54
56
  }
@@ -0,0 +1,200 @@
1
+ /**
2
+ * A value extractor that can retrieve data from an object using various methods.
3
+ *
4
+ * This type supports multiple ways to extract values from your data structure:
5
+ * - **String paths**: Simple property names, dot notation, or JSONPath expressions
6
+ * - **Function extractors**: Custom transformation functions
7
+ *
8
+ * @example
9
+ * **Simple property access:**
10
+ * ```typescript
11
+ * const getter: ValueGetter<{ name: string }> = "name";
12
+ * ```
13
+ *
14
+ * @example
15
+ * **Dot notation for nested properties:**
16
+ * ```typescript
17
+ * const getter: ValueGetter<{ user: { profile: { name: string } } }> = "user.profile.name";
18
+ * ```
19
+ *
20
+ * @example
21
+ * **Array element access:**
22
+ * ```typescript
23
+ * const getter: ValueGetter<{ items: string[] }> = "items[0]";
24
+ * ```
25
+ *
26
+ * @example
27
+ * **JSONPath expression:**
28
+ * ```typescript
29
+ * const getter: ValueGetter<{ items: Array<{ id: number }> }> = "$.items[*].id";
30
+ * ```
31
+ *
32
+ * @example
33
+ * **Function-based extraction:**
34
+ * ```typescript
35
+ * const getter: ValueGetter<{ firstName: string; lastName: string }> =
36
+ * (data) => `${data.firstName} ${data.lastName}`;
37
+ * ```
38
+ *
39
+ * @example
40
+ * **Complex transformation:**
41
+ * ```typescript
42
+ * const getter: ValueGetter<{ scores: number[] }> =
43
+ * (data) => data.scores.reduce((a, b) => a + b, 0) / data.scores.length;
44
+ * ```
45
+ *
46
+ * @typeParam DataType - The type of the data object to extract values from
47
+ */
48
+ export type ValueGetter<DataType extends Record<string, unknown>> =
49
+ | string
50
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
51
+ | ((data: DataType) => any);
52
+
53
+ /**
54
+ * A mapping configuration that transforms data from one structure to another.
55
+ *
56
+ * This type defines how to map fields from your data structure to the fields
57
+ * expected by an evaluator or other component. The mapping is flexible and
58
+ * supports multiple extraction methods.
59
+ *
60
+ * **Key Features:**
61
+ * - Preserves original data fields
62
+ * - Adds/overrides fields with mapped values
63
+ * - Supports nested property access
64
+ * - Supports array element access
65
+ * - Supports JSONPath expressions for complex queries
66
+ * - Supports function-based transformations
67
+ *
68
+ * @example
69
+ * **Basic field mapping:**
70
+ * ```typescript
71
+ * type MyData = {
72
+ * userQuery: string;
73
+ * context: string;
74
+ * response: string;
75
+ * };
76
+ *
77
+ * const mapping: ObjectMapping<MyData> = {
78
+ * input: "userQuery", // Map "input" to "userQuery"
79
+ * reference: "context", // Map "reference" to "context"
80
+ * output: "response", // Map "output" to "response"
81
+ * };
82
+ * ```
83
+ *
84
+ * @example
85
+ * **Nested property mapping:**
86
+ * ```typescript
87
+ * type ApiData = {
88
+ * request: {
89
+ * body: {
90
+ * query: string;
91
+ * context: string;
92
+ * };
93
+ * };
94
+ * response: {
95
+ * data: {
96
+ * text: string;
97
+ * };
98
+ * };
99
+ * };
100
+ *
101
+ * const mapping: ObjectMapping<ApiData> = {
102
+ * input: "request.body.query",
103
+ * reference: "request.body.context",
104
+ * output: "response.data.text",
105
+ * };
106
+ * ```
107
+ *
108
+ * @example
109
+ * **Array element access:**
110
+ * ```typescript
111
+ * type DataWithArrays = {
112
+ * messages: Array<{ role: string; content: string }>;
113
+ * sources: string[];
114
+ * };
115
+ *
116
+ * const mapping: ObjectMapping<DataWithArrays> = {
117
+ * firstMessage: "messages[0].content",
118
+ * lastSource: "sources[-1]", // Last element
119
+ * allRoles: "$.messages[*].role", // JSONPath for all roles
120
+ * };
121
+ * ```
122
+ *
123
+ * @example
124
+ * **Function-based transformations:**
125
+ * ```typescript
126
+ * type RawData = {
127
+ * firstName: string;
128
+ * lastName: string;
129
+ * contexts: string[];
130
+ * scores: number[];
131
+ * };
132
+ *
133
+ * const mapping: ObjectMapping<RawData> = {
134
+ * // Combine fields
135
+ * fullName: (data) => `${data.firstName} ${data.lastName}`,
136
+ * // Transform array to string
137
+ * contextText: (data) => data.contexts.join("\n\n"),
138
+ * // Calculate derived value
139
+ * averageScore: (data) =>
140
+ * data.scores.reduce((a, b) => a + b, 0) / data.scores.length,
141
+ * // Conditional logic
142
+ * status: (data) => data.scores.length > 0 ? "active" : "inactive",
143
+ * };
144
+ * ```
145
+ *
146
+ * @example
147
+ * **Mixed mapping types:**
148
+ * ```typescript
149
+ * type ComplexData = {
150
+ * user: {
151
+ * name: string;
152
+ * email: string;
153
+ * };
154
+ * items: Array<{ id: number; name: string }>;
155
+ * metadata: {
156
+ * tags: string[];
157
+ * };
158
+ * };
159
+ *
160
+ * const mapping: ObjectMapping<ComplexData> = {
161
+ * // Simple dot notation
162
+ * userName: "user.name",
163
+ * // Array access
164
+ * firstItemId: "items[0].id",
165
+ * // JSONPath for complex query
166
+ * allItemNames: "$.items[*].name",
167
+ * // Function for transformation
168
+ * formattedTags: (data) => data.metadata.tags.map(t => `#${t}`).join(" "),
169
+ * };
170
+ * ```
171
+ *
172
+ * @example
173
+ * **Real-world evaluator binding:**
174
+ * ```typescript
175
+ * import { bindEvaluator, createHallucinationEvaluator } from "@arizeai/phoenix-evals";
176
+ *
177
+ * type QAData = {
178
+ * question: string;
179
+ * context: string;
180
+ * answer: string;
181
+ * };
182
+ *
183
+ * const mapping: ObjectMapping<QAData> = {
184
+ * input: "question", // Evaluator expects "input"
185
+ * reference: "context", // Evaluator expects "reference"
186
+ * output: "answer", // Evaluator expects "output"
187
+ * };
188
+ *
189
+ * const evaluator = bindEvaluator(
190
+ * createHallucinationEvaluator({ model: openai("gpt-4") }),
191
+ * { inputMapping: mapping }
192
+ * );
193
+ * ```
194
+ *
195
+ * @typeParam DataType - The type of the data object being mapped
196
+ */
197
+ export type ObjectMapping<DataType extends Record<string, unknown>> = Record<
198
+ string,
199
+ ValueGetter<DataType>
200
+ >;
@@ -1,3 +1,4 @@
1
+ import { ObjectMapping } from "./data";
1
2
  import { WithTelemetry } from "./otel";
2
3
  import { PromptTemplate } from "./templating";
3
4
 
@@ -81,22 +82,41 @@ export interface CreateClassifierArgs extends WithTelemetry {
81
82
  promptTemplate: PromptTemplate;
82
83
  }
83
84
 
84
- export interface CreateEvaluatorArgs {
85
+ export interface CreateEvaluatorArgs<
86
+ ExampleType extends Record<string, unknown> = Record<string, unknown>,
87
+ > extends WithTelemetry {
85
88
  /**
86
89
  * The name of the metric that the evaluator produces
87
90
  * E.g. "correctness"
88
91
  */
89
92
  name: string;
93
+ /**
94
+ * The kind of evaluator (for example, "LLM").
95
+ */
96
+ kind: EvaluationKind;
90
97
  /**
91
98
  * If present, represents the direction in which you want the metric to be optimized
92
99
  * E.g. "MAXIMIZE" means you want the number to be higher.
93
100
  */
94
101
  optimizationDirection?: OptimizationDirection;
102
+ /**
103
+ * A mapping that transforms the input being evaluated into the shape that the evaluator expects
104
+ */
105
+ inputMapping?: ObjectMapping<ExampleType>;
95
106
  }
96
107
 
97
- export interface CreateClassificationEvaluatorArgs
98
- extends CreateClassifierArgs,
99
- CreateEvaluatorArgs {}
108
+ export type CreateLLMEvaluatorArgs<RecordType extends Record<string, unknown>> =
109
+ Omit<CreateEvaluatorArgs<RecordType>, "kind">;
110
+
111
+ export interface CreateClassificationEvaluatorArgs<
112
+ RecordType extends Record<string, unknown>,
113
+ > extends CreateClassifierArgs,
114
+ CreateLLMEvaluatorArgs<RecordType> {
115
+ /**
116
+ * The prompt template to use for classification
117
+ */
118
+ promptTemplate: PromptTemplate;
119
+ }
100
120
 
101
121
  export type EvaluatorFn<ExampleType extends Record<string, unknown>> = (
102
122
  args: ExampleType
@@ -136,7 +156,7 @@ interface EvaluatorDescription {
136
156
  * The Base Evaluator interface
137
157
  * This is the interface that all evaluators must implement
138
158
  */
139
- export interface Evaluator<ExampleType extends Record<string, unknown>>
159
+ export interface EvaluatorInterface<ExampleType extends Record<string, unknown>>
140
160
  extends EvaluatorDescription {
141
161
  /**
142
162
  * The function that evaluates the example
@@ -1,3 +1,4 @@
1
1
  export * from "./evals";
2
2
  export * from "./templating";
3
3
  export * from "./otel";
4
+ export * from "./data";
package/src/types/otel.ts CHANGED
@@ -1,17 +1,19 @@
1
1
  import { Tracer } from "@opentelemetry/api";
2
2
 
3
+ export type TelemetryConfig = {
4
+ /**
5
+ * Whether OpenTelemetry is enabled on the call.
6
+ * Defaults to true for visibility into the evals calls.
7
+ * @default true
8
+ */
9
+ isEnabled?: boolean;
10
+ /**
11
+ * The tracer to use for the call.
12
+ * If not provided, the traces will get picked up by the global tracer.
13
+ */
14
+ tracer?: Tracer;
15
+ };
16
+
3
17
  export type WithTelemetry = {
4
- telemetry?: {
5
- /**
6
- * Whether OpenTelemetry is enabled on the call.
7
- * Defaults to true for visibility into the evals calls.
8
- * @default true
9
- */
10
- isEnabled?: boolean;
11
- /**
12
- * The tracer to use for the call.
13
- * If not provided, the traces will get picked up by the global tracer.
14
- */
15
- tracer?: Tracer;
16
- };
18
+ telemetry?: TelemetryConfig;
17
19
  };