@arizeai/phoenix-evals 0.5.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120)
  1. package/README.md +42 -0
  2. package/dist/esm/core/EvaluatorBase.d.ts +19 -0
  3. package/dist/esm/core/EvaluatorBase.d.ts.map +1 -0
  4. package/dist/esm/core/EvaluatorBase.js +18 -0
  5. package/dist/esm/core/EvaluatorBase.js.map +1 -0
  6. package/dist/esm/index.d.ts +1 -0
  7. package/dist/esm/index.d.ts.map +1 -1
  8. package/dist/esm/index.js +1 -0
  9. package/dist/esm/index.js.map +1 -1
  10. package/dist/esm/llm/ClassificationEvaluator.d.ts +19 -2
  11. package/dist/esm/llm/ClassificationEvaluator.d.ts.map +1 -1
  12. package/dist/esm/llm/ClassificationEvaluator.js +29 -2
  13. package/dist/esm/llm/ClassificationEvaluator.js.map +1 -1
  14. package/dist/esm/llm/LLMEvaluator.d.ts +4 -7
  15. package/dist/esm/llm/LLMEvaluator.d.ts.map +1 -1
  16. package/dist/esm/llm/LLMEvaluator.js +4 -7
  17. package/dist/esm/llm/LLMEvaluator.js.map +1 -1
  18. package/dist/esm/llm/createClassificationEvaluator.d.ts +1 -1
  19. package/dist/esm/llm/createClassificationEvaluator.d.ts.map +1 -1
  20. package/dist/esm/llm/createClassificationEvaluator.js.map +1 -1
  21. package/dist/esm/llm/createDocumentRelevancyEvaluator.d.ts +8 -7
  22. package/dist/esm/llm/createDocumentRelevancyEvaluator.d.ts.map +1 -1
  23. package/dist/esm/llm/createDocumentRelevancyEvaluator.js +1 -2
  24. package/dist/esm/llm/createDocumentRelevancyEvaluator.js.map +1 -1
  25. package/dist/esm/llm/createHallucinationEvaluator.d.ts +6 -6
  26. package/dist/esm/llm/createHallucinationEvaluator.d.ts.map +1 -1
  27. package/dist/esm/llm/createHallucinationEvaluator.js +1 -2
  28. package/dist/esm/llm/createHallucinationEvaluator.js.map +1 -1
  29. package/dist/esm/tsconfig.esm.tsbuildinfo +1 -1
  30. package/dist/esm/types/data.d.ts +194 -0
  31. package/dist/esm/types/data.d.ts.map +1 -0
  32. package/dist/esm/types/data.js +2 -0
  33. package/dist/esm/types/data.js.map +1 -0
  34. package/dist/esm/types/evals.d.ts +17 -3
  35. package/dist/esm/types/evals.d.ts.map +1 -1
  36. package/dist/esm/types/index.d.ts +1 -0
  37. package/dist/esm/types/index.d.ts.map +1 -1
  38. package/dist/esm/types/index.js +1 -0
  39. package/dist/esm/types/index.js.map +1 -1
  40. package/dist/esm/types/otel.d.ts +14 -13
  41. package/dist/esm/types/otel.d.ts.map +1 -1
  42. package/dist/esm/utils/bindEvaluator.d.ts +219 -0
  43. package/dist/esm/utils/bindEvaluator.d.ts.map +1 -0
  44. package/dist/esm/utils/bindEvaluator.js +163 -0
  45. package/dist/esm/utils/bindEvaluator.js.map +1 -0
  46. package/dist/esm/utils/index.d.ts +2 -0
  47. package/dist/esm/utils/index.d.ts.map +1 -0
  48. package/dist/esm/utils/index.js +2 -0
  49. package/dist/esm/utils/index.js.map +1 -0
  50. package/dist/esm/utils/objectMappingUtils.d.ts +166 -0
  51. package/dist/esm/utils/objectMappingUtils.d.ts.map +1 -0
  52. package/dist/esm/utils/objectMappingUtils.js +191 -0
  53. package/dist/esm/utils/objectMappingUtils.js.map +1 -0
  54. package/dist/src/core/EvaluatorBase.d.ts +19 -0
  55. package/dist/src/core/EvaluatorBase.d.ts.map +1 -0
  56. package/dist/src/core/EvaluatorBase.js +17 -0
  57. package/dist/src/core/EvaluatorBase.js.map +1 -0
  58. package/dist/src/index.d.ts +1 -0
  59. package/dist/src/index.d.ts.map +1 -1
  60. package/dist/src/index.js +1 -0
  61. package/dist/src/index.js.map +1 -1
  62. package/dist/src/llm/ClassificationEvaluator.d.ts +19 -2
  63. package/dist/src/llm/ClassificationEvaluator.d.ts.map +1 -1
  64. package/dist/src/llm/ClassificationEvaluator.js +13 -2
  65. package/dist/src/llm/ClassificationEvaluator.js.map +1 -1
  66. package/dist/src/llm/LLMEvaluator.d.ts +4 -7
  67. package/dist/src/llm/LLMEvaluator.d.ts.map +1 -1
  68. package/dist/src/llm/LLMEvaluator.js +16 -5
  69. package/dist/src/llm/LLMEvaluator.js.map +1 -1
  70. package/dist/src/llm/createClassificationEvaluator.d.ts +1 -1
  71. package/dist/src/llm/createClassificationEvaluator.d.ts.map +1 -1
  72. package/dist/src/llm/createClassificationEvaluator.js.map +1 -1
  73. package/dist/src/llm/createDocumentRelevancyEvaluator.d.ts +8 -7
  74. package/dist/src/llm/createDocumentRelevancyEvaluator.d.ts.map +1 -1
  75. package/dist/src/llm/createDocumentRelevancyEvaluator.js +2 -2
  76. package/dist/src/llm/createDocumentRelevancyEvaluator.js.map +1 -1
  77. package/dist/src/llm/createHallucinationEvaluator.d.ts +6 -6
  78. package/dist/src/llm/createHallucinationEvaluator.d.ts.map +1 -1
  79. package/dist/src/llm/createHallucinationEvaluator.js +2 -2
  80. package/dist/src/llm/createHallucinationEvaluator.js.map +1 -1
  81. package/dist/src/types/data.d.ts +194 -0
  82. package/dist/src/types/data.d.ts.map +1 -0
  83. package/dist/src/types/data.js +3 -0
  84. package/dist/src/types/data.js.map +1 -0
  85. package/dist/src/types/evals.d.ts +17 -3
  86. package/dist/src/types/evals.d.ts.map +1 -1
  87. package/dist/src/types/index.d.ts +1 -0
  88. package/dist/src/types/index.d.ts.map +1 -1
  89. package/dist/src/types/index.js +1 -0
  90. package/dist/src/types/index.js.map +1 -1
  91. package/dist/src/types/otel.d.ts +14 -13
  92. package/dist/src/types/otel.d.ts.map +1 -1
  93. package/dist/src/utils/bindEvaluator.d.ts +219 -0
  94. package/dist/src/utils/bindEvaluator.d.ts.map +1 -0
  95. package/dist/src/utils/bindEvaluator.js +166 -0
  96. package/dist/src/utils/bindEvaluator.js.map +1 -0
  97. package/dist/src/utils/index.d.ts +2 -0
  98. package/dist/src/utils/index.d.ts.map +1 -0
  99. package/dist/src/utils/index.js +18 -0
  100. package/dist/src/utils/index.js.map +1 -0
  101. package/dist/src/utils/objectMappingUtils.d.ts +166 -0
  102. package/dist/src/utils/objectMappingUtils.d.ts.map +1 -0
  103. package/dist/src/utils/objectMappingUtils.js +191 -0
  104. package/dist/src/utils/objectMappingUtils.js.map +1 -0
  105. package/dist/tsconfig.tsbuildinfo +1 -1
  106. package/package.json +2 -1
  107. package/src/core/EvaluatorBase.ts +43 -0
  108. package/src/index.ts +1 -0
  109. package/src/llm/ClassificationEvaluator.ts +39 -3
  110. package/src/llm/LLMEvaluator.ts +7 -16
  111. package/src/llm/createClassificationEvaluator.ts +1 -1
  112. package/src/llm/createDocumentRelevancyEvaluator.ts +17 -12
  113. package/src/llm/createHallucinationEvaluator.ts +12 -10
  114. package/src/types/data.ts +200 -0
  115. package/src/types/evals.ts +25 -5
  116. package/src/types/index.ts +1 -0
  117. package/src/types/otel.ts +15 -13
  118. package/src/utils/bindEvaluator.ts +229 -0
  119. package/src/utils/index.ts +1 -0
  120. package/src/utils/objectMappingUtils.ts +202 -0
@@ -0,0 +1,229 @@
1
+ import { EvaluatorBase } from "../core/EvaluatorBase";
2
+ import { ObjectMapping } from "../types/data";
3
+
4
+ /**
5
+ * Context for binding an evaluator with input mapping configuration.
6
+ *
7
+ * This type defines the structure for binding an evaluator to a specific data shape
8
+ * by mapping the evaluator's expected input fields to the actual data structure.
9
+ *
10
+ * @example
11
+ * ```typescript
12
+ * // Map evaluator fields to your data structure
13
+ * const context: BindingContext<MyDataType> = {
14
+ * inputMapping: {
15
+ * input: "userQuery", // Maps "input" to "userQuery" field
16
+ * reference: "context", // Maps "reference" to "context" field
17
+ * output: "modelResponse", // Maps "output" to "modelResponse" field
18
+ * },
19
+ * };
20
+ * ```
21
+ *
22
+ * @example
23
+ * ```typescript
24
+ * // Using nested property access
25
+ * const context: BindingContext<ApiResponse> = {
26
+ * inputMapping: {
27
+ * input: "request.body.query",
28
+ * reference: "request.body.context",
29
+ * output: "response.data.text",
30
+ * },
31
+ * };
32
+ * ```
33
+ *
34
+ * @example
35
+ * ```typescript
36
+ * // Using function-based mapping for transformations
37
+ * const context: BindingContext<RawData> = {
38
+ * inputMapping: {
39
+ * input: "question",
40
+ * reference: (data) => data.context.join("\n"), // Transform array to string
41
+ * output: "answer",
42
+ * },
43
+ * };
44
+ * ```
45
+ *
46
+ * @typeParam RecordType - The type of the data record that will be evaluated
47
+ */
48
+ export type BindingContext<RecordType extends Record<string, unknown>> = {
49
+ /**
50
+ * Required mapping from evaluator input field names to getters over the data record.
51
+ *
52
+ * The keys are the field names expected by the evaluator (e.g., "input", "output", "reference"),
53
+ * and each value specifies how to extract that field from your data structure.
54
+ *
55
+ * Each value may be one of:
56
+ * - Simple property names: `"fieldName"`
57
+ * - Dot notation: `"user.profile.name"`
58
+ * - Array access: `"items[0].id"`
59
+ * - JSONPath expressions: `"$.items[*].id"`
60
+ * - Function extractors: `(data) => data.customField.toUpperCase()`
61
+ */
62
+ inputMapping: ObjectMapping<RecordType>;
63
+ };
64
+
65
+ /**
66
+ * Binds an evaluator to a specific data structure using input mapping.
67
+ *
68
+ * This function creates a new evaluator instance that automatically transforms
69
+ * your data structure to match what the evaluator expects. This is particularly
70
+ * useful when your data schema doesn't match the evaluator's expected input format.
71
+ *
72
+ * @param evaluator - The evaluator to bind (e.g., a hallucination evaluator)
73
+ * @param context - The binding context containing the input mapping configuration
74
+ * @returns A new evaluator instance with the input mapping applied
75
+ *
76
+ * @example
77
+ * **Basic usage with simple field mapping:**
78
+ * ```typescript
79
+ * import { bindEvaluator, createHallucinationEvaluator } from "@arizeai/phoenix-evals";
80
+ * import { openai } from "@ai-sdk/openai";
81
+ *
82
+ * type MyData = {
83
+ * question: string;
84
+ * context: string;
85
+ * answer: string;
86
+ * };
87
+ *
88
+ * const evaluator = bindEvaluator<MyData>(
89
+ * createHallucinationEvaluator({ model: openai("gpt-4") }),
90
+ * {
91
+ * inputMapping: {
92
+ * input: "question", // Evaluator expects "input", map from "question"
93
+ * reference: "context", // Evaluator expects "reference", map from "context"
94
+ * output: "answer", // Evaluator expects "output", map from "answer"
95
+ * },
96
+ * }
97
+ * );
98
+ *
99
+ * // Now you can evaluate with your data structure
100
+ * const result = await evaluator.evaluate({
101
+ * question: "What is AI?",
102
+ * context: "AI is artificial intelligence...",
103
+ * answer: "AI stands for artificial intelligence",
104
+ * });
105
+ * ```
106
+ *
107
+ * @example
108
+ * **Using nested property access:**
109
+ * ```typescript
110
+ * type ApiResponse = {
111
+ * request: {
112
+ * body: {
113
+ * query: string;
114
+ * context: string;
115
+ * };
116
+ * };
117
+ * response: {
118
+ * data: {
119
+ * text: string;
120
+ * };
121
+ * };
122
+ * };
123
+ *
124
+ * const evaluator = bindEvaluator<ApiResponse>(
125
+ * createHallucinationEvaluator({ model: openai("gpt-4") }),
126
+ * {
127
+ * inputMapping: {
128
+ * input: "request.body.query",
129
+ * reference: "request.body.context",
130
+ * output: "response.data.text",
131
+ * },
132
+ * }
133
+ * );
134
+ * ```
135
+ *
136
+ * @example
137
+ * **Using function-based mapping for data transformation:**
138
+ * ```typescript
139
+ * type RawData = {
140
+ * question: string;
141
+ * contexts: string[]; // Array of context strings
142
+ * answer: string;
143
+ * };
144
+ *
145
+ * const evaluator = bindEvaluator<RawData>(
146
+ * createHallucinationEvaluator({ model: openai("gpt-4") }),
147
+ * {
148
+ * inputMapping: {
149
+ * input: "question",
150
+ * // Transform array to single string
151
+ * reference: (data) => data.contexts.join("\n\n"),
152
+ * output: "answer",
153
+ * },
154
+ * }
155
+ * );
156
+ * ```
157
+ *
158
+ * @example
159
+ * **Using JSONPath for complex queries:**
160
+ * ```typescript
161
+ * type ComplexData = {
162
+ * conversation: {
163
+ * messages: Array<{ role: string; content: string }>;
164
+ * };
165
+ * metadata: {
166
+ * sources: string[];
167
+ * };
168
+ * };
169
+ *
170
+ * const evaluator = bindEvaluator<ComplexData>(
171
+ * createHallucinationEvaluator({ model: openai("gpt-4") }),
172
+ * {
173
+ * inputMapping: {
174
+ * // Extract last user message
175
+ * input: "$.conversation.messages[?(@.role=='user')].content[-1]",
176
+ * // Extract all sources
177
+ * reference: "$.metadata.sources[*]",
178
+ * // Extract last assistant message
179
+ * output: "$.conversation.messages[?(@.role=='assistant')].content[-1]",
180
+ * },
181
+ * }
182
+ * );
183
+ * ```
184
+ *
185
+ * @example
186
+ * **Binding multiple evaluators with different mappings:**
187
+ * ```typescript
188
+ * type EvaluationData = {
189
+ * userQuery: string;
190
+ * systemContext: string;
191
+ * modelOutput: string;
192
+ * expectedOutput?: string;
193
+ * };
194
+ *
195
+ * // Hallucination evaluator
196
+ * const hallucinationEvaluator = bindEvaluator<EvaluationData>(
197
+ * createHallucinationEvaluator({ model: openai("gpt-4") }),
198
+ * {
199
+ * inputMapping: {
200
+ * input: "userQuery",
201
+ * reference: "systemContext",
202
+ * output: "modelOutput",
203
+ * },
204
+ * }
205
+ * );
206
+ *
207
+ * // Document relevancy evaluator
208
+ * const relevancyEvaluator = bindEvaluator<EvaluationData>(
209
+ * createDocumentRelevancyEvaluator({ model: openai("gpt-4") }),
210
+ * {
211
+ * inputMapping: {
212
+ * query: "userQuery",
213
+ * document: "systemContext",
214
+ * output: "modelOutput",
215
+ * },
216
+ * }
217
+ * );
218
+ * ```
219
+ */
220
+ export function bindEvaluator<RecordType extends Record<string, unknown>>(
221
+ evaluator: EvaluatorBase<RecordType>,
222
+ context: BindingContext<RecordType>
223
+ ): EvaluatorBase<RecordType> {
224
+ let boundEvaluator: EvaluatorBase<RecordType> = evaluator; // returned unchanged if no mapping is applied below
225
+ if (context.inputMapping) { // BindingContext declares inputMapping as required, so this guard only matters for callers that bypass the type
226
+ boundEvaluator = boundEvaluator.bindInputMapping(context.inputMapping); // the value returned by bindInputMapping replaces the evaluator; presumably a new instance rather than a mutation, TODO confirm in EvaluatorBase
227
+ }
228
+ return boundEvaluator;
229
+ }
@@ -0,0 +1 @@
1
+ export * from "./bindEvaluator";
@@ -0,0 +1,202 @@
1
+ import { ObjectMapping, ValueGetter } from "../types/data";
2
+
3
+ import { JSONPath } from "jsonpath-plus";
4
+
5
+ /**
6
+ * Remaps an object by applying field mappings while preserving original data.
7
+ *
8
+ * This function takes your original data object and a mapping configuration,
9
+ * then returns a new object that contains:
10
+ * - All original fields from the input data
11
+ * - Additional/overridden fields based on the mapping
12
+ *
13
+ * The mapping allows you to extract values using:
14
+ * - Simple property names: `"fieldName"`
15
+ * - Dot notation: `"user.profile.name"`
16
+ * - Array access: `"items[0].id"`
17
+ * - JSONPath expressions: `"$.items[*].id"`
18
+ * - Function extractors: `(data) => data.customField`
19
+ *
20
+ * @param data - The input data object to remap
21
+ * @param mapping - The mapping configuration defining how to extract/transform values
22
+ * @returns A new object with original fields plus mapped fields
23
+ *
24
+ * @example
25
+ * **Basic remapping:**
26
+ * ```typescript
27
+ * const data = {
28
+ * name: "John",
29
+ * age: 30,
30
+ * email: "john@example.com",
31
+ * };
32
+ *
33
+ * const mapping: ObjectMapping<typeof data> = {
34
+ * userName: "name",
35
+ * userAge: "age",
36
+ * };
37
+ *
38
+ * const result = remapObject(data, mapping);
39
+ * // Result: {
40
+ * // name: "John",
41
+ * // age: 30,
42
+ * // email: "john@example.com",
43
+ * // userName: "John", // Added from mapping
44
+ * // userAge: 30, // Added from mapping
45
+ * // }
46
+ * ```
47
+ *
48
+ * @example
49
+ * **Nested property extraction:**
50
+ * ```typescript
51
+ * const data = {
52
+ * user: {
53
+ * profile: {
54
+ * firstName: "John",
55
+ * lastName: "Doe",
56
+ * },
57
+ * email: "john@example.com",
58
+ * },
59
+ * };
60
+ *
61
+ * const mapping: ObjectMapping<typeof data> = {
62
+ * firstName: "user.profile.firstName",
63
+ * lastName: "user.profile.lastName",
64
+ * email: "user.email",
65
+ * };
66
+ *
67
+ * const result = remapObject(data, mapping);
68
+ * // Result includes original data plus:
69
+ * // {
70
+ * // firstName: "John",
71
+ * // lastName: "Doe",
72
+ * // email: "john@example.com",
73
+ * // }
74
+ * ```
75
+ *
76
+ * @example
77
+ * **Array element access:**
78
+ * ```typescript
79
+ * const data = {
80
+ * items: [
81
+ * { id: 1, name: "Apple" },
82
+ * { id: 2, name: "Banana" },
83
+ * ],
84
+ * };
85
+ *
86
+ * const mapping: ObjectMapping<typeof data> = {
87
+ * firstItemId: "items[0].id",
88
+ * firstItemName: "items[0].name",
89
+ * };
90
+ *
91
+ * const result = remapObject(data, mapping);
92
+ * // Result includes:
93
+ * // {
94
+ * // firstItemId: 1,
95
+ * // firstItemName: "Apple",
96
+ * // }
97
+ * ```
98
+ *
99
+ * @example
100
+ * **Function-based transformation:**
101
+ * ```typescript
102
+ * const data = {
103
+ * firstName: "John",
104
+ * lastName: "Doe",
105
+ * scores: [85, 92, 78],
106
+ * };
107
+ *
108
+ * const mapping: ObjectMapping<typeof data> = {
109
+ * fullName: (data) => `${data.firstName} ${data.lastName}`,
110
+ * averageScore: (data) =>
111
+ * data.scores.reduce((a, b) => a + b, 0) / data.scores.length,
112
+ * };
113
+ *
114
+ * const result = remapObject(data, mapping);
115
+ * // Result includes:
116
+ * // {
117
+ * // fullName: "John Doe",
118
+ * // averageScore: 85,
119
+ * // }
120
+ * ```
121
+ *
122
+ * @example
123
+ * **Field override:**
124
+ * ```typescript
125
+ * const data = {
126
+ * name: "John",
127
+ * status: "inactive",
128
+ * };
129
+ *
130
+ * const mapping: ObjectMapping<typeof data> = {
131
+ * // Override existing field
132
+ * status: (data) => data.name === "John" ? "active" : "inactive",
133
+ * // Add new field
134
+ * displayName: (data) => `User: ${data.name}`,
135
+ * };
136
+ *
137
+ * const result = remapObject(data, mapping);
138
+ * // Result:
139
+ * // {
140
+ * // name: "John",
141
+ * // status: "active", // Overridden
142
+ * // displayName: "User: John", // Added
143
+ * // }
144
+ * ```
145
+ *
146
+ * @example
147
+ * **Real-world evaluator usage:**
148
+ * ```typescript
149
+ * // Your data structure
150
+ * const example = {
151
+ * question: "What is AI?",
152
+ * context: "AI is artificial intelligence...",
153
+ * answer: "AI stands for artificial intelligence",
154
+ * };
155
+ *
156
+ * // Evaluator expects: { input, reference, output }
157
+ * const mapping: ObjectMapping<typeof example> = {
158
+ * input: "question",
159
+ * reference: "context",
160
+ * output: "answer",
161
+ * };
162
+ *
163
+ * const remapped = remapObject(example, mapping);
164
+ * // Now remapped has: { question, context, answer, input, reference, output }
165
+ * // The evaluator can access input, reference, and output fields
166
+ * ```
167
+ */
168
+ export function remapObject<DataType extends Record<string, unknown>>(
169
+ data: DataType,
170
+ mapping: ObjectMapping<DataType>
171
+ ): DataType { // declared as DataType even though the mapping may add keys beyond it
172
+ return {
173
+ ...data, // original fields are copied first
174
+ ...Object.fromEntries( // mapped fields are spread second, so they override same-named originals
175
+ Object.entries(mapping).map(([key, value]) => [
176
+ key,
177
+ getMappedObjectValue(data, value), // every getter reads from the original data, not from earlier mapped results
178
+ ])
179
+ ),
180
+ };
181
+ }
182
+
183
+ /**
184
+ * Resolves a single value getter against a data object.
185
+ *
186
+ * A function getter is called with `data` and its result is returned as-is.
187
+ * Any other getter is passed to `JSONPath` as the `path`, with `data` as the `json`.
188
+ *
189
+ * @param data - The data object to extract from
190
+ * @param valueGetter - Either an extractor function or a path string
191
+ * @returns The result of the function, or the `JSONPath` result with `wrap: false` (per the jsonpath-plus docs this should be `undefined` on no match and unwrapped for a single match; TODO confirm)
192
+ *
193
+ * @internal
194
+ */
195
+ function getMappedObjectValue<DataType extends Record<string, unknown>>(
196
+ data: DataType,
197
+ valueGetter: ValueGetter<DataType>
198
+ ): DataType[keyof DataType] {
199
+ return typeof valueGetter === "function"
200
+ ? valueGetter(data)
201
+ : JSONPath({ path: valueGetter, json: data, wrap: false }); // NOTE(review): string getters are evaluated as JSONPath expressions; confirm they never come from untrusted input
202
+ }