@arizeai/phoenix-evals 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +42 -0
- package/dist/esm/core/EvaluatorBase.d.ts +19 -0
- package/dist/esm/core/EvaluatorBase.d.ts.map +1 -0
- package/dist/esm/core/EvaluatorBase.js +18 -0
- package/dist/esm/core/EvaluatorBase.js.map +1 -0
- package/dist/esm/index.d.ts +1 -0
- package/dist/esm/index.d.ts.map +1 -1
- package/dist/esm/index.js +1 -0
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/llm/ClassificationEvaluator.d.ts +20 -3
- package/dist/esm/llm/ClassificationEvaluator.d.ts.map +1 -1
- package/dist/esm/llm/ClassificationEvaluator.js +29 -2
- package/dist/esm/llm/ClassificationEvaluator.js.map +1 -1
- package/dist/esm/llm/LLMEvaluator.d.ts +4 -7
- package/dist/esm/llm/LLMEvaluator.d.ts.map +1 -1
- package/dist/esm/llm/LLMEvaluator.js +4 -7
- package/dist/esm/llm/LLMEvaluator.js.map +1 -1
- package/dist/esm/llm/createClassificationEvaluator.d.ts +1 -1
- package/dist/esm/llm/createClassificationEvaluator.d.ts.map +1 -1
- package/dist/esm/llm/createClassificationEvaluator.js.map +1 -1
- package/dist/esm/llm/createDocumentRelevancyEvaluator.d.ts +8 -7
- package/dist/esm/llm/createDocumentRelevancyEvaluator.d.ts.map +1 -1
- package/dist/esm/llm/createDocumentRelevancyEvaluator.js +1 -2
- package/dist/esm/llm/createDocumentRelevancyEvaluator.js.map +1 -1
- package/dist/esm/llm/createHallucinationEvaluator.d.ts +6 -6
- package/dist/esm/llm/createHallucinationEvaluator.d.ts.map +1 -1
- package/dist/esm/llm/createHallucinationEvaluator.js +1 -2
- package/dist/esm/llm/createHallucinationEvaluator.js.map +1 -1
- package/dist/esm/template/applyTemplate.d.ts +3 -3
- package/dist/esm/template/applyTemplate.d.ts.map +1 -1
- package/dist/esm/template/applyTemplate.js +15 -1
- package/dist/esm/template/applyTemplate.js.map +1 -1
- package/dist/esm/template/createTemplateVariablesProxy.d.ts +1 -1
- package/dist/esm/template/createTemplateVariablesProxy.d.ts.map +1 -1
- package/dist/esm/template/createTemplateVariablesProxy.js.map +1 -1
- package/dist/esm/template/getTemplateVariables.d.ts +2 -2
- package/dist/esm/template/getTemplateVariables.d.ts.map +1 -1
- package/dist/esm/template/getTemplateVariables.js +16 -0
- package/dist/esm/template/getTemplateVariables.js.map +1 -1
- package/dist/esm/tsconfig.esm.tsbuildinfo +1 -1
- package/dist/esm/types/data.d.ts +194 -0
- package/dist/esm/types/data.d.ts.map +1 -0
- package/dist/esm/types/data.js +2 -0
- package/dist/esm/types/data.js.map +1 -0
- package/dist/esm/types/evals.d.ts +19 -5
- package/dist/esm/types/evals.d.ts.map +1 -1
- package/dist/esm/types/index.d.ts +1 -0
- package/dist/esm/types/index.d.ts.map +1 -1
- package/dist/esm/types/index.js +1 -0
- package/dist/esm/types/index.js.map +1 -1
- package/dist/esm/types/otel.d.ts +14 -13
- package/dist/esm/types/otel.d.ts.map +1 -1
- package/dist/esm/types/templating.d.ts +8 -6
- package/dist/esm/types/templating.d.ts.map +1 -1
- package/dist/esm/utils/bindEvaluator.d.ts +219 -0
- package/dist/esm/utils/bindEvaluator.d.ts.map +1 -0
- package/dist/esm/utils/bindEvaluator.js +163 -0
- package/dist/esm/utils/bindEvaluator.js.map +1 -0
- package/dist/esm/utils/index.d.ts +2 -0
- package/dist/esm/utils/index.d.ts.map +1 -0
- package/dist/esm/utils/index.js +2 -0
- package/dist/esm/utils/index.js.map +1 -0
- package/dist/esm/utils/objectMappingUtils.d.ts +166 -0
- package/dist/esm/utils/objectMappingUtils.d.ts.map +1 -0
- package/dist/esm/utils/objectMappingUtils.js +191 -0
- package/dist/esm/utils/objectMappingUtils.js.map +1 -0
- package/dist/src/core/EvaluatorBase.d.ts +19 -0
- package/dist/src/core/EvaluatorBase.d.ts.map +1 -0
- package/dist/src/core/EvaluatorBase.js +17 -0
- package/dist/src/core/EvaluatorBase.js.map +1 -0
- package/dist/src/index.d.ts +1 -0
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +1 -0
- package/dist/src/index.js.map +1 -1
- package/dist/src/llm/ClassificationEvaluator.d.ts +20 -3
- package/dist/src/llm/ClassificationEvaluator.d.ts.map +1 -1
- package/dist/src/llm/ClassificationEvaluator.js +13 -2
- package/dist/src/llm/ClassificationEvaluator.js.map +1 -1
- package/dist/src/llm/LLMEvaluator.d.ts +4 -7
- package/dist/src/llm/LLMEvaluator.d.ts.map +1 -1
- package/dist/src/llm/LLMEvaluator.js +16 -5
- package/dist/src/llm/LLMEvaluator.js.map +1 -1
- package/dist/src/llm/createClassificationEvaluator.d.ts +1 -1
- package/dist/src/llm/createClassificationEvaluator.d.ts.map +1 -1
- package/dist/src/llm/createClassificationEvaluator.js.map +1 -1
- package/dist/src/llm/createDocumentRelevancyEvaluator.d.ts +8 -7
- package/dist/src/llm/createDocumentRelevancyEvaluator.d.ts.map +1 -1
- package/dist/src/llm/createDocumentRelevancyEvaluator.js +2 -2
- package/dist/src/llm/createDocumentRelevancyEvaluator.js.map +1 -1
- package/dist/src/llm/createHallucinationEvaluator.d.ts +6 -6
- package/dist/src/llm/createHallucinationEvaluator.d.ts.map +1 -1
- package/dist/src/llm/createHallucinationEvaluator.js +2 -2
- package/dist/src/llm/createHallucinationEvaluator.js.map +1 -1
- package/dist/src/template/applyTemplate.d.ts +3 -3
- package/dist/src/template/applyTemplate.d.ts.map +1 -1
- package/dist/src/template/applyTemplate.js +12 -1
- package/dist/src/template/applyTemplate.js.map +1 -1
- package/dist/src/template/createTemplateVariablesProxy.d.ts +1 -1
- package/dist/src/template/createTemplateVariablesProxy.d.ts.map +1 -1
- package/dist/src/template/createTemplateVariablesProxy.js.map +1 -1
- package/dist/src/template/getTemplateVariables.d.ts +2 -2
- package/dist/src/template/getTemplateVariables.d.ts.map +1 -1
- package/dist/src/template/getTemplateVariables.js +16 -0
- package/dist/src/template/getTemplateVariables.js.map +1 -1
- package/dist/src/types/data.d.ts +194 -0
- package/dist/src/types/data.d.ts.map +1 -0
- package/dist/src/types/data.js +3 -0
- package/dist/src/types/data.js.map +1 -0
- package/dist/src/types/evals.d.ts +19 -5
- package/dist/src/types/evals.d.ts.map +1 -1
- package/dist/src/types/index.d.ts +1 -0
- package/dist/src/types/index.d.ts.map +1 -1
- package/dist/src/types/index.js +1 -0
- package/dist/src/types/index.js.map +1 -1
- package/dist/src/types/otel.d.ts +14 -13
- package/dist/src/types/otel.d.ts.map +1 -1
- package/dist/src/types/templating.d.ts +8 -6
- package/dist/src/types/templating.d.ts.map +1 -1
- package/dist/src/utils/bindEvaluator.d.ts +219 -0
- package/dist/src/utils/bindEvaluator.d.ts.map +1 -0
- package/dist/src/utils/bindEvaluator.js +166 -0
- package/dist/src/utils/bindEvaluator.js.map +1 -0
- package/dist/src/utils/index.d.ts +2 -0
- package/dist/src/utils/index.d.ts.map +1 -0
- package/dist/src/utils/index.js +18 -0
- package/dist/src/utils/index.js.map +1 -0
- package/dist/src/utils/objectMappingUtils.d.ts +166 -0
- package/dist/src/utils/objectMappingUtils.d.ts.map +1 -0
- package/dist/src/utils/objectMappingUtils.js +191 -0
- package/dist/src/utils/objectMappingUtils.js.map +1 -0
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +3 -1
- package/src/core/EvaluatorBase.ts +43 -0
- package/src/index.ts +1 -0
- package/src/llm/ClassificationEvaluator.ts +41 -5
- package/src/llm/LLMEvaluator.ts +7 -16
- package/src/llm/createClassificationEvaluator.ts +1 -1
- package/src/llm/createDocumentRelevancyEvaluator.ts +17 -12
- package/src/llm/createHallucinationEvaluator.ts +12 -10
- package/src/template/applyTemplate.ts +22 -9
- package/src/template/createTemplateVariablesProxy.ts +3 -3
- package/src/template/getTemplateVariables.ts +18 -2
- package/src/types/data.ts +200 -0
- package/src/types/evals.ts +27 -7
- package/src/types/index.ts +1 -0
- package/src/types/otel.ts +15 -13
- package/src/types/templating.ts +9 -6
- package/src/utils/bindEvaluator.ts +229 -0
- package/src/utils/index.ts +1 -0
- package/src/utils/objectMappingUtils.ts +202 -0
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* A value extractor that can retrieve data from an object using various methods.
|
|
3
|
+
*
|
|
4
|
+
* This type supports multiple ways to extract values from your data structure:
|
|
5
|
+
* - **String paths**: Simple property names, dot notation, or JSONPath expressions
|
|
6
|
+
* - **Function extractors**: Custom transformation functions
|
|
7
|
+
*
|
|
8
|
+
* @example
|
|
9
|
+
* **Simple property access:**
|
|
10
|
+
* ```typescript
|
|
11
|
+
* const getter: ValueGetter<{ name: string }> = "name";
|
|
12
|
+
* ```
|
|
13
|
+
*
|
|
14
|
+
* @example
|
|
15
|
+
* **Dot notation for nested properties:**
|
|
16
|
+
* ```typescript
|
|
17
|
+
* const getter: ValueGetter<{ user: { profile: { name: string } } }> = "user.profile.name";
|
|
18
|
+
* ```
|
|
19
|
+
*
|
|
20
|
+
* @example
|
|
21
|
+
* **Array element access:**
|
|
22
|
+
* ```typescript
|
|
23
|
+
* const getter: ValueGetter<{ items: string[] }> = "items[0]";
|
|
24
|
+
* ```
|
|
25
|
+
*
|
|
26
|
+
* @example
|
|
27
|
+
* **JSONPath expression:**
|
|
28
|
+
* ```typescript
|
|
29
|
+
* const getter: ValueGetter<{ items: Array<{ id: number }> }> = "$.items[*].id";
|
|
30
|
+
* ```
|
|
31
|
+
*
|
|
32
|
+
* @example
|
|
33
|
+
* **Function-based extraction:**
|
|
34
|
+
* ```typescript
|
|
35
|
+
* const getter: ValueGetter<{ firstName: string; lastName: string }> =
|
|
36
|
+
* (data) => `${data.firstName} ${data.lastName}`;
|
|
37
|
+
* ```
|
|
38
|
+
*
|
|
39
|
+
* @example
|
|
40
|
+
* **Complex transformation:**
|
|
41
|
+
* ```typescript
|
|
42
|
+
* const getter: ValueGetter<{ scores: number[] }> =
|
|
43
|
+
* (data) => data.scores.reduce((a, b) => a + b, 0) / data.scores.length;
|
|
44
|
+
* ```
|
|
45
|
+
*
|
|
46
|
+
* @typeParam DataType - The type of the data object to extract values from
|
|
47
|
+
*/
|
|
48
|
+
export type ValueGetter<DataType extends Record<string, unknown>> = string | ((data: DataType) => any);
|
|
49
|
+
/**
|
|
50
|
+
* A mapping configuration that transforms data from one structure to another.
|
|
51
|
+
*
|
|
52
|
+
* This type defines how to map fields from your data structure to the fields
|
|
53
|
+
* expected by an evaluator or other component. The mapping is flexible and
|
|
54
|
+
* supports multiple extraction methods.
|
|
55
|
+
*
|
|
56
|
+
* **Key Features:**
|
|
57
|
+
* - Preserves original data fields
|
|
58
|
+
* - Adds/overrides fields with mapped values
|
|
59
|
+
* - Supports nested property access
|
|
60
|
+
* - Supports array element access
|
|
61
|
+
* - Supports JSONPath expressions for complex queries
|
|
62
|
+
* - Supports function-based transformations
|
|
63
|
+
*
|
|
64
|
+
* @example
|
|
65
|
+
* **Basic field mapping:**
|
|
66
|
+
* ```typescript
|
|
67
|
+
* type MyData = {
|
|
68
|
+
* userQuery: string;
|
|
69
|
+
* context: string;
|
|
70
|
+
* response: string;
|
|
71
|
+
* };
|
|
72
|
+
*
|
|
73
|
+
* const mapping: ObjectMapping<MyData> = {
|
|
74
|
+
* input: "userQuery", // Map "input" to "userQuery"
|
|
75
|
+
* reference: "context", // Map "reference" to "context"
|
|
76
|
+
* output: "response", // Map "output" to "response"
|
|
77
|
+
* };
|
|
78
|
+
* ```
|
|
79
|
+
*
|
|
80
|
+
* @example
|
|
81
|
+
* **Nested property mapping:**
|
|
82
|
+
* ```typescript
|
|
83
|
+
* type ApiData = {
|
|
84
|
+
* request: {
|
|
85
|
+
* body: {
|
|
86
|
+
* query: string;
|
|
87
|
+
* context: string;
|
|
88
|
+
* };
|
|
89
|
+
* };
|
|
90
|
+
* response: {
|
|
91
|
+
* data: {
|
|
92
|
+
* text: string;
|
|
93
|
+
* };
|
|
94
|
+
* };
|
|
95
|
+
* };
|
|
96
|
+
*
|
|
97
|
+
* const mapping: ObjectMapping<ApiData> = {
|
|
98
|
+
* input: "request.body.query",
|
|
99
|
+
* reference: "request.body.context",
|
|
100
|
+
* output: "response.data.text",
|
|
101
|
+
* };
|
|
102
|
+
* ```
|
|
103
|
+
*
|
|
104
|
+
* @example
|
|
105
|
+
* **Array element access:**
|
|
106
|
+
* ```typescript
|
|
107
|
+
* type DataWithArrays = {
|
|
108
|
+
* messages: Array<{ role: string; content: string }>;
|
|
109
|
+
* sources: string[];
|
|
110
|
+
* };
|
|
111
|
+
*
|
|
112
|
+
* const mapping: ObjectMapping<DataWithArrays> = {
|
|
113
|
+
* firstMessage: "messages[0].content",
|
|
114
|
+
* lastSource: "sources[-1]", // Last element
|
|
115
|
+
* allRoles: "$.messages[*].role", // JSONPath for all roles
|
|
116
|
+
* };
|
|
117
|
+
* ```
|
|
118
|
+
*
|
|
119
|
+
* @example
|
|
120
|
+
* **Function-based transformations:**
|
|
121
|
+
* ```typescript
|
|
122
|
+
* type RawData = {
|
|
123
|
+
* firstName: string;
|
|
124
|
+
* lastName: string;
|
|
125
|
+
* contexts: string[];
|
|
126
|
+
* scores: number[];
|
|
127
|
+
* };
|
|
128
|
+
*
|
|
129
|
+
* const mapping: ObjectMapping<RawData> = {
|
|
130
|
+
* // Combine fields
|
|
131
|
+
* fullName: (data) => `${data.firstName} ${data.lastName}`,
|
|
132
|
+
* // Transform array to string
|
|
133
|
+
* contextText: (data) => data.contexts.join("\n\n"),
|
|
134
|
+
* // Calculate derived value
|
|
135
|
+
* averageScore: (data) =>
|
|
136
|
+
* data.scores.reduce((a, b) => a + b, 0) / data.scores.length,
|
|
137
|
+
* // Conditional logic
|
|
138
|
+
* status: (data) => data.scores.length > 0 ? "active" : "inactive",
|
|
139
|
+
* };
|
|
140
|
+
* ```
|
|
141
|
+
*
|
|
142
|
+
* @example
|
|
143
|
+
* **Mixed mapping types:**
|
|
144
|
+
* ```typescript
|
|
145
|
+
* type ComplexData = {
|
|
146
|
+
* user: {
|
|
147
|
+
* name: string;
|
|
148
|
+
* email: string;
|
|
149
|
+
* };
|
|
150
|
+
* items: Array<{ id: number; name: string }>;
|
|
151
|
+
* metadata: {
|
|
152
|
+
* tags: string[];
|
|
153
|
+
* };
|
|
154
|
+
* };
|
|
155
|
+
*
|
|
156
|
+
* const mapping: ObjectMapping<ComplexData> = {
|
|
157
|
+
* // Simple dot notation
|
|
158
|
+
* userName: "user.name",
|
|
159
|
+
* // Array access
|
|
160
|
+
* firstItemId: "items[0].id",
|
|
161
|
+
* // JSONPath for complex query
|
|
162
|
+
* allItemNames: "$.items[*].name",
|
|
163
|
+
* // Function for transformation
|
|
164
|
+
* formattedTags: (data) => data.metadata.tags.map(t => `#${t}`).join(" "),
|
|
165
|
+
* };
|
|
166
|
+
* ```
|
|
167
|
+
*
|
|
168
|
+
* @example
|
|
169
|
+
* **Real-world evaluator binding:**
|
|
170
|
+
* ```typescript
|
|
171
|
+
* import { bindEvaluator, createHallucinationEvaluator } from "@arizeai/phoenix-evals";
|
|
172
|
+
*
|
|
173
|
+
* type QAData = {
|
|
174
|
+
* question: string;
|
|
175
|
+
* context: string;
|
|
176
|
+
* answer: string;
|
|
177
|
+
* };
|
|
178
|
+
*
|
|
179
|
+
* const mapping: ObjectMapping<QAData> = {
|
|
180
|
+
* input: "question", // Evaluator expects "input"
|
|
181
|
+
* reference: "context", // Evaluator expects "reference"
|
|
182
|
+
* output: "answer", // Evaluator expects "output"
|
|
183
|
+
* };
|
|
184
|
+
*
|
|
185
|
+
* const evaluator = bindEvaluator(
|
|
186
|
+
* createHallucinationEvaluator({ model: openai("gpt-4") }),
|
|
187
|
+
* { inputMapping: mapping }
|
|
188
|
+
* );
|
|
189
|
+
* ```
|
|
190
|
+
*
|
|
191
|
+
* @typeParam DataType - The type of the data object being mapped
|
|
192
|
+
*/
|
|
193
|
+
export type ObjectMapping<DataType extends Record<string, unknown>> = Record<string, ValueGetter<DataType>>;
|
|
194
|
+
//# sourceMappingURL=data.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"data.d.ts","sourceRoot":"","sources":["../../../src/types/data.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8CG;AACH,MAAM,MAAM,WAAW,CAAC,QAAQ,SAAS,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,IAC5D,MAAM,GAEN,CAAC,CAAC,IAAI,EAAE,QAAQ,KAAK,GAAG,CAAC,CAAC;AAE9B;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+IG;AACH,MAAM,MAAM,aAAa,CAAC,QAAQ,SAAS,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,IAAI,MAAM,CAC1E,MAAM,EACN,WAAW,CAAC,QAAQ,CAAC,CACtB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"data.js","sourceRoot":"","sources":["../../../src/types/data.ts"],"names":[],"mappings":""}
|
|
@@ -1,5 +1,6 @@
|
|
|
1
|
+
import { ObjectMapping } from "./data.js";
|
|
1
2
|
import { WithTelemetry } from "./otel.js";
|
|
2
|
-
import {
|
|
3
|
+
import { PromptTemplate } from "./templating.js";
|
|
3
4
|
import { LanguageModel } from "ai";
|
|
4
5
|
/**
|
|
5
6
|
* A specific AI example that is under evaluation
|
|
@@ -66,21 +67,34 @@ export interface CreateClassifierArgs extends WithTelemetry {
|
|
|
66
67
|
/**
|
|
67
68
|
* The prompt template to use for classification
|
|
68
69
|
*/
|
|
69
|
-
promptTemplate:
|
|
70
|
+
promptTemplate: PromptTemplate;
|
|
70
71
|
}
|
|
71
|
-
export interface CreateEvaluatorArgs {
|
|
72
|
+
export interface CreateEvaluatorArgs<ExampleType extends Record<string, unknown> = Record<string, unknown>> extends WithTelemetry {
|
|
72
73
|
/**
|
|
73
74
|
* The name of the metric that the evaluator produces
|
|
74
75
|
* E.x. "correctness"
|
|
75
76
|
*/
|
|
76
77
|
name: string;
|
|
78
|
+
/**
|
|
79
|
+
* The kind of the evaluation. Also known as the "kind" of evaluator.
|
|
80
|
+
*/
|
|
81
|
+
kind: EvaluationKind;
|
|
77
82
|
/**
|
|
78
83
|
* If present, represents the direction in which you want the metric to be optimized
|
|
79
84
|
* E.x. "MAXIMIZE" means you want the number to be higher.
|
|
80
85
|
*/
|
|
81
86
|
optimizationDirection?: OptimizationDirection;
|
|
87
|
+
/**
|
|
88
|
+
* The mapping of the input to evaluate to the shape that the evaluator expects
|
|
89
|
+
*/
|
|
90
|
+
inputMapping?: ObjectMapping<ExampleType>;
|
|
82
91
|
}
|
|
83
|
-
export
|
|
92
|
+
export type CreateLLMEvaluatorArgs<RecordType extends Record<string, unknown>> = Omit<CreateEvaluatorArgs<RecordType>, "kind">;
|
|
93
|
+
export interface CreateClassificationEvaluatorArgs<RecordType extends Record<string, unknown>> extends CreateClassifierArgs, CreateLLMEvaluatorArgs<RecordType> {
|
|
94
|
+
/**
|
|
95
|
+
* The prompt template to use for classification
|
|
96
|
+
*/
|
|
97
|
+
promptTemplate: PromptTemplate;
|
|
84
98
|
}
|
|
85
99
|
export type EvaluatorFn<ExampleType extends Record<string, unknown>> = (args: ExampleType) => Promise<EvaluationResult>;
|
|
86
100
|
/**
|
|
@@ -114,7 +128,7 @@ interface EvaluatorDescription {
|
|
|
114
128
|
* The Base Evaluator interface
|
|
115
129
|
* This is the interface that all evaluators must implement
|
|
116
130
|
*/
|
|
117
|
-
export interface
|
|
131
|
+
export interface EvaluatorInterface<ExampleType extends Record<string, unknown>> extends EvaluatorDescription {
|
|
118
132
|
/**
|
|
119
133
|
* The function that evaluates the example
|
|
120
134
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"evals.d.ts","sourceRoot":"","sources":["../../../src/types/evals.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,QAAQ,CAAC;AACvC,OAAO,EAAE,QAAQ,EAAE,MAAM,cAAc,CAAC;
|
|
1
|
+
{"version":3,"file":"evals.d.ts","sourceRoot":"","sources":["../../../src/types/evals.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,QAAQ,CAAC;AACvC,OAAO,EAAE,aAAa,EAAE,MAAM,QAAQ,CAAC;AACvC,OAAO,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAE9C,OAAO,EAAE,aAAa,EAAE,MAAM,IAAI,CAAC;AAEnC;;GAEG;AACH,MAAM,WAAW,aAAa,CAAC,UAAU,EAAE,SAAS;IAClD,MAAM,EAAE,UAAU,CAAC;IACnB,QAAQ,CAAC,EAAE,UAAU,CAAC;IACtB,KAAK,CAAC,EAAE,SAAS,CAAC;IAClB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;CACxB;AAED,MAAM,WAAW,OAAO;IACtB,KAAK,EAAE,aAAa,CAAC;CACtB;AAGD,MAAM,WAAW,iBAAkB,SAAQ,OAAO;CAAG;AAErD;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B;;;OAGG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IACf;;;OAGG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IACf;;;OAGG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;GAEG;AACH,MAAM,WAAW,oBAAoB;IACnC,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED;;GAEG;AACH,MAAM,WAAW,oBAAoB;IACnC,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;CACf;AAED;;GAEG;AACH,MAAM,MAAM,wBAAwB,GAAG,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;AAE9D;;GAEG;AACH,MAAM,WAAW,oBAAqB,SAAQ,aAAa;IAIzD,KAAK,EAAE,aAAa,CAAC;IACrB;;;OAGG;IACH,OAAO,EAAE,wBAAwB,CAAC;IAClC;;OAEG;IACH,cAAc,EAAE,cAAc,CAAC;CAChC;AAED,MAAM,WAAW,mBAAmB,CAClC,WAAW,SAAS,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CACrE,SAAQ,aAAa;IACrB;;;OAGG;IACH,IAAI,EAAE,MAAM,CAAC;IACb;;OAEG;IACH,IAAI,EAAE,cAAc,CAAC;IACrB;;;OAGG;IACH,qBAAqB,CAAC,EAAE,qBAAqB,CAAC;IAC9C;;OAEG;IACH,YAAY,CAAC,EAAE,aAAa,CAAC,WAAW,CAAC,CAAC;CAC3C;AAED,MAAM,MAAM,sBAAsB,CAAC,UAAU,SAAS,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,IAC3E,IAAI,CAAC,mBAAmB,CAAC,UAAU,CAAC,EAAE,MAAM,CAAC,CAAC;AAEhD,MAAM,WAAW,iCAAiC,CAChD,UAAU,SAAS,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAC1C,SAAQ,oBAAoB,EAC1B,sBAAsB,CAAC,UAAU,CAAC;IACpC;;OAEG;IACH,cAAc,EAAE,cAAc,CAAC;CAChC;AAED,MAAM,MAAM,WAAW,CAAC,WAAW,SAAS,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,IAAI,CACrE,IAAI,EAAE,WAAW,KACd,OAAO,CAAC,gBAAgB,CAAC,CAAC;AAE/B;;GAEG;AACH,MAAM,MAAM,cAAc,GAAG,KAAK,GAAG,MAAM,CAAC;AAE5C;;;GAGG;AACH,MAAM,MAAM,qBAAqB,GAAG,UAAU,GAAG,UAAU,CAAC;AAE5D;;GAEG;AACH,UAAU,oBAAoB;IAC5B;;OAEG;IACH,IAAI,EAAE,MAAM,CAAC;IACb;;OAEG;IACH,IAAI,EAAE,cAAc,CAAC;IACrB;;;OAGG;IACH,qBAAqB,CAAC,EAAE,qBAAqB,CAAC;CAC/C;AAED;;;GAGG;AACH,MAAM,WAAW,kBAAkB,CAAC,WAAW,SAAS,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAC7E,SAAQ,oBAAoB;IAC5B;;OAEG;IACH,QAAQ,EAAE,WAAW,CAAC,WAAW,CAAC,CAAC;CACpC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/types/index.ts"],"names":[],"mappings":"AAAA,cAAc,SAAS,CAAC;AACxB,cAAc,cAAc,CAAC;AAC7B,cAAc,QAAQ,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/types/index.ts"],"names":[],"mappings":"AAAA,cAAc,SAAS,CAAC;AACxB,cAAc,cAAc,CAAC;AAC7B,cAAc,QAAQ,CAAC;AACvB,cAAc,QAAQ,CAAC"}
|
package/dist/esm/types/index.js
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/types/index.ts"],"names":[],"mappings":"AAAA,cAAc,SAAS,CAAC;AACxB,cAAc,cAAc,CAAC;AAC7B,cAAc,QAAQ,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/types/index.ts"],"names":[],"mappings":"AAAA,cAAc,SAAS,CAAC;AACxB,cAAc,cAAc,CAAC;AAC7B,cAAc,QAAQ,CAAC;AACvB,cAAc,QAAQ,CAAC"}
|
package/dist/esm/types/otel.d.ts
CHANGED
|
@@ -1,17 +1,18 @@
|
|
|
1
1
|
import { Tracer } from "@opentelemetry/api";
|
|
2
|
+
export type TelemetryConfig = {
|
|
3
|
+
/**
|
|
4
|
+
* Whether OpenTelemetry is enabled on the call.
|
|
5
|
+
* Defaults to true for visibility into the evals calls.
|
|
6
|
+
* @default true
|
|
7
|
+
*/
|
|
8
|
+
isEnabled?: boolean;
|
|
9
|
+
/**
|
|
10
|
+
* The tracer to use for the call.
|
|
11
|
+
* If not provided, the traces will get picked up by the global tracer.
|
|
12
|
+
*/
|
|
13
|
+
tracer?: Tracer;
|
|
14
|
+
};
|
|
2
15
|
export type WithTelemetry = {
|
|
3
|
-
telemetry?:
|
|
4
|
-
/**
|
|
5
|
-
* Whether OpenTelemetry is enabled on the call.
|
|
6
|
-
* Defaults to true for visibility into the evals calls.
|
|
7
|
-
* @default true
|
|
8
|
-
*/
|
|
9
|
-
isEnabled?: boolean;
|
|
10
|
-
/**
|
|
11
|
-
* The tracer to use for the call.
|
|
12
|
-
* If not provided, the traces will get picked up by the global tracer.
|
|
13
|
-
*/
|
|
14
|
-
tracer?: Tracer;
|
|
15
|
-
};
|
|
16
|
+
telemetry?: TelemetryConfig;
|
|
16
17
|
};
|
|
17
18
|
//# sourceMappingURL=otel.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"otel.d.ts","sourceRoot":"","sources":["../../../src/types/otel.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAE5C,MAAM,MAAM,
|
|
1
|
+
{"version":3,"file":"otel.d.ts","sourceRoot":"","sources":["../../../src/types/otel.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAE5C,MAAM,MAAM,eAAe,GAAG;IAC5B;;;;OAIG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB;;;OAGG;IACH,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB,CAAC;AAEF,MAAM,MAAM,aAAa,GAAG;IAC1B,SAAS,CAAC,EAAE,eAAe,CAAC;CAC7B,CAAC"}
|
|
@@ -1,10 +1,12 @@
|
|
|
1
|
-
|
|
1
|
+
import type { ModelMessage } from "ai";
|
|
2
|
+
export type PromptTemplate = string | Array<ModelMessage>;
|
|
3
|
+
export type RenderedPrompt = string | Array<ModelMessage>;
|
|
2
4
|
export type TemplateVariables = Record<string, unknown>;
|
|
3
|
-
/**
|
|
4
|
-
* A class or object that has a prompt template
|
|
5
|
-
*/
|
|
6
5
|
export interface WithPromptTemplate {
|
|
7
|
-
readonly promptTemplate:
|
|
8
|
-
|
|
6
|
+
readonly promptTemplate: PromptTemplate;
|
|
7
|
+
/**
|
|
8
|
+
* List out the prompt template variables needed to perform evaluation
|
|
9
|
+
*/
|
|
10
|
+
readonly promptTemplateVariables: string[];
|
|
9
11
|
}
|
|
10
12
|
//# sourceMappingURL=templating.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"templating.d.ts","sourceRoot":"","sources":["../../../src/types/templating.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,
|
|
1
|
+
{"version":3,"file":"templating.d.ts","sourceRoot":"","sources":["../../../src/types/templating.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,IAAI,CAAC;AAEvC,MAAM,MAAM,cAAc,GAAG,MAAM,GAAG,KAAK,CAAC,YAAY,CAAC,CAAC;AAC1D,MAAM,MAAM,cAAc,GAAG,MAAM,GAAG,KAAK,CAAC,YAAY,CAAC,CAAC;AAC1D,MAAM,MAAM,iBAAiB,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;AAExD,MAAM,WAAW,kBAAkB;IACjC,QAAQ,CAAC,cAAc,EAAE,cAAc,CAAC;IACxC;;OAEG;IACH,QAAQ,CAAC,uBAAuB,EAAE,MAAM,EAAE,CAAC;CAC5C"}
|
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
import { EvaluatorBase } from "../core/EvaluatorBase.js";
|
|
2
|
+
import { ObjectMapping } from "../types/data.js";
|
|
3
|
+
/**
|
|
4
|
+
* Context for binding an evaluator with input mapping configuration.
|
|
5
|
+
*
|
|
6
|
+
* This type defines the structure for binding an evaluator to a specific data shape
|
|
7
|
+
* by mapping the evaluator's expected input fields to the actual data structure.
|
|
8
|
+
*
|
|
9
|
+
* @example
|
|
10
|
+
* ```typescript
|
|
11
|
+
* // Map evaluator fields to your data structure
|
|
12
|
+
* const context: BindingContext<MyDataType> = {
|
|
13
|
+
* inputMapping: {
|
|
14
|
+
* input: "userQuery", // Maps "input" to "userQuery" field
|
|
15
|
+
* reference: "context", // Maps "reference" to "context" field
|
|
16
|
+
* output: "modelResponse", // Maps "output" to "modelResponse" field
|
|
17
|
+
* },
|
|
18
|
+
* };
|
|
19
|
+
* ```
|
|
20
|
+
*
|
|
21
|
+
* @example
|
|
22
|
+
* ```typescript
|
|
23
|
+
* // Using nested property access
|
|
24
|
+
* const context: BindingContext<ApiResponse> = {
|
|
25
|
+
* inputMapping: {
|
|
26
|
+
* input: "request.body.query",
|
|
27
|
+
* reference: "request.body.context",
|
|
28
|
+
* output: "response.data.text",
|
|
29
|
+
* },
|
|
30
|
+
* };
|
|
31
|
+
* ```
|
|
32
|
+
*
|
|
33
|
+
* @example
|
|
34
|
+
* ```typescript
|
|
35
|
+
* // Using function-based mapping for transformations
|
|
36
|
+
* const context: BindingContext<RawData> = {
|
|
37
|
+
* inputMapping: {
|
|
38
|
+
* input: "question",
|
|
39
|
+
* reference: (data) => data.context.join("\n"), // Transform array to string
|
|
40
|
+
* output: "answer",
|
|
41
|
+
* },
|
|
42
|
+
* };
|
|
43
|
+
* ```
|
|
44
|
+
*
|
|
45
|
+
* @typeParam RecordType - The type of the data record that will be evaluated
|
|
46
|
+
*/
|
|
47
|
+
export type BindingContext<RecordType extends Record<string, unknown>> = {
|
|
48
|
+
/**
|
|
49
|
+
* Mapping of evaluator input fields to data source fields.
|
|
50
|
+
*
|
|
51
|
+
* The keys represent the field names expected by the evaluator (e.g., "input", "output", "reference"),
|
|
52
|
+
* and the values specify how to extract those fields from your data structure.
|
|
53
|
+
*
|
|
54
|
+
* Supports:
|
|
55
|
+
* - Simple property names: `"fieldName"`
|
|
56
|
+
* - Dot notation: `"user.profile.name"`
|
|
57
|
+
* - Array access: `"items[0].id"`
|
|
58
|
+
* - JSONPath expressions: `"$.items[*].id"`
|
|
59
|
+
* - Function extractors: `(data) => data.customField.toUpperCase()`
|
|
60
|
+
*/
|
|
61
|
+
inputMapping: ObjectMapping<RecordType>;
|
|
62
|
+
};
|
|
63
|
+
/**
|
|
64
|
+
* Binds an evaluator to a specific data structure using input mapping.
|
|
65
|
+
*
|
|
66
|
+
* This function creates a new evaluator instance that automatically transforms
|
|
67
|
+
* your data structure to match what the evaluator expects. This is particularly
|
|
68
|
+
* useful when your data schema doesn't match the evaluator's expected input format.
|
|
69
|
+
*
|
|
70
|
+
* @param evaluator - The evaluator to bind (e.g., a hallucination evaluator)
|
|
71
|
+
* @param context - The binding context containing the input mapping configuration
|
|
72
|
+
* @returns A new evaluator instance with the input mapping applied
|
|
73
|
+
*
|
|
74
|
+
* @example
|
|
75
|
+
* **Basic usage with simple field mapping:**
|
|
76
|
+
* ```typescript
|
|
77
|
+
* import { bindEvaluator, createHallucinationEvaluator } from "@arizeai/phoenix-evals";
|
|
78
|
+
* import { openai } from "@ai-sdk/openai";
|
|
79
|
+
*
|
|
80
|
+
* type MyData = {
|
|
81
|
+
* question: string;
|
|
82
|
+
* context: string;
|
|
83
|
+
* answer: string;
|
|
84
|
+
* };
|
|
85
|
+
*
|
|
86
|
+
* const evaluator = bindEvaluator<MyData>(
|
|
87
|
+
* createHallucinationEvaluator({ model: openai("gpt-4") }),
|
|
88
|
+
* {
|
|
89
|
+
* inputMapping: {
|
|
90
|
+
* input: "question", // Evaluator expects "input", map from "question"
|
|
91
|
+
* reference: "context", // Evaluator expects "reference", map from "context"
|
|
92
|
+
* output: "answer", // Evaluator expects "output", map from "answer"
|
|
93
|
+
* },
|
|
94
|
+
* }
|
|
95
|
+
* );
|
|
96
|
+
*
|
|
97
|
+
* // Now you can evaluate with your data structure
|
|
98
|
+
* const result = await evaluator.evaluate({
|
|
99
|
+
* question: "What is AI?",
|
|
100
|
+
* context: "AI is artificial intelligence...",
|
|
101
|
+
* answer: "AI stands for artificial intelligence",
|
|
102
|
+
* });
|
|
103
|
+
* ```
|
|
104
|
+
*
|
|
105
|
+
* @example
|
|
106
|
+
* **Using nested property access:**
|
|
107
|
+
* ```typescript
|
|
108
|
+
* type ApiResponse = {
|
|
109
|
+
* request: {
|
|
110
|
+
* body: {
|
|
111
|
+
* query: string;
|
|
112
|
+
* context: string;
|
|
113
|
+
* };
|
|
114
|
+
* };
|
|
115
|
+
* response: {
|
|
116
|
+
* data: {
|
|
117
|
+
* text: string;
|
|
118
|
+
* };
|
|
119
|
+
* };
|
|
120
|
+
* };
|
|
121
|
+
*
|
|
122
|
+
* const evaluator = bindEvaluator<ApiResponse>(
|
|
123
|
+
* createHallucinationEvaluator({ model: openai("gpt-4") }),
|
|
124
|
+
* {
|
|
125
|
+
* inputMapping: {
|
|
126
|
+
* input: "request.body.query",
|
|
127
|
+
* reference: "request.body.context",
|
|
128
|
+
* output: "response.data.text",
|
|
129
|
+
* },
|
|
130
|
+
* }
|
|
131
|
+
* );
|
|
132
|
+
* ```
|
|
133
|
+
*
|
|
134
|
+
* @example
|
|
135
|
+
* **Using function-based mapping for data transformation:**
|
|
136
|
+
* ```typescript
|
|
137
|
+
* type RawData = {
|
|
138
|
+
* question: string;
|
|
139
|
+
* contexts: string[]; // Array of context strings
|
|
140
|
+
* answer: string;
|
|
141
|
+
* };
|
|
142
|
+
*
|
|
143
|
+
* const evaluator = bindEvaluator<RawData>(
|
|
144
|
+
* createHallucinationEvaluator({ model: openai("gpt-4") }),
|
|
145
|
+
* {
|
|
146
|
+
* inputMapping: {
|
|
147
|
+
* input: "question",
|
|
148
|
+
* // Transform array to single string
|
|
149
|
+
* reference: (data) => data.contexts.join("\n\n"),
|
|
150
|
+
* output: "answer",
|
|
151
|
+
* },
|
|
152
|
+
* }
|
|
153
|
+
* );
|
|
154
|
+
* ```
|
|
155
|
+
*
|
|
156
|
+
* @example
|
|
157
|
+
* **Using JSONPath for complex queries:**
|
|
158
|
+
* ```typescript
|
|
159
|
+
* type ComplexData = {
|
|
160
|
+
* conversation: {
|
|
161
|
+
* messages: Array<{ role: string; content: string }>;
|
|
162
|
+
* };
|
|
163
|
+
* metadata: {
|
|
164
|
+
* sources: string[];
|
|
165
|
+
* };
|
|
166
|
+
* };
|
|
167
|
+
*
|
|
168
|
+
* const evaluator = bindEvaluator<ComplexData>(
|
|
169
|
+
* createHallucinationEvaluator({ model: openai("gpt-4") }),
|
|
170
|
+
* {
|
|
171
|
+
* inputMapping: {
|
|
172
|
+
* // Extract last user message
|
|
173
|
+
* input: "$.conversation.messages[?(@.role=='user')].content[-1]",
|
|
174
|
+
* // Extract all sources
|
|
175
|
+
* reference: "$.metadata.sources[*]",
|
|
176
|
+
* // Extract last assistant message
|
|
177
|
+
* output: "$.conversation.messages[?(@.role=='assistant')].content[-1]",
|
|
178
|
+
* },
|
|
179
|
+
* }
|
|
180
|
+
* );
|
|
181
|
+
* ```
|
|
182
|
+
*
|
|
183
|
+
* @example
|
|
184
|
+
* **Binding multiple evaluators with different mappings:**
|
|
185
|
+
* ```typescript
|
|
186
|
+
* type EvaluationData = {
|
|
187
|
+
* userQuery: string;
|
|
188
|
+
* systemContext: string;
|
|
189
|
+
* modelOutput: string;
|
|
190
|
+
* expectedOutput?: string;
|
|
191
|
+
* };
|
|
192
|
+
*
|
|
193
|
+
* // Hallucination evaluator
|
|
194
|
+
* const hallucinationEvaluator = bindEvaluator<EvaluationData>(
|
|
195
|
+
* createHallucinationEvaluator({ model: openai("gpt-4") }),
|
|
196
|
+
* {
|
|
197
|
+
* inputMapping: {
|
|
198
|
+
* input: "userQuery",
|
|
199
|
+
* reference: "systemContext",
|
|
200
|
+
* output: "modelOutput",
|
|
201
|
+
* },
|
|
202
|
+
* }
|
|
203
|
+
* );
|
|
204
|
+
*
|
|
205
|
+
* // Document relevancy evaluator (if it exists)
|
|
206
|
+
* const relevancyEvaluator = bindEvaluator<EvaluationData>(
|
|
207
|
+
* createDocumentRelevancyEvaluator({ model: openai("gpt-4") }),
|
|
208
|
+
* {
|
|
209
|
+
* inputMapping: {
|
|
210
|
+
* query: "userQuery",
|
|
211
|
+
* document: "systemContext",
|
|
212
|
+
* output: "modelOutput",
|
|
213
|
+
* },
|
|
214
|
+
* }
|
|
215
|
+
* );
|
|
216
|
+
* ```
|
|
217
|
+
*/
|
|
218
|
+
export declare function bindEvaluator<RecordType extends Record<string, unknown>>(evaluator: EvaluatorBase<RecordType>, context: BindingContext<RecordType>): EvaluatorBase<RecordType>;
|
|
219
|
+
//# sourceMappingURL=bindEvaluator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bindEvaluator.d.ts","sourceRoot":"","sources":["../../../src/utils/bindEvaluator.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AACtD,OAAO,EAAE,aAAa,EAAE,MAAM,eAAe,CAAC;AAE9C;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2CG;AACH,MAAM,MAAM,cAAc,CAAC,UAAU,SAAS,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,IAAI;IACvE;;;;;;;;;;;;OAYG;IACH,YAAY,EAAE,aAAa,CAAC,UAAU,CAAC,CAAC;CACzC,CAAC;AAEF;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA0JG;AACH,wBAAgB,aAAa,CAAC,UAAU,SAAS,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EACtE,SAAS,EAAE,aAAa,CAAC,UAAU,CAAC,EACpC,OAAO,EAAE,cAAc,CAAC,UAAU,CAAC,GAClC,aAAa,CAAC,UAAU,CAAC,CAM3B"}
|