arize 8.0.0a22__py3-none-any.whl → 8.0.0a23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arize/__init__.py +17 -9
- arize/_exporter/client.py +55 -36
- arize/_exporter/parsers/tracing_data_parser.py +41 -30
- arize/_exporter/validation.py +3 -3
- arize/_flight/client.py +207 -76
- arize/_generated/api_client/__init__.py +30 -6
- arize/_generated/api_client/api/__init__.py +1 -0
- arize/_generated/api_client/api/datasets_api.py +864 -190
- arize/_generated/api_client/api/experiments_api.py +167 -131
- arize/_generated/api_client/api/projects_api.py +1197 -0
- arize/_generated/api_client/api_client.py +2 -2
- arize/_generated/api_client/configuration.py +42 -34
- arize/_generated/api_client/exceptions.py +2 -2
- arize/_generated/api_client/models/__init__.py +15 -4
- arize/_generated/api_client/models/dataset.py +10 -10
- arize/_generated/api_client/models/dataset_example.py +111 -0
- arize/_generated/api_client/models/dataset_example_update.py +100 -0
- arize/_generated/api_client/models/dataset_version.py +13 -13
- arize/_generated/api_client/models/datasets_create_request.py +16 -8
- arize/_generated/api_client/models/datasets_examples_insert_request.py +100 -0
- arize/_generated/api_client/models/datasets_examples_list200_response.py +106 -0
- arize/_generated/api_client/models/datasets_examples_update_request.py +102 -0
- arize/_generated/api_client/models/datasets_list200_response.py +10 -4
- arize/_generated/api_client/models/experiment.py +14 -16
- arize/_generated/api_client/models/experiment_run.py +108 -0
- arize/_generated/api_client/models/experiment_run_create.py +102 -0
- arize/_generated/api_client/models/experiments_create_request.py +16 -10
- arize/_generated/api_client/models/experiments_list200_response.py +10 -4
- arize/_generated/api_client/models/experiments_runs_list200_response.py +19 -5
- arize/_generated/api_client/models/{error.py → pagination_metadata.py} +13 -11
- arize/_generated/api_client/models/primitive_value.py +172 -0
- arize/_generated/api_client/models/problem.py +100 -0
- arize/_generated/api_client/models/project.py +99 -0
- arize/_generated/api_client/models/{datasets_list_examples200_response.py → projects_create_request.py} +13 -11
- arize/_generated/api_client/models/projects_list200_response.py +106 -0
- arize/_generated/api_client/rest.py +2 -2
- arize/_generated/api_client/test/test_dataset.py +4 -2
- arize/_generated/api_client/test/test_dataset_example.py +56 -0
- arize/_generated/api_client/test/test_dataset_example_update.py +52 -0
- arize/_generated/api_client/test/test_dataset_version.py +7 -2
- arize/_generated/api_client/test/test_datasets_api.py +27 -13
- arize/_generated/api_client/test/test_datasets_create_request.py +8 -4
- arize/_generated/api_client/test/{test_datasets_list_examples200_response.py → test_datasets_examples_insert_request.py} +19 -15
- arize/_generated/api_client/test/test_datasets_examples_list200_response.py +66 -0
- arize/_generated/api_client/test/test_datasets_examples_update_request.py +61 -0
- arize/_generated/api_client/test/test_datasets_list200_response.py +9 -3
- arize/_generated/api_client/test/test_experiment.py +2 -4
- arize/_generated/api_client/test/test_experiment_run.py +56 -0
- arize/_generated/api_client/test/test_experiment_run_create.py +54 -0
- arize/_generated/api_client/test/test_experiments_api.py +6 -6
- arize/_generated/api_client/test/test_experiments_create_request.py +9 -6
- arize/_generated/api_client/test/test_experiments_list200_response.py +9 -5
- arize/_generated/api_client/test/test_experiments_runs_list200_response.py +15 -5
- arize/_generated/api_client/test/test_pagination_metadata.py +53 -0
- arize/_generated/api_client/test/{test_error.py → test_primitive_value.py} +13 -14
- arize/_generated/api_client/test/test_problem.py +57 -0
- arize/_generated/api_client/test/test_project.py +58 -0
- arize/_generated/api_client/test/test_projects_api.py +59 -0
- arize/_generated/api_client/test/test_projects_create_request.py +54 -0
- arize/_generated/api_client/test/test_projects_list200_response.py +70 -0
- arize/_generated/api_client_README.md +43 -29
- arize/_generated/protocol/flight/flight_pb2.py +400 -0
- arize/_lazy.py +27 -19
- arize/client.py +268 -55
- arize/config.py +365 -116
- arize/constants/__init__.py +1 -0
- arize/constants/config.py +11 -4
- arize/constants/ml.py +6 -4
- arize/constants/openinference.py +2 -0
- arize/constants/pyarrow.py +2 -0
- arize/constants/spans.py +3 -1
- arize/datasets/__init__.py +1 -0
- arize/datasets/client.py +299 -84
- arize/datasets/errors.py +32 -2
- arize/datasets/validation.py +18 -8
- arize/embeddings/__init__.py +2 -0
- arize/embeddings/auto_generator.py +23 -19
- arize/embeddings/base_generators.py +89 -36
- arize/embeddings/constants.py +2 -0
- arize/embeddings/cv_generators.py +26 -4
- arize/embeddings/errors.py +27 -5
- arize/embeddings/nlp_generators.py +31 -12
- arize/embeddings/tabular_generators.py +32 -20
- arize/embeddings/usecases.py +12 -2
- arize/exceptions/__init__.py +1 -0
- arize/exceptions/auth.py +11 -1
- arize/exceptions/base.py +29 -4
- arize/exceptions/models.py +21 -2
- arize/exceptions/parameters.py +31 -0
- arize/exceptions/spaces.py +12 -1
- arize/exceptions/types.py +86 -7
- arize/exceptions/values.py +220 -20
- arize/experiments/__init__.py +1 -0
- arize/experiments/client.py +389 -285
- arize/experiments/evaluators/__init__.py +1 -0
- arize/experiments/evaluators/base.py +74 -41
- arize/experiments/evaluators/exceptions.py +6 -3
- arize/experiments/evaluators/executors.py +121 -73
- arize/experiments/evaluators/rate_limiters.py +106 -57
- arize/experiments/evaluators/types.py +34 -7
- arize/experiments/evaluators/utils.py +65 -27
- arize/experiments/functions.py +103 -101
- arize/experiments/tracing.py +52 -44
- arize/experiments/types.py +56 -31
- arize/logging.py +54 -22
- arize/models/__init__.py +1 -0
- arize/models/batch_validation/__init__.py +1 -0
- arize/models/batch_validation/errors.py +543 -65
- arize/models/batch_validation/validator.py +339 -300
- arize/models/bounded_executor.py +20 -7
- arize/models/casting.py +75 -29
- arize/models/client.py +326 -107
- arize/models/proto.py +95 -40
- arize/models/stream_validation.py +42 -14
- arize/models/surrogate_explainer/__init__.py +1 -0
- arize/models/surrogate_explainer/mimic.py +24 -13
- arize/pre_releases.py +43 -0
- arize/projects/__init__.py +1 -0
- arize/projects/client.py +129 -0
- arize/regions.py +40 -0
- arize/spans/__init__.py +1 -0
- arize/spans/client.py +130 -106
- arize/spans/columns.py +13 -0
- arize/spans/conversion.py +54 -38
- arize/spans/validation/__init__.py +1 -0
- arize/spans/validation/annotations/__init__.py +1 -0
- arize/spans/validation/annotations/annotations_validation.py +6 -4
- arize/spans/validation/annotations/dataframe_form_validation.py +13 -11
- arize/spans/validation/annotations/value_validation.py +35 -11
- arize/spans/validation/common/__init__.py +1 -0
- arize/spans/validation/common/argument_validation.py +33 -8
- arize/spans/validation/common/dataframe_form_validation.py +35 -9
- arize/spans/validation/common/errors.py +211 -11
- arize/spans/validation/common/value_validation.py +80 -13
- arize/spans/validation/evals/__init__.py +1 -0
- arize/spans/validation/evals/dataframe_form_validation.py +28 -8
- arize/spans/validation/evals/evals_validation.py +34 -4
- arize/spans/validation/evals/value_validation.py +26 -3
- arize/spans/validation/metadata/__init__.py +1 -1
- arize/spans/validation/metadata/argument_validation.py +14 -5
- arize/spans/validation/metadata/dataframe_form_validation.py +26 -10
- arize/spans/validation/metadata/value_validation.py +24 -10
- arize/spans/validation/spans/__init__.py +1 -0
- arize/spans/validation/spans/dataframe_form_validation.py +34 -13
- arize/spans/validation/spans/spans_validation.py +35 -4
- arize/spans/validation/spans/value_validation.py +76 -7
- arize/types.py +293 -157
- arize/utils/__init__.py +1 -0
- arize/utils/arrow.py +31 -15
- arize/utils/cache.py +34 -6
- arize/utils/dataframe.py +19 -2
- arize/utils/online_tasks/__init__.py +2 -0
- arize/utils/online_tasks/dataframe_preprocessor.py +53 -41
- arize/utils/openinference_conversion.py +44 -5
- arize/utils/proto.py +10 -0
- arize/utils/size.py +5 -3
- arize/version.py +3 -1
- {arize-8.0.0a22.dist-info → arize-8.0.0a23.dist-info}/METADATA +4 -3
- arize-8.0.0a23.dist-info/RECORD +174 -0
- {arize-8.0.0a22.dist-info → arize-8.0.0a23.dist-info}/WHEEL +1 -1
- arize-8.0.0a23.dist-info/licenses/LICENSE +176 -0
- arize-8.0.0a23.dist-info/licenses/NOTICE +13 -0
- arize/_generated/protocol/flight/export_pb2.py +0 -61
- arize/_generated/protocol/flight/ingest_pb2.py +0 -365
- arize-8.0.0a22.dist-info/RECORD +0 -146
- arize-8.0.0a22.dist-info/licenses/LICENSE.md +0 -12
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
|
|
1
|
+
"""Common validation error classes for spans."""
|
|
2
2
|
|
|
3
3
|
from arize.constants.ml import (
|
|
4
4
|
MAX_EMBEDDING_DIMENSIONALITY,
|
|
@@ -23,26 +23,46 @@ from arize.logging import log_a_list
|
|
|
23
23
|
|
|
24
24
|
|
|
25
25
|
class InvalidTypeArgument(ValidationError):
|
|
26
|
+
"""Raised when an argument has an invalid type."""
|
|
27
|
+
|
|
26
28
|
def __repr__(self) -> str:
|
|
29
|
+
"""Return a string representation for debugging and logging."""
|
|
27
30
|
return "Invalid_Type_Argument"
|
|
28
31
|
|
|
29
|
-
def __init__(self, arg_name: str, arg_type: str, wrong_arg:
|
|
32
|
+
def __init__(self, arg_name: str, arg_type: str, wrong_arg: object) -> None:
|
|
33
|
+
"""Initialize the exception with argument type validation context.
|
|
34
|
+
|
|
35
|
+
Args:
|
|
36
|
+
arg_name: Name of the argument with invalid type.
|
|
37
|
+
arg_type: Expected type for the argument.
|
|
38
|
+
wrong_arg: Actual argument value that was invalid.
|
|
39
|
+
"""
|
|
30
40
|
self.arg_name = arg_name
|
|
31
41
|
self.arg_type = arg_type
|
|
32
42
|
self.wrong_arg = wrong_arg
|
|
33
43
|
|
|
34
44
|
def error_message(self) -> str:
|
|
45
|
+
"""Return the error message for this exception."""
|
|
35
46
|
return f"The {self.arg_name} must be a {self.arg_type}. Found {type(self.wrong_arg)}"
|
|
36
47
|
|
|
37
48
|
|
|
38
49
|
class InvalidDateTimeFormatType(ValidationError):
|
|
50
|
+
"""Raised when datetime format type is invalid or not supported."""
|
|
51
|
+
|
|
39
52
|
def __repr__(self) -> str:
|
|
53
|
+
"""Return a string representation for debugging and logging."""
|
|
40
54
|
return "Invalid_DateTime_Format_Type"
|
|
41
55
|
|
|
42
|
-
def __init__(self, wrong_input:
|
|
56
|
+
def __init__(self, wrong_input: object) -> None:
|
|
57
|
+
"""Initialize the exception with datetime format validation context.
|
|
58
|
+
|
|
59
|
+
Args:
|
|
60
|
+
wrong_input: Invalid input that was provided for datetime format.
|
|
61
|
+
"""
|
|
43
62
|
self.wrong_input = wrong_input
|
|
44
63
|
|
|
45
64
|
def error_message(self) -> str:
|
|
65
|
+
"""Return the error message for this exception."""
|
|
46
66
|
return f"The date time format must be a string. Found {type(self.wrong_input)}"
|
|
47
67
|
|
|
48
68
|
|
|
@@ -52,13 +72,22 @@ class InvalidDateTimeFormatType(ValidationError):
|
|
|
52
72
|
|
|
53
73
|
|
|
54
74
|
class InvalidDataFrameDuplicateColumns(ValidationError):
|
|
75
|
+
"""Raised when dataframe contains duplicate column names."""
|
|
76
|
+
|
|
55
77
|
def __repr__(self) -> str:
|
|
78
|
+
"""Return a string representation for debugging and logging."""
|
|
56
79
|
return "Invalid_DataFrame_Duplicate_Columns"
|
|
57
80
|
|
|
58
|
-
def __init__(self, duplicate_cols:
|
|
81
|
+
def __init__(self, duplicate_cols: list[str]) -> None:
|
|
82
|
+
"""Initialize the exception with duplicate columns context.
|
|
83
|
+
|
|
84
|
+
Args:
|
|
85
|
+
duplicate_cols: List of column names that have duplicates in the dataframe.
|
|
86
|
+
"""
|
|
59
87
|
self.duplicate_cols = duplicate_cols
|
|
60
88
|
|
|
61
89
|
def error_message(self) -> str:
|
|
90
|
+
"""Return the error message for this exception."""
|
|
62
91
|
return (
|
|
63
92
|
f"The following columns have duplicates in the dataframe: "
|
|
64
93
|
f"{log_a_list(self.duplicate_cols, 'and')}"
|
|
@@ -66,13 +95,22 @@ class InvalidDataFrameDuplicateColumns(ValidationError):
|
|
|
66
95
|
|
|
67
96
|
|
|
68
97
|
class InvalidDataFrameMissingColumns(ValidationError):
|
|
98
|
+
"""Raised when required columns are missing from dataframe."""
|
|
99
|
+
|
|
69
100
|
def __repr__(self) -> str:
|
|
101
|
+
"""Return a string representation for debugging and logging."""
|
|
70
102
|
return "Invalid_DataFrame_Missing_Columns"
|
|
71
103
|
|
|
72
|
-
def __init__(self, missing_cols:
|
|
104
|
+
def __init__(self, missing_cols: list[str]) -> None:
|
|
105
|
+
"""Initialize the exception with missing columns context.
|
|
106
|
+
|
|
107
|
+
Args:
|
|
108
|
+
missing_cols: List of required columns that are missing from the dataframe.
|
|
109
|
+
"""
|
|
73
110
|
self.missing_cols = missing_cols
|
|
74
111
|
|
|
75
112
|
def error_message(self) -> str:
|
|
113
|
+
"""Return the error message for this exception."""
|
|
76
114
|
return (
|
|
77
115
|
f"The following columns are missing in the dataframe and are required: "
|
|
78
116
|
f"{log_a_list(self.missing_cols, 'and')}"
|
|
@@ -80,16 +118,26 @@ class InvalidDataFrameMissingColumns(ValidationError):
|
|
|
80
118
|
|
|
81
119
|
|
|
82
120
|
class InvalidDataFrameColumnContentTypes(ValidationError):
|
|
121
|
+
"""Raised when dataframe column content types are invalid."""
|
|
122
|
+
|
|
83
123
|
def __repr__(self) -> str:
|
|
124
|
+
"""Return a string representation for debugging and logging."""
|
|
84
125
|
return "Invalid_DataFrame_Column_Content_Types"
|
|
85
126
|
|
|
86
127
|
def __init__(
|
|
87
|
-
self, invalid_type_cols:
|
|
128
|
+
self, invalid_type_cols: list[str], expected_type: str
|
|
88
129
|
) -> None:
|
|
130
|
+
"""Initialize the exception with column content type validation context.
|
|
131
|
+
|
|
132
|
+
Args:
|
|
133
|
+
invalid_type_cols: List of columns with incorrect content types.
|
|
134
|
+
expected_type: Expected content type for the columns.
|
|
135
|
+
"""
|
|
89
136
|
self.invalid_type_cols = invalid_type_cols
|
|
90
137
|
self.expected_type = expected_type
|
|
91
138
|
|
|
92
139
|
def error_message(self) -> str:
|
|
140
|
+
"""Return the error message for this exception."""
|
|
93
141
|
return (
|
|
94
142
|
"Found dataframe columns containing the wrong data type. "
|
|
95
143
|
f"The following columns should contain {self.expected_type}: "
|
|
@@ -103,13 +151,22 @@ class InvalidDataFrameColumnContentTypes(ValidationError):
|
|
|
103
151
|
|
|
104
152
|
|
|
105
153
|
class InvalidMissingValueInColumn(ValidationError):
|
|
154
|
+
"""Raised when column contains null or missing values."""
|
|
155
|
+
|
|
106
156
|
def __repr__(self) -> str:
|
|
157
|
+
"""Return a string representation for debugging and logging."""
|
|
107
158
|
return "Invalid_Missin_Value_In_Column"
|
|
108
159
|
|
|
109
160
|
def __init__(self, col_name: str) -> None:
|
|
161
|
+
"""Initialize the exception with missing value context.
|
|
162
|
+
|
|
163
|
+
Args:
|
|
164
|
+
col_name: Name of the column containing missing values.
|
|
165
|
+
"""
|
|
110
166
|
self.col_name = col_name
|
|
111
167
|
|
|
112
168
|
def error_message(self) -> str:
|
|
169
|
+
"""Return the error message for this exception."""
|
|
113
170
|
return (
|
|
114
171
|
f"The column '{self.col_name}' has at least one missing value. "
|
|
115
172
|
"This column must not have missing values"
|
|
@@ -117,15 +174,26 @@ class InvalidMissingValueInColumn(ValidationError):
|
|
|
117
174
|
|
|
118
175
|
|
|
119
176
|
class InvalidStringLengthInColumn(ValidationError):
|
|
177
|
+
"""Raised when string values in column exceed length limits."""
|
|
178
|
+
|
|
120
179
|
def __repr__(self) -> str:
|
|
180
|
+
"""Return a string representation for debugging and logging."""
|
|
121
181
|
return "Invalid_String_Length_In_Column"
|
|
122
182
|
|
|
123
183
|
def __init__(self, col_name: str, min_length: int, max_length: int) -> None:
|
|
184
|
+
"""Initialize the exception with string length validation context.
|
|
185
|
+
|
|
186
|
+
Args:
|
|
187
|
+
col_name: Name of the column with invalid string lengths.
|
|
188
|
+
min_length: Minimum acceptable string length.
|
|
189
|
+
max_length: Maximum acceptable string length.
|
|
190
|
+
"""
|
|
124
191
|
self.col_name = col_name
|
|
125
192
|
self.min_length = min_length
|
|
126
193
|
self.max_length = max_length
|
|
127
194
|
|
|
128
195
|
def error_message(self) -> str:
|
|
196
|
+
"""Return the error message for this exception."""
|
|
129
197
|
return (
|
|
130
198
|
f"The column '{self.col_name}' contains invalid string values, "
|
|
131
199
|
f"their length must be between {self.min_length} and {self.max_length}."
|
|
@@ -133,27 +201,46 @@ class InvalidStringLengthInColumn(ValidationError):
|
|
|
133
201
|
|
|
134
202
|
|
|
135
203
|
class InvalidJsonStringInColumn(ValidationError):
|
|
204
|
+
"""Raised when JSON string in column is invalid or malformed."""
|
|
205
|
+
|
|
136
206
|
def __repr__(self) -> str:
|
|
207
|
+
"""Return a string representation for debugging and logging."""
|
|
137
208
|
return "Invalid_Json_String_In_Column"
|
|
138
209
|
|
|
139
210
|
def __init__(self, col_name: str) -> None:
|
|
211
|
+
"""Initialize the exception with JSON string validation context.
|
|
212
|
+
|
|
213
|
+
Args:
|
|
214
|
+
col_name: Name of the column containing invalid JSON strings.
|
|
215
|
+
"""
|
|
140
216
|
self.col_name = col_name
|
|
141
217
|
|
|
142
218
|
def error_message(self) -> str:
|
|
219
|
+
"""Return the error message for this exception."""
|
|
143
220
|
return (
|
|
144
221
|
f"The column '{self.col_name}' contains invalid JSON string values."
|
|
145
222
|
)
|
|
146
223
|
|
|
147
224
|
|
|
148
225
|
class InvalidStringValueNotAllowedInColumn(ValidationError):
|
|
226
|
+
"""Raised when column contains disallowed string values."""
|
|
227
|
+
|
|
149
228
|
def __repr__(self) -> str:
|
|
229
|
+
"""Return a string representation for debugging and logging."""
|
|
150
230
|
return "Invalid_String_Value_Not_Allowed_In_Column"
|
|
151
231
|
|
|
152
|
-
def __init__(self, col_name: str, allowed_values:
|
|
232
|
+
def __init__(self, col_name: str, allowed_values: list[str]) -> None:
|
|
233
|
+
"""Initialize the exception with allowed string values validation context.
|
|
234
|
+
|
|
235
|
+
Args:
|
|
236
|
+
col_name: Name of the column containing disallowed values.
|
|
237
|
+
allowed_values: List of values that are allowed in the column.
|
|
238
|
+
"""
|
|
153
239
|
self.col_name = col_name
|
|
154
240
|
self.allowed_values = allowed_values
|
|
155
241
|
|
|
156
242
|
def error_message(self) -> str:
|
|
243
|
+
"""Return the error message for this exception."""
|
|
157
244
|
return (
|
|
158
245
|
f"The column '{self.col_name}' contains invalid string values. "
|
|
159
246
|
f"Allowed values are {log_a_list(self.allowed_values, 'and')}"
|
|
@@ -161,13 +248,22 @@ class InvalidStringValueNotAllowedInColumn(ValidationError):
|
|
|
161
248
|
|
|
162
249
|
|
|
163
250
|
class InvalidTimestampValueInColumn(ValidationError):
|
|
251
|
+
"""Raised when timestamp values in column are outside acceptable range."""
|
|
252
|
+
|
|
164
253
|
def __repr__(self) -> str:
|
|
254
|
+
"""Return a string representation for debugging and logging."""
|
|
165
255
|
return "Invalid_Timestamp_Value_In_Column"
|
|
166
256
|
|
|
167
257
|
def __init__(self, timestamp_col_name: str) -> None:
|
|
258
|
+
"""Initialize the exception with timestamp validation context.
|
|
259
|
+
|
|
260
|
+
Args:
|
|
261
|
+
timestamp_col_name: Name of the column containing invalid timestamp values.
|
|
262
|
+
"""
|
|
168
263
|
self.timestamp_col_name = timestamp_col_name
|
|
169
264
|
|
|
170
265
|
def error_message(self) -> str:
|
|
266
|
+
"""Return the error message for this exception."""
|
|
171
267
|
return (
|
|
172
268
|
f"At least one timestamp in the column '{self.timestamp_col_name}' is out of range. "
|
|
173
269
|
f"Timestamps must be within {MAX_FUTURE_YEARS_FROM_CURRENT_TIME} year "
|
|
@@ -177,14 +273,24 @@ class InvalidTimestampValueInColumn(ValidationError):
|
|
|
177
273
|
|
|
178
274
|
|
|
179
275
|
class InvalidStartAndEndTimeValuesInColumn(ValidationError):
|
|
276
|
+
"""Raised when start time is not before end time in span records."""
|
|
277
|
+
|
|
180
278
|
def __repr__(self) -> str:
|
|
279
|
+
"""Return a string representation for debugging and logging."""
|
|
181
280
|
return "Invalid_Start_And_End_Time_Values_In_Column"
|
|
182
281
|
|
|
183
282
|
def __init__(self, greater_col_name: str, less_col_name: str) -> None:
|
|
283
|
+
"""Initialize the exception with span time validation context.
|
|
284
|
+
|
|
285
|
+
Args:
|
|
286
|
+
greater_col_name: Name of the column that should have greater values (end time).
|
|
287
|
+
less_col_name: Name of the column that should have lesser values (start time).
|
|
288
|
+
"""
|
|
184
289
|
self.greater_col_name = greater_col_name
|
|
185
290
|
self.less_col_name = less_col_name
|
|
186
291
|
|
|
187
292
|
def error_message(self) -> str:
|
|
293
|
+
"""Return the error message for this exception."""
|
|
188
294
|
return (
|
|
189
295
|
f"Invalid span times. Values in column '{self.greater_col_name}' "
|
|
190
296
|
f"should be greater than values in column '{self.less_col_name}'"
|
|
@@ -192,7 +298,10 @@ class InvalidStartAndEndTimeValuesInColumn(ValidationError):
|
|
|
192
298
|
|
|
193
299
|
|
|
194
300
|
class InvalidEventValueInColumn(ValidationError):
|
|
301
|
+
"""Raised when event values in column are invalid or malformed."""
|
|
302
|
+
|
|
195
303
|
def __repr__(self) -> str:
|
|
304
|
+
"""Return a string representation for debugging and logging."""
|
|
196
305
|
return "Invalid_Event_Value_In_Column"
|
|
197
306
|
|
|
198
307
|
def __init__(
|
|
@@ -202,12 +311,21 @@ class InvalidEventValueInColumn(ValidationError):
|
|
|
202
311
|
wrong_time: bool,
|
|
203
312
|
wrong_attrs: bool,
|
|
204
313
|
) -> None:
|
|
314
|
+
"""Initialize the exception with event value validation context.
|
|
315
|
+
|
|
316
|
+
Args:
|
|
317
|
+
col_name: Name of the column containing invalid events.
|
|
318
|
+
wrong_name: Whether event names are invalid.
|
|
319
|
+
wrong_time: Whether event timestamps are invalid.
|
|
320
|
+
wrong_attrs: Whether event attributes are invalid.
|
|
321
|
+
"""
|
|
205
322
|
self.col_name = col_name
|
|
206
323
|
self.wrong_name = wrong_name
|
|
207
324
|
self.wrong_time = wrong_time
|
|
208
325
|
self.wrong_attrs = wrong_attrs
|
|
209
326
|
|
|
210
327
|
def error_message(self) -> str:
|
|
328
|
+
"""Return the error message for this exception."""
|
|
211
329
|
msg = f"Found at least one invalid event in column '{self.col_name}'. "
|
|
212
330
|
if self.wrong_name:
|
|
213
331
|
msg += (
|
|
@@ -228,7 +346,10 @@ class InvalidEventValueInColumn(ValidationError):
|
|
|
228
346
|
|
|
229
347
|
|
|
230
348
|
class InvalidLLMMessageValueInColumn(ValidationError):
|
|
349
|
+
"""Raised when LLM message values in column are invalid or malformed."""
|
|
350
|
+
|
|
231
351
|
def __repr__(self) -> str:
|
|
352
|
+
"""Return a string representation for debugging and logging."""
|
|
232
353
|
return "Invalid_LLM_Message_Value_In_Column"
|
|
233
354
|
|
|
234
355
|
def __init__(
|
|
@@ -238,12 +359,21 @@ class InvalidLLMMessageValueInColumn(ValidationError):
|
|
|
238
359
|
wrong_content: bool,
|
|
239
360
|
wrong_tool_calls: bool,
|
|
240
361
|
) -> None:
|
|
362
|
+
"""Initialize the exception with LLM message validation context.
|
|
363
|
+
|
|
364
|
+
Args:
|
|
365
|
+
col_name: Name of the column containing invalid LLM messages.
|
|
366
|
+
wrong_role: Whether message roles are invalid.
|
|
367
|
+
wrong_content: Whether message contents are invalid.
|
|
368
|
+
wrong_tool_calls: Whether tool calls are invalid.
|
|
369
|
+
"""
|
|
241
370
|
self.col_name = col_name
|
|
242
371
|
self.wrong_role = wrong_role
|
|
243
372
|
self.wrong_content = wrong_content
|
|
244
373
|
self.wrong_tool_calls = wrong_tool_calls
|
|
245
374
|
|
|
246
375
|
def error_message(self) -> str:
|
|
376
|
+
"""Return the error message for this exception."""
|
|
247
377
|
msg = f"Found at least one invalid LLM message in column '{self.col_name}'. "
|
|
248
378
|
if self.wrong_role:
|
|
249
379
|
msg += (
|
|
@@ -265,17 +395,28 @@ class InvalidLLMMessageValueInColumn(ValidationError):
|
|
|
265
395
|
|
|
266
396
|
|
|
267
397
|
class InvalidEmbeddingValueInColumn(ValidationError):
|
|
398
|
+
"""Raised when embedding values in column are invalid or malformed."""
|
|
399
|
+
|
|
268
400
|
def __repr__(self) -> str:
|
|
401
|
+
"""Return a string representation for debugging and logging."""
|
|
269
402
|
return "Invalid_Embedding_Value_In_Column"
|
|
270
403
|
|
|
271
404
|
def __init__(
|
|
272
405
|
self, col_name: str, wrong_vector: bool, wrong_text: bool
|
|
273
406
|
) -> None:
|
|
407
|
+
"""Initialize the exception with embedding value validation context.
|
|
408
|
+
|
|
409
|
+
Args:
|
|
410
|
+
col_name: Name of the column containing invalid embeddings.
|
|
411
|
+
wrong_vector: Whether embedding vectors are invalid.
|
|
412
|
+
wrong_text: Whether embedding texts are invalid.
|
|
413
|
+
"""
|
|
274
414
|
self.col_name = col_name
|
|
275
415
|
self.wrong_vector = wrong_vector
|
|
276
416
|
self.wrong_text = wrong_text
|
|
277
417
|
|
|
278
418
|
def error_message(self) -> str:
|
|
419
|
+
"""Return the error message for this exception."""
|
|
279
420
|
msg = f"Found at least one invalid embedding object in column '{self.col_name}'. "
|
|
280
421
|
if self.wrong_vector:
|
|
281
422
|
msg += (
|
|
@@ -292,7 +433,10 @@ class InvalidEmbeddingValueInColumn(ValidationError):
|
|
|
292
433
|
|
|
293
434
|
|
|
294
435
|
class InvalidDocumentValueInColumn(ValidationError):
|
|
436
|
+
"""Raised when document values in column are invalid or malformed."""
|
|
437
|
+
|
|
295
438
|
def __repr__(self) -> str:
|
|
439
|
+
"""Return a string representation for debugging and logging."""
|
|
296
440
|
return "Invalid_Document_Value_In_Column"
|
|
297
441
|
|
|
298
442
|
def __init__(
|
|
@@ -302,12 +446,21 @@ class InvalidDocumentValueInColumn(ValidationError):
|
|
|
302
446
|
wrong_content: bool,
|
|
303
447
|
wrong_metadata: bool,
|
|
304
448
|
) -> None:
|
|
449
|
+
"""Initialize the exception with document value validation context.
|
|
450
|
+
|
|
451
|
+
Args:
|
|
452
|
+
col_name: Name of the column containing invalid documents.
|
|
453
|
+
wrong_id: Whether document IDs are invalid.
|
|
454
|
+
wrong_content: Whether document contents are invalid.
|
|
455
|
+
wrong_metadata: Whether document metadata is invalid.
|
|
456
|
+
"""
|
|
305
457
|
self.col_name = col_name
|
|
306
458
|
self.wrong_id = wrong_id
|
|
307
459
|
self.wrong_content = wrong_content
|
|
308
460
|
self.wrong_metadata = wrong_metadata
|
|
309
461
|
|
|
310
462
|
def error_message(self) -> str:
|
|
463
|
+
"""Return the error message for this exception."""
|
|
311
464
|
msg = (
|
|
312
465
|
f"Found at least one invalid document in column '{self.col_name}'. "
|
|
313
466
|
)
|
|
@@ -331,13 +484,22 @@ class InvalidDocumentValueInColumn(ValidationError):
|
|
|
331
484
|
|
|
332
485
|
|
|
333
486
|
class InvalidFloatValueInColumn(ValidationError):
|
|
487
|
+
"""Raised when float values in column are invalid or out of range."""
|
|
488
|
+
|
|
334
489
|
def __repr__(self) -> str:
|
|
490
|
+
"""Return a string representation for debugging and logging."""
|
|
335
491
|
return "Invalid_Float_Value_In_Column"
|
|
336
492
|
|
|
337
493
|
def __init__(self, col_name: str) -> None:
|
|
494
|
+
"""Initialize the exception with float value validation context.
|
|
495
|
+
|
|
496
|
+
Args:
|
|
497
|
+
col_name: Name of the column containing invalid float values.
|
|
498
|
+
"""
|
|
338
499
|
self.col_name = col_name
|
|
339
500
|
|
|
340
501
|
def error_message(self) -> str:
|
|
502
|
+
"""Return the error message for this exception."""
|
|
341
503
|
return (
|
|
342
504
|
f"The column '{self.col_name}' contains invalid float values. "
|
|
343
505
|
f"Invalid values are +/- infinite values."
|
|
@@ -345,13 +507,22 @@ class InvalidFloatValueInColumn(ValidationError):
|
|
|
345
507
|
|
|
346
508
|
|
|
347
509
|
class InvalidNullEvalLabelAndScore(ValidationError):
|
|
510
|
+
"""Raised when both eval label and score are null in a record."""
|
|
511
|
+
|
|
348
512
|
def __repr__(self) -> str:
|
|
513
|
+
"""Return a string representation for debugging and logging."""
|
|
349
514
|
return "Invalid_Null_Eval_Label_And_Score"
|
|
350
515
|
|
|
351
|
-
def __init__(self, eval_names:
|
|
516
|
+
def __init__(self, eval_names: list[str]) -> None:
|
|
517
|
+
"""Initialize the exception with eval label and score validation context.
|
|
518
|
+
|
|
519
|
+
Args:
|
|
520
|
+
eval_names: List of eval names missing both label and score.
|
|
521
|
+
"""
|
|
352
522
|
self.eval_names = eval_names
|
|
353
523
|
|
|
354
524
|
def error_message(self) -> str:
|
|
525
|
+
"""Return the error message for this exception."""
|
|
355
526
|
return (
|
|
356
527
|
f"There is at least one row without a label and score for the following evals: "
|
|
357
528
|
f"{log_a_list(self.eval_names, 'and')}"
|
|
@@ -359,14 +530,24 @@ class InvalidNullEvalLabelAndScore(ValidationError):
|
|
|
359
530
|
|
|
360
531
|
|
|
361
532
|
class DuplicateAnnotationNameInSpan(ValidationError):
|
|
533
|
+
"""Raised when a span contains duplicate annotation names."""
|
|
534
|
+
|
|
362
535
|
def __repr__(self) -> str:
|
|
536
|
+
"""Return a string representation for debugging and logging."""
|
|
363
537
|
return "Duplicate_Annotation_Name_In_Span"
|
|
364
538
|
|
|
365
|
-
def __init__(self, span_id: str, duplicate_names:
|
|
539
|
+
def __init__(self, span_id: str, duplicate_names: list[str]) -> None:
|
|
540
|
+
"""Initialize the exception with duplicate annotation names context.
|
|
541
|
+
|
|
542
|
+
Args:
|
|
543
|
+
span_id: ID of the span containing duplicate annotations.
|
|
544
|
+
duplicate_names: List of annotation names that are duplicated.
|
|
545
|
+
"""
|
|
366
546
|
self.span_id = span_id
|
|
367
547
|
self.duplicate_names = duplicate_names
|
|
368
548
|
|
|
369
549
|
def error_message(self) -> str:
|
|
550
|
+
"""Return the error message for this exception."""
|
|
370
551
|
return (
|
|
371
552
|
f"Found duplicate annotation/eval names within the same span_id '{self.span_id}'. "
|
|
372
553
|
f"Duplicate names: {log_a_list(self.duplicate_names, 'and')}. "
|
|
@@ -376,13 +557,22 @@ class DuplicateAnnotationNameInSpan(ValidationError):
|
|
|
376
557
|
|
|
377
558
|
|
|
378
559
|
class InvalidNullAnnotationLabelAndScore(ValidationError):
|
|
560
|
+
"""Raised when both annotation label and score are null in a record."""
|
|
561
|
+
|
|
379
562
|
def __repr__(self) -> str:
|
|
563
|
+
"""Return a string representation for debugging and logging."""
|
|
380
564
|
return "Invalid_Null_Annotation_Label_And_Score"
|
|
381
565
|
|
|
382
|
-
def __init__(self, annotation_names:
|
|
566
|
+
def __init__(self, annotation_names: list[str]) -> None:
|
|
567
|
+
"""Initialize the exception with annotation validation context.
|
|
568
|
+
|
|
569
|
+
Args:
|
|
570
|
+
annotation_names: List of annotation names missing both label and score.
|
|
571
|
+
"""
|
|
383
572
|
self.annotation_names = annotation_names
|
|
384
573
|
|
|
385
574
|
def error_message(self) -> str:
|
|
575
|
+
"""Return the error message for this exception."""
|
|
386
576
|
return (
|
|
387
577
|
"There is at least one row where both label and score are missing for the "
|
|
388
578
|
f"following annotations: {log_a_list(self.annotation_names, 'and')}. "
|
|
@@ -391,16 +581,26 @@ class InvalidNullAnnotationLabelAndScore(ValidationError):
|
|
|
391
581
|
|
|
392
582
|
|
|
393
583
|
class InvalidAnnotationColumnFormat(ValidationError):
|
|
584
|
+
"""Raised when annotation column format is invalid or malformed."""
|
|
585
|
+
|
|
394
586
|
def __repr__(self) -> str:
|
|
587
|
+
"""Return a string representation for debugging and logging."""
|
|
395
588
|
return "Invalid_Annotation_Column_Format"
|
|
396
589
|
|
|
397
590
|
def __init__(
|
|
398
|
-
self, invalid_format_cols:
|
|
591
|
+
self, invalid_format_cols: list[str], expected_format: str
|
|
399
592
|
) -> None:
|
|
593
|
+
"""Initialize the exception with annotation column format validation context.
|
|
594
|
+
|
|
595
|
+
Args:
|
|
596
|
+
invalid_format_cols: List of columns with invalid annotation format.
|
|
597
|
+
expected_format: Expected format for annotation columns.
|
|
598
|
+
"""
|
|
400
599
|
self.invalid_format_cols = invalid_format_cols
|
|
401
600
|
self.expected_format = expected_format
|
|
402
601
|
|
|
403
602
|
def error_message(self) -> str:
|
|
603
|
+
"""Return the error message for this exception."""
|
|
404
604
|
return (
|
|
405
605
|
f"The following columns have an invalid annotation column format: "
|
|
406
606
|
f"{log_a_list(self.invalid_format_cols, 'and')}. "
|
|
@@ -1,6 +1,7 @@
|
|
|
1
|
+
"""Common value validation logic for span data."""
|
|
2
|
+
|
|
1
3
|
import logging
|
|
2
|
-
from datetime import datetime, timedelta
|
|
3
|
-
from typing import List
|
|
4
|
+
from datetime import datetime, timedelta, timezone
|
|
4
5
|
|
|
5
6
|
import numpy as np
|
|
6
7
|
import pandas as pd
|
|
@@ -31,7 +32,15 @@ logger = logging.getLogger(__name__)
|
|
|
31
32
|
|
|
32
33
|
def check_invalid_project_name(
|
|
33
34
|
project_name: str | None,
|
|
34
|
-
) ->
|
|
35
|
+
) -> list[InvalidProjectName]:
|
|
36
|
+
"""Validates that the project name is a non-empty string.
|
|
37
|
+
|
|
38
|
+
Args:
|
|
39
|
+
project_name: The project name to validate.
|
|
40
|
+
|
|
41
|
+
Returns:
|
|
42
|
+
List of validation errors if project name is invalid (empty if valid).
|
|
43
|
+
"""
|
|
35
44
|
# assume it's been coerced to string beforehand
|
|
36
45
|
if (not isinstance(project_name, str)) or len(project_name.strip()) == 0:
|
|
37
46
|
return [InvalidProjectName()]
|
|
@@ -40,7 +49,15 @@ def check_invalid_project_name(
|
|
|
40
49
|
|
|
41
50
|
def check_invalid_model_version(
|
|
42
51
|
model_version: str | None = None,
|
|
43
|
-
) ->
|
|
52
|
+
) -> list[InvalidModelVersion]:
|
|
53
|
+
"""Validates that the model version, if provided, is a non-empty string.
|
|
54
|
+
|
|
55
|
+
Args:
|
|
56
|
+
model_version: The optional model version to validate.
|
|
57
|
+
|
|
58
|
+
Returns:
|
|
59
|
+
List of validation errors if model version is invalid (empty if valid or None).
|
|
60
|
+
"""
|
|
44
61
|
if model_version is None:
|
|
45
62
|
return []
|
|
46
63
|
if not isinstance(model_version, str) or len(model_version.strip()) == 0:
|
|
@@ -56,7 +73,20 @@ def check_string_column_value_length(
|
|
|
56
73
|
max_len: int,
|
|
57
74
|
is_required: bool,
|
|
58
75
|
must_be_json: bool = False,
|
|
59
|
-
) ->
|
|
76
|
+
) -> list[InvalidMissingValueInColumn | InvalidStringLengthInColumn]:
|
|
77
|
+
"""Validate string column values are within length bounds and optionally valid JSON.
|
|
78
|
+
|
|
79
|
+
Args:
|
|
80
|
+
df: The DataFrame to validate.
|
|
81
|
+
col_name: Name of the column to check.
|
|
82
|
+
min_len: Minimum allowed string length.
|
|
83
|
+
max_len: Maximum allowed string length.
|
|
84
|
+
is_required: Whether the column must have non-null values.
|
|
85
|
+
must_be_json: Whether values must be valid JSON strings. Defaults to False.
|
|
86
|
+
|
|
87
|
+
Returns:
|
|
88
|
+
List of validation errors for missing values, invalid lengths, or invalid JSON.
|
|
89
|
+
"""
|
|
60
90
|
if col_name not in df.columns:
|
|
61
91
|
return []
|
|
62
92
|
|
|
@@ -97,9 +127,20 @@ def check_string_column_value_length(
|
|
|
97
127
|
def check_string_column_allowed_values(
|
|
98
128
|
df: pd.DataFrame,
|
|
99
129
|
col_name: str,
|
|
100
|
-
allowed_values:
|
|
130
|
+
allowed_values: list[str],
|
|
101
131
|
is_required: bool,
|
|
102
|
-
) ->
|
|
132
|
+
) -> list[InvalidMissingValueInColumn | InvalidStringValueNotAllowedInColumn]:
|
|
133
|
+
"""Validate that string column values are within allowed values.
|
|
134
|
+
|
|
135
|
+
Args:
|
|
136
|
+
df: The DataFrame to validate.
|
|
137
|
+
col_name: The column name to check.
|
|
138
|
+
allowed_values: List of allowed string values (case-insensitive).
|
|
139
|
+
is_required: Whether the column must not have missing values.
|
|
140
|
+
|
|
141
|
+
Returns:
|
|
142
|
+
List of validation errors found.
|
|
143
|
+
"""
|
|
103
144
|
if col_name not in df.columns:
|
|
104
145
|
return []
|
|
105
146
|
|
|
@@ -136,7 +177,16 @@ def check_string_column_allowed_values(
|
|
|
136
177
|
def check_float_column_valid_numbers(
|
|
137
178
|
df: pd.DataFrame,
|
|
138
179
|
col_name: str,
|
|
139
|
-
) ->
|
|
180
|
+
) -> list[InvalidFloatValueInColumn]:
|
|
181
|
+
"""Check that float column contains only finite numbers, no infinity values.
|
|
182
|
+
|
|
183
|
+
Args:
|
|
184
|
+
df: The DataFrame to validate.
|
|
185
|
+
col_name: The column name to check.
|
|
186
|
+
|
|
187
|
+
Returns:
|
|
188
|
+
List containing InvalidFloatValueInColumn error if infinite values found.
|
|
189
|
+
"""
|
|
140
190
|
if col_name not in df.columns:
|
|
141
191
|
return []
|
|
142
192
|
# np.isinf will fail on None values, change Nones to np.nan and check on that
|
|
@@ -145,18 +195,25 @@ def check_float_column_valid_numbers(
|
|
|
145
195
|
invalid_exists = invalid_mask.any()
|
|
146
196
|
|
|
147
197
|
if invalid_exists:
|
|
148
|
-
|
|
149
|
-
return error
|
|
198
|
+
return [InvalidFloatValueInColumn(col_name=col_name)]
|
|
150
199
|
return []
|
|
151
200
|
|
|
152
201
|
|
|
153
202
|
def check_value_columns_start_end_time(
|
|
154
203
|
df: pd.DataFrame,
|
|
155
|
-
) ->
|
|
204
|
+
) -> list[
|
|
156
205
|
InvalidMissingValueInColumn
|
|
157
206
|
| InvalidTimestampValueInColumn
|
|
158
207
|
| InvalidStartAndEndTimeValuesInColumn
|
|
159
208
|
]:
|
|
209
|
+
"""Validate start and end time columns for timestamps and logical ordering.
|
|
210
|
+
|
|
211
|
+
Args:
|
|
212
|
+
df: The DataFrame containing start and end time columns.
|
|
213
|
+
|
|
214
|
+
Returns:
|
|
215
|
+
List of validation errors for missing values, invalid timestamps, or start > end.
|
|
216
|
+
"""
|
|
160
217
|
errors = []
|
|
161
218
|
errors += check_value_timestamp(
|
|
162
219
|
df=df,
|
|
@@ -186,7 +243,17 @@ def check_value_timestamp(
|
|
|
186
243
|
df: pd.DataFrame,
|
|
187
244
|
col_name: str,
|
|
188
245
|
is_required: bool,
|
|
189
|
-
) ->
|
|
246
|
+
) -> list[InvalidMissingValueInColumn | InvalidTimestampValueInColumn]:
|
|
247
|
+
"""Validate timestamp column values are within reasonable bounds.
|
|
248
|
+
|
|
249
|
+
Args:
|
|
250
|
+
df: The DataFrame to validate.
|
|
251
|
+
col_name: The column name containing timestamps in nanoseconds.
|
|
252
|
+
is_required: Whether missing values should be flagged as errors.
|
|
253
|
+
|
|
254
|
+
Returns:
|
|
255
|
+
List of validation errors for missing or out-of-bounds timestamps.
|
|
256
|
+
"""
|
|
190
257
|
# This check expects that timestamps have previously been converted to nanoseconds
|
|
191
258
|
if col_name not in df.columns:
|
|
192
259
|
return []
|
|
@@ -199,7 +266,7 @@ def check_value_timestamp(
|
|
|
199
266
|
)
|
|
200
267
|
)
|
|
201
268
|
|
|
202
|
-
now_t = datetime.now()
|
|
269
|
+
now_t = datetime.now(tz=timezone.utc)
|
|
203
270
|
lbound, ubound = (
|
|
204
271
|
(
|
|
205
272
|
now_t - timedelta(days=MAX_PAST_YEARS_FROM_CURRENT_TIME * 365)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Evaluation data validation for LLM tracing spans."""
|