arize 8.0.0a21__py3-none-any.whl → 8.0.0a23__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (166)
  1. arize/__init__.py +17 -9
  2. arize/_exporter/client.py +55 -36
  3. arize/_exporter/parsers/tracing_data_parser.py +41 -30
  4. arize/_exporter/validation.py +3 -3
  5. arize/_flight/client.py +208 -77
  6. arize/_generated/api_client/__init__.py +30 -6
  7. arize/_generated/api_client/api/__init__.py +1 -0
  8. arize/_generated/api_client/api/datasets_api.py +864 -190
  9. arize/_generated/api_client/api/experiments_api.py +167 -131
  10. arize/_generated/api_client/api/projects_api.py +1197 -0
  11. arize/_generated/api_client/api_client.py +2 -2
  12. arize/_generated/api_client/configuration.py +42 -34
  13. arize/_generated/api_client/exceptions.py +2 -2
  14. arize/_generated/api_client/models/__init__.py +15 -4
  15. arize/_generated/api_client/models/dataset.py +10 -10
  16. arize/_generated/api_client/models/dataset_example.py +111 -0
  17. arize/_generated/api_client/models/dataset_example_update.py +100 -0
  18. arize/_generated/api_client/models/dataset_version.py +13 -13
  19. arize/_generated/api_client/models/datasets_create_request.py +16 -8
  20. arize/_generated/api_client/models/datasets_examples_insert_request.py +100 -0
  21. arize/_generated/api_client/models/datasets_examples_list200_response.py +106 -0
  22. arize/_generated/api_client/models/datasets_examples_update_request.py +102 -0
  23. arize/_generated/api_client/models/datasets_list200_response.py +10 -4
  24. arize/_generated/api_client/models/experiment.py +14 -16
  25. arize/_generated/api_client/models/experiment_run.py +108 -0
  26. arize/_generated/api_client/models/experiment_run_create.py +102 -0
  27. arize/_generated/api_client/models/experiments_create_request.py +16 -10
  28. arize/_generated/api_client/models/experiments_list200_response.py +10 -4
  29. arize/_generated/api_client/models/experiments_runs_list200_response.py +19 -5
  30. arize/_generated/api_client/models/{error.py → pagination_metadata.py} +13 -11
  31. arize/_generated/api_client/models/primitive_value.py +172 -0
  32. arize/_generated/api_client/models/problem.py +100 -0
  33. arize/_generated/api_client/models/project.py +99 -0
  34. arize/_generated/api_client/models/{datasets_list_examples200_response.py → projects_create_request.py} +13 -11
  35. arize/_generated/api_client/models/projects_list200_response.py +106 -0
  36. arize/_generated/api_client/rest.py +2 -2
  37. arize/_generated/api_client/test/test_dataset.py +4 -2
  38. arize/_generated/api_client/test/test_dataset_example.py +56 -0
  39. arize/_generated/api_client/test/test_dataset_example_update.py +52 -0
  40. arize/_generated/api_client/test/test_dataset_version.py +7 -2
  41. arize/_generated/api_client/test/test_datasets_api.py +27 -13
  42. arize/_generated/api_client/test/test_datasets_create_request.py +8 -4
  43. arize/_generated/api_client/test/{test_datasets_list_examples200_response.py → test_datasets_examples_insert_request.py} +19 -15
  44. arize/_generated/api_client/test/test_datasets_examples_list200_response.py +66 -0
  45. arize/_generated/api_client/test/test_datasets_examples_update_request.py +61 -0
  46. arize/_generated/api_client/test/test_datasets_list200_response.py +9 -3
  47. arize/_generated/api_client/test/test_experiment.py +2 -4
  48. arize/_generated/api_client/test/test_experiment_run.py +56 -0
  49. arize/_generated/api_client/test/test_experiment_run_create.py +54 -0
  50. arize/_generated/api_client/test/test_experiments_api.py +6 -6
  51. arize/_generated/api_client/test/test_experiments_create_request.py +9 -6
  52. arize/_generated/api_client/test/test_experiments_list200_response.py +9 -5
  53. arize/_generated/api_client/test/test_experiments_runs_list200_response.py +15 -5
  54. arize/_generated/api_client/test/test_pagination_metadata.py +53 -0
  55. arize/_generated/api_client/test/{test_error.py → test_primitive_value.py} +13 -14
  56. arize/_generated/api_client/test/test_problem.py +57 -0
  57. arize/_generated/api_client/test/test_project.py +58 -0
  58. arize/_generated/api_client/test/test_projects_api.py +59 -0
  59. arize/_generated/api_client/test/test_projects_create_request.py +54 -0
  60. arize/_generated/api_client/test/test_projects_list200_response.py +70 -0
  61. arize/_generated/api_client_README.md +43 -29
  62. arize/_generated/protocol/flight/flight_pb2.py +400 -0
  63. arize/_lazy.py +27 -19
  64. arize/client.py +269 -55
  65. arize/config.py +365 -116
  66. arize/constants/__init__.py +1 -0
  67. arize/constants/config.py +11 -4
  68. arize/constants/ml.py +6 -4
  69. arize/constants/openinference.py +2 -0
  70. arize/constants/pyarrow.py +2 -0
  71. arize/constants/spans.py +3 -1
  72. arize/datasets/__init__.py +1 -0
  73. arize/datasets/client.py +299 -84
  74. arize/datasets/errors.py +32 -2
  75. arize/datasets/validation.py +18 -8
  76. arize/embeddings/__init__.py +2 -0
  77. arize/embeddings/auto_generator.py +23 -19
  78. arize/embeddings/base_generators.py +89 -36
  79. arize/embeddings/constants.py +2 -0
  80. arize/embeddings/cv_generators.py +26 -4
  81. arize/embeddings/errors.py +27 -5
  82. arize/embeddings/nlp_generators.py +31 -12
  83. arize/embeddings/tabular_generators.py +32 -20
  84. arize/embeddings/usecases.py +12 -2
  85. arize/exceptions/__init__.py +1 -0
  86. arize/exceptions/auth.py +11 -1
  87. arize/exceptions/base.py +29 -4
  88. arize/exceptions/models.py +21 -2
  89. arize/exceptions/parameters.py +31 -0
  90. arize/exceptions/spaces.py +12 -1
  91. arize/exceptions/types.py +86 -7
  92. arize/exceptions/values.py +220 -20
  93. arize/experiments/__init__.py +1 -0
  94. arize/experiments/client.py +390 -286
  95. arize/experiments/evaluators/__init__.py +1 -0
  96. arize/experiments/evaluators/base.py +74 -41
  97. arize/experiments/evaluators/exceptions.py +6 -3
  98. arize/experiments/evaluators/executors.py +121 -73
  99. arize/experiments/evaluators/rate_limiters.py +106 -57
  100. arize/experiments/evaluators/types.py +34 -7
  101. arize/experiments/evaluators/utils.py +65 -27
  102. arize/experiments/functions.py +103 -101
  103. arize/experiments/tracing.py +52 -44
  104. arize/experiments/types.py +56 -31
  105. arize/logging.py +54 -22
  106. arize/models/__init__.py +1 -0
  107. arize/models/batch_validation/__init__.py +1 -0
  108. arize/models/batch_validation/errors.py +543 -65
  109. arize/models/batch_validation/validator.py +339 -300
  110. arize/models/bounded_executor.py +20 -7
  111. arize/models/casting.py +75 -29
  112. arize/models/client.py +326 -107
  113. arize/models/proto.py +95 -40
  114. arize/models/stream_validation.py +42 -14
  115. arize/models/surrogate_explainer/__init__.py +1 -0
  116. arize/models/surrogate_explainer/mimic.py +24 -13
  117. arize/pre_releases.py +43 -0
  118. arize/projects/__init__.py +1 -0
  119. arize/projects/client.py +129 -0
  120. arize/regions.py +40 -0
  121. arize/spans/__init__.py +1 -0
  122. arize/spans/client.py +130 -106
  123. arize/spans/columns.py +13 -0
  124. arize/spans/conversion.py +54 -38
  125. arize/spans/validation/__init__.py +1 -0
  126. arize/spans/validation/annotations/__init__.py +1 -0
  127. arize/spans/validation/annotations/annotations_validation.py +6 -4
  128. arize/spans/validation/annotations/dataframe_form_validation.py +13 -11
  129. arize/spans/validation/annotations/value_validation.py +35 -11
  130. arize/spans/validation/common/__init__.py +1 -0
  131. arize/spans/validation/common/argument_validation.py +33 -8
  132. arize/spans/validation/common/dataframe_form_validation.py +35 -9
  133. arize/spans/validation/common/errors.py +211 -11
  134. arize/spans/validation/common/value_validation.py +80 -13
  135. arize/spans/validation/evals/__init__.py +1 -0
  136. arize/spans/validation/evals/dataframe_form_validation.py +28 -8
  137. arize/spans/validation/evals/evals_validation.py +34 -4
  138. arize/spans/validation/evals/value_validation.py +26 -3
  139. arize/spans/validation/metadata/__init__.py +1 -1
  140. arize/spans/validation/metadata/argument_validation.py +14 -5
  141. arize/spans/validation/metadata/dataframe_form_validation.py +26 -10
  142. arize/spans/validation/metadata/value_validation.py +24 -10
  143. arize/spans/validation/spans/__init__.py +1 -0
  144. arize/spans/validation/spans/dataframe_form_validation.py +34 -13
  145. arize/spans/validation/spans/spans_validation.py +35 -4
  146. arize/spans/validation/spans/value_validation.py +76 -7
  147. arize/types.py +293 -157
  148. arize/utils/__init__.py +1 -0
  149. arize/utils/arrow.py +31 -15
  150. arize/utils/cache.py +34 -6
  151. arize/utils/dataframe.py +19 -2
  152. arize/utils/online_tasks/__init__.py +2 -0
  153. arize/utils/online_tasks/dataframe_preprocessor.py +53 -41
  154. arize/utils/openinference_conversion.py +44 -5
  155. arize/utils/proto.py +10 -0
  156. arize/utils/size.py +5 -3
  157. arize/version.py +3 -1
  158. {arize-8.0.0a21.dist-info → arize-8.0.0a23.dist-info}/METADATA +4 -3
  159. arize-8.0.0a23.dist-info/RECORD +174 -0
  160. {arize-8.0.0a21.dist-info → arize-8.0.0a23.dist-info}/WHEEL +1 -1
  161. arize-8.0.0a23.dist-info/licenses/LICENSE +176 -0
  162. arize-8.0.0a23.dist-info/licenses/NOTICE +13 -0
  163. arize/_generated/protocol/flight/export_pb2.py +0 -61
  164. arize/_generated/protocol/flight/ingest_pb2.py +0 -365
  165. arize-8.0.0a21.dist-info/RECORD +0 -146
  166. arize-8.0.0a21.dist-info/licenses/LICENSE.md +0 -12
@@ -1,4 +1,4 @@
1
- from typing import Any, List
1
+ """Common validation error classes for spans."""
2
2
 
3
3
  from arize.constants.ml import (
4
4
  MAX_EMBEDDING_DIMENSIONALITY,
@@ -23,26 +23,46 @@ from arize.logging import log_a_list
23
23
 
24
24
 
25
25
  class InvalidTypeArgument(ValidationError):
26
+ """Raised when an argument has an invalid type."""
27
+
26
28
  def __repr__(self) -> str:
29
+ """Return a string representation for debugging and logging."""
27
30
  return "Invalid_Type_Argument"
28
31
 
29
- def __init__(self, arg_name: str, arg_type: str, wrong_arg: Any) -> None:
32
+ def __init__(self, arg_name: str, arg_type: str, wrong_arg: object) -> None:
33
+ """Initialize the exception with argument type validation context.
34
+
35
+ Args:
36
+ arg_name: Name of the argument with invalid type.
37
+ arg_type: Expected type for the argument.
38
+ wrong_arg: Actual argument value that was invalid.
39
+ """
30
40
  self.arg_name = arg_name
31
41
  self.arg_type = arg_type
32
42
  self.wrong_arg = wrong_arg
33
43
 
34
44
  def error_message(self) -> str:
45
+ """Return the error message for this exception."""
35
46
  return f"The {self.arg_name} must be a {self.arg_type}. Found {type(self.wrong_arg)}"
36
47
 
37
48
 
38
49
  class InvalidDateTimeFormatType(ValidationError):
50
+ """Raised when datetime format type is invalid or not supported."""
51
+
39
52
  def __repr__(self) -> str:
53
+ """Return a string representation for debugging and logging."""
40
54
  return "Invalid_DateTime_Format_Type"
41
55
 
42
- def __init__(self, wrong_input: Any) -> None:
56
+ def __init__(self, wrong_input: object) -> None:
57
+ """Initialize the exception with datetime format validation context.
58
+
59
+ Args:
60
+ wrong_input: Invalid input that was provided for datetime format.
61
+ """
43
62
  self.wrong_input = wrong_input
44
63
 
45
64
  def error_message(self) -> str:
65
+ """Return the error message for this exception."""
46
66
  return f"The date time format must be a string. Found {type(self.wrong_input)}"
47
67
 
48
68
 
@@ -52,13 +72,22 @@ class InvalidDateTimeFormatType(ValidationError):
52
72
 
53
73
 
54
74
  class InvalidDataFrameDuplicateColumns(ValidationError):
75
+ """Raised when dataframe contains duplicate column names."""
76
+
55
77
  def __repr__(self) -> str:
78
+ """Return a string representation for debugging and logging."""
56
79
  return "Invalid_DataFrame_Duplicate_Columns"
57
80
 
58
- def __init__(self, duplicate_cols: List[str]) -> None:
81
+ def __init__(self, duplicate_cols: list[str]) -> None:
82
+ """Initialize the exception with duplicate columns context.
83
+
84
+ Args:
85
+ duplicate_cols: List of column names that have duplicates in the dataframe.
86
+ """
59
87
  self.duplicate_cols = duplicate_cols
60
88
 
61
89
  def error_message(self) -> str:
90
+ """Return the error message for this exception."""
62
91
  return (
63
92
  f"The following columns have duplicates in the dataframe: "
64
93
  f"{log_a_list(self.duplicate_cols, 'and')}"
@@ -66,13 +95,22 @@ class InvalidDataFrameDuplicateColumns(ValidationError):
66
95
 
67
96
 
68
97
  class InvalidDataFrameMissingColumns(ValidationError):
98
+ """Raised when required columns are missing from dataframe."""
99
+
69
100
  def __repr__(self) -> str:
101
+ """Return a string representation for debugging and logging."""
70
102
  return "Invalid_DataFrame_Missing_Columns"
71
103
 
72
- def __init__(self, missing_cols: List[str]) -> None:
104
+ def __init__(self, missing_cols: list[str]) -> None:
105
+ """Initialize the exception with missing columns context.
106
+
107
+ Args:
108
+ missing_cols: List of required columns that are missing from the dataframe.
109
+ """
73
110
  self.missing_cols = missing_cols
74
111
 
75
112
  def error_message(self) -> str:
113
+ """Return the error message for this exception."""
76
114
  return (
77
115
  f"The following columns are missing in the dataframe and are required: "
78
116
  f"{log_a_list(self.missing_cols, 'and')}"
@@ -80,16 +118,26 @@ class InvalidDataFrameMissingColumns(ValidationError):
80
118
 
81
119
 
82
120
  class InvalidDataFrameColumnContentTypes(ValidationError):
121
+ """Raised when dataframe column content types are invalid."""
122
+
83
123
  def __repr__(self) -> str:
124
+ """Return a string representation for debugging and logging."""
84
125
  return "Invalid_DataFrame_Column_Content_Types"
85
126
 
86
127
  def __init__(
87
- self, invalid_type_cols: List[str], expected_type: str
128
+ self, invalid_type_cols: list[str], expected_type: str
88
129
  ) -> None:
130
+ """Initialize the exception with column content type validation context.
131
+
132
+ Args:
133
+ invalid_type_cols: List of columns with incorrect content types.
134
+ expected_type: Expected content type for the columns.
135
+ """
89
136
  self.invalid_type_cols = invalid_type_cols
90
137
  self.expected_type = expected_type
91
138
 
92
139
  def error_message(self) -> str:
140
+ """Return the error message for this exception."""
93
141
  return (
94
142
  "Found dataframe columns containing the wrong data type. "
95
143
  f"The following columns should contain {self.expected_type}: "
@@ -103,13 +151,22 @@ class InvalidDataFrameColumnContentTypes(ValidationError):
103
151
 
104
152
 
105
153
  class InvalidMissingValueInColumn(ValidationError):
154
+ """Raised when column contains null or missing values."""
155
+
106
156
  def __repr__(self) -> str:
157
+ """Return a string representation for debugging and logging."""
107
158
  return "Invalid_Missin_Value_In_Column"
108
159
 
109
160
  def __init__(self, col_name: str) -> None:
161
+ """Initialize the exception with missing value context.
162
+
163
+ Args:
164
+ col_name: Name of the column containing missing values.
165
+ """
110
166
  self.col_name = col_name
111
167
 
112
168
  def error_message(self) -> str:
169
+ """Return the error message for this exception."""
113
170
  return (
114
171
  f"The column '{self.col_name}' has at least one missing value. "
115
172
  "This column must not have missing values"
@@ -117,15 +174,26 @@ class InvalidMissingValueInColumn(ValidationError):
117
174
 
118
175
 
119
176
  class InvalidStringLengthInColumn(ValidationError):
177
+ """Raised when string values in column exceed length limits."""
178
+
120
179
  def __repr__(self) -> str:
180
+ """Return a string representation for debugging and logging."""
121
181
  return "Invalid_String_Length_In_Column"
122
182
 
123
183
  def __init__(self, col_name: str, min_length: int, max_length: int) -> None:
184
+ """Initialize the exception with string length validation context.
185
+
186
+ Args:
187
+ col_name: Name of the column with invalid string lengths.
188
+ min_length: Minimum acceptable string length.
189
+ max_length: Maximum acceptable string length.
190
+ """
124
191
  self.col_name = col_name
125
192
  self.min_length = min_length
126
193
  self.max_length = max_length
127
194
 
128
195
  def error_message(self) -> str:
196
+ """Return the error message for this exception."""
129
197
  return (
130
198
  f"The column '{self.col_name}' contains invalid string values, "
131
199
  f"their length must be between {self.min_length} and {self.max_length}."
@@ -133,27 +201,46 @@ class InvalidStringLengthInColumn(ValidationError):
133
201
 
134
202
 
135
203
  class InvalidJsonStringInColumn(ValidationError):
204
+ """Raised when JSON string in column is invalid or malformed."""
205
+
136
206
  def __repr__(self) -> str:
207
+ """Return a string representation for debugging and logging."""
137
208
  return "Invalid_Json_String_In_Column"
138
209
 
139
210
  def __init__(self, col_name: str) -> None:
211
+ """Initialize the exception with JSON string validation context.
212
+
213
+ Args:
214
+ col_name: Name of the column containing invalid JSON strings.
215
+ """
140
216
  self.col_name = col_name
141
217
 
142
218
  def error_message(self) -> str:
219
+ """Return the error message for this exception."""
143
220
  return (
144
221
  f"The column '{self.col_name}' contains invalid JSON string values."
145
222
  )
146
223
 
147
224
 
148
225
  class InvalidStringValueNotAllowedInColumn(ValidationError):
226
+ """Raised when column contains disallowed string values."""
227
+
149
228
  def __repr__(self) -> str:
229
+ """Return a string representation for debugging and logging."""
150
230
  return "Invalid_String_Value_Not_Allowed_In_Column"
151
231
 
152
- def __init__(self, col_name: str, allowed_values: List[str]) -> None:
232
+ def __init__(self, col_name: str, allowed_values: list[str]) -> None:
233
+ """Initialize the exception with allowed string values validation context.
234
+
235
+ Args:
236
+ col_name: Name of the column containing disallowed values.
237
+ allowed_values: List of values that are allowed in the column.
238
+ """
153
239
  self.col_name = col_name
154
240
  self.allowed_values = allowed_values
155
241
 
156
242
  def error_message(self) -> str:
243
+ """Return the error message for this exception."""
157
244
  return (
158
245
  f"The column '{self.col_name}' contains invalid string values. "
159
246
  f"Allowed values are {log_a_list(self.allowed_values, 'and')}"
@@ -161,13 +248,22 @@ class InvalidStringValueNotAllowedInColumn(ValidationError):
161
248
 
162
249
 
163
250
  class InvalidTimestampValueInColumn(ValidationError):
251
+ """Raised when timestamp values in column are outside acceptable range."""
252
+
164
253
  def __repr__(self) -> str:
254
+ """Return a string representation for debugging and logging."""
165
255
  return "Invalid_Timestamp_Value_In_Column"
166
256
 
167
257
  def __init__(self, timestamp_col_name: str) -> None:
258
+ """Initialize the exception with timestamp validation context.
259
+
260
+ Args:
261
+ timestamp_col_name: Name of the column containing invalid timestamp values.
262
+ """
168
263
  self.timestamp_col_name = timestamp_col_name
169
264
 
170
265
  def error_message(self) -> str:
266
+ """Return the error message for this exception."""
171
267
  return (
172
268
  f"At least one timestamp in the column '{self.timestamp_col_name}' is out of range. "
173
269
  f"Timestamps must be within {MAX_FUTURE_YEARS_FROM_CURRENT_TIME} year "
@@ -177,14 +273,24 @@ class InvalidTimestampValueInColumn(ValidationError):
177
273
 
178
274
 
179
275
  class InvalidStartAndEndTimeValuesInColumn(ValidationError):
276
+ """Raised when start time is not before end time in span records."""
277
+
180
278
  def __repr__(self) -> str:
279
+ """Return a string representation for debugging and logging."""
181
280
  return "Invalid_Start_And_End_Time_Values_In_Column"
182
281
 
183
282
  def __init__(self, greater_col_name: str, less_col_name: str) -> None:
283
+ """Initialize the exception with span time validation context.
284
+
285
+ Args:
286
+ greater_col_name: Name of the column that should have greater values (end time).
287
+ less_col_name: Name of the column that should have lesser values (start time).
288
+ """
184
289
  self.greater_col_name = greater_col_name
185
290
  self.less_col_name = less_col_name
186
291
 
187
292
  def error_message(self) -> str:
293
+ """Return the error message for this exception."""
188
294
  return (
189
295
  f"Invalid span times. Values in column '{self.greater_col_name}' "
190
296
  f"should be greater than values in column '{self.less_col_name}'"
@@ -192,7 +298,10 @@ class InvalidStartAndEndTimeValuesInColumn(ValidationError):
192
298
 
193
299
 
194
300
  class InvalidEventValueInColumn(ValidationError):
301
+ """Raised when event values in column are invalid or malformed."""
302
+
195
303
  def __repr__(self) -> str:
304
+ """Return a string representation for debugging and logging."""
196
305
  return "Invalid_Event_Value_In_Column"
197
306
 
198
307
  def __init__(
@@ -202,12 +311,21 @@ class InvalidEventValueInColumn(ValidationError):
202
311
  wrong_time: bool,
203
312
  wrong_attrs: bool,
204
313
  ) -> None:
314
+ """Initialize the exception with event value validation context.
315
+
316
+ Args:
317
+ col_name: Name of the column containing invalid events.
318
+ wrong_name: Whether event names are invalid.
319
+ wrong_time: Whether event timestamps are invalid.
320
+ wrong_attrs: Whether event attributes are invalid.
321
+ """
205
322
  self.col_name = col_name
206
323
  self.wrong_name = wrong_name
207
324
  self.wrong_time = wrong_time
208
325
  self.wrong_attrs = wrong_attrs
209
326
 
210
327
  def error_message(self) -> str:
328
+ """Return the error message for this exception."""
211
329
  msg = f"Found at least one invalid event in column '{self.col_name}'. "
212
330
  if self.wrong_name:
213
331
  msg += (
@@ -228,7 +346,10 @@ class InvalidEventValueInColumn(ValidationError):
228
346
 
229
347
 
230
348
  class InvalidLLMMessageValueInColumn(ValidationError):
349
+ """Raised when LLM message values in column are invalid or malformed."""
350
+
231
351
  def __repr__(self) -> str:
352
+ """Return a string representation for debugging and logging."""
232
353
  return "Invalid_LLM_Message_Value_In_Column"
233
354
 
234
355
  def __init__(
@@ -238,12 +359,21 @@ class InvalidLLMMessageValueInColumn(ValidationError):
238
359
  wrong_content: bool,
239
360
  wrong_tool_calls: bool,
240
361
  ) -> None:
362
+ """Initialize the exception with LLM message validation context.
363
+
364
+ Args:
365
+ col_name: Name of the column containing invalid LLM messages.
366
+ wrong_role: Whether message roles are invalid.
367
+ wrong_content: Whether message contents are invalid.
368
+ wrong_tool_calls: Whether tool calls are invalid.
369
+ """
241
370
  self.col_name = col_name
242
371
  self.wrong_role = wrong_role
243
372
  self.wrong_content = wrong_content
244
373
  self.wrong_tool_calls = wrong_tool_calls
245
374
 
246
375
  def error_message(self) -> str:
376
+ """Return the error message for this exception."""
247
377
  msg = f"Found at least one invalid LLM message in column '{self.col_name}'. "
248
378
  if self.wrong_role:
249
379
  msg += (
@@ -265,17 +395,28 @@ class InvalidLLMMessageValueInColumn(ValidationError):
265
395
 
266
396
 
267
397
  class InvalidEmbeddingValueInColumn(ValidationError):
398
+ """Raised when embedding values in column are invalid or malformed."""
399
+
268
400
  def __repr__(self) -> str:
401
+ """Return a string representation for debugging and logging."""
269
402
  return "Invalid_Embedding_Value_In_Column"
270
403
 
271
404
  def __init__(
272
405
  self, col_name: str, wrong_vector: bool, wrong_text: bool
273
406
  ) -> None:
407
+ """Initialize the exception with embedding value validation context.
408
+
409
+ Args:
410
+ col_name: Name of the column containing invalid embeddings.
411
+ wrong_vector: Whether embedding vectors are invalid.
412
+ wrong_text: Whether embedding texts are invalid.
413
+ """
274
414
  self.col_name = col_name
275
415
  self.wrong_vector = wrong_vector
276
416
  self.wrong_text = wrong_text
277
417
 
278
418
  def error_message(self) -> str:
419
+ """Return the error message for this exception."""
279
420
  msg = f"Found at least one invalid embedding object in column '{self.col_name}'. "
280
421
  if self.wrong_vector:
281
422
  msg += (
@@ -292,7 +433,10 @@ class InvalidEmbeddingValueInColumn(ValidationError):
292
433
 
293
434
 
294
435
  class InvalidDocumentValueInColumn(ValidationError):
436
+ """Raised when document values in column are invalid or malformed."""
437
+
295
438
  def __repr__(self) -> str:
439
+ """Return a string representation for debugging and logging."""
296
440
  return "Invalid_Document_Value_In_Column"
297
441
 
298
442
  def __init__(
@@ -302,12 +446,21 @@ class InvalidDocumentValueInColumn(ValidationError):
302
446
  wrong_content: bool,
303
447
  wrong_metadata: bool,
304
448
  ) -> None:
449
+ """Initialize the exception with document value validation context.
450
+
451
+ Args:
452
+ col_name: Name of the column containing invalid documents.
453
+ wrong_id: Whether document IDs are invalid.
454
+ wrong_content: Whether document contents are invalid.
455
+ wrong_metadata: Whether document metadata is invalid.
456
+ """
305
457
  self.col_name = col_name
306
458
  self.wrong_id = wrong_id
307
459
  self.wrong_content = wrong_content
308
460
  self.wrong_metadata = wrong_metadata
309
461
 
310
462
  def error_message(self) -> str:
463
+ """Return the error message for this exception."""
311
464
  msg = (
312
465
  f"Found at least one invalid document in column '{self.col_name}'. "
313
466
  )
@@ -331,13 +484,22 @@ class InvalidDocumentValueInColumn(ValidationError):
331
484
 
332
485
 
333
486
  class InvalidFloatValueInColumn(ValidationError):
487
+ """Raised when float values in column are invalid or out of range."""
488
+
334
489
  def __repr__(self) -> str:
490
+ """Return a string representation for debugging and logging."""
335
491
  return "Invalid_Float_Value_In_Column"
336
492
 
337
493
  def __init__(self, col_name: str) -> None:
494
+ """Initialize the exception with float value validation context.
495
+
496
+ Args:
497
+ col_name: Name of the column containing invalid float values.
498
+ """
338
499
  self.col_name = col_name
339
500
 
340
501
  def error_message(self) -> str:
502
+ """Return the error message for this exception."""
341
503
  return (
342
504
  f"The column '{self.col_name}' contains invalid float values. "
343
505
  f"Invalid values are +/- infinite values."
@@ -345,13 +507,22 @@ class InvalidFloatValueInColumn(ValidationError):
345
507
 
346
508
 
347
509
  class InvalidNullEvalLabelAndScore(ValidationError):
510
+ """Raised when both eval label and score are null in a record."""
511
+
348
512
  def __repr__(self) -> str:
513
+ """Return a string representation for debugging and logging."""
349
514
  return "Invalid_Null_Eval_Label_And_Score"
350
515
 
351
- def __init__(self, eval_names: List[str]) -> None:
516
+ def __init__(self, eval_names: list[str]) -> None:
517
+ """Initialize the exception with eval label and score validation context.
518
+
519
+ Args:
520
+ eval_names: List of eval names missing both label and score.
521
+ """
352
522
  self.eval_names = eval_names
353
523
 
354
524
  def error_message(self) -> str:
525
+ """Return the error message for this exception."""
355
526
  return (
356
527
  f"There is at least one row without a label and score for the following evals: "
357
528
  f"{log_a_list(self.eval_names, 'and')}"
@@ -359,14 +530,24 @@ class InvalidNullEvalLabelAndScore(ValidationError):
359
530
 
360
531
 
361
532
  class DuplicateAnnotationNameInSpan(ValidationError):
533
+ """Raised when a span contains duplicate annotation names."""
534
+
362
535
  def __repr__(self) -> str:
536
+ """Return a string representation for debugging and logging."""
363
537
  return "Duplicate_Annotation_Name_In_Span"
364
538
 
365
- def __init__(self, span_id: str, duplicate_names: List[str]) -> None:
539
+ def __init__(self, span_id: str, duplicate_names: list[str]) -> None:
540
+ """Initialize the exception with duplicate annotation names context.
541
+
542
+ Args:
543
+ span_id: ID of the span containing duplicate annotations.
544
+ duplicate_names: List of annotation names that are duplicated.
545
+ """
366
546
  self.span_id = span_id
367
547
  self.duplicate_names = duplicate_names
368
548
 
369
549
  def error_message(self) -> str:
550
+ """Return the error message for this exception."""
370
551
  return (
371
552
  f"Found duplicate annotation/eval names within the same span_id '{self.span_id}'. "
372
553
  f"Duplicate names: {log_a_list(self.duplicate_names, 'and')}. "
@@ -376,13 +557,22 @@ class DuplicateAnnotationNameInSpan(ValidationError):
376
557
 
377
558
 
378
559
  class InvalidNullAnnotationLabelAndScore(ValidationError):
560
+ """Raised when both annotation label and score are null in a record."""
561
+
379
562
  def __repr__(self) -> str:
563
+ """Return a string representation for debugging and logging."""
380
564
  return "Invalid_Null_Annotation_Label_And_Score"
381
565
 
382
- def __init__(self, annotation_names: List[str]) -> None:
566
+ def __init__(self, annotation_names: list[str]) -> None:
567
+ """Initialize the exception with annotation validation context.
568
+
569
+ Args:
570
+ annotation_names: List of annotation names missing both label and score.
571
+ """
383
572
  self.annotation_names = annotation_names
384
573
 
385
574
  def error_message(self) -> str:
575
+ """Return the error message for this exception."""
386
576
  return (
387
577
  "There is at least one row where both label and score are missing for the "
388
578
  f"following annotations: {log_a_list(self.annotation_names, 'and')}. "
@@ -391,16 +581,26 @@ class InvalidNullAnnotationLabelAndScore(ValidationError):
391
581
 
392
582
 
393
583
  class InvalidAnnotationColumnFormat(ValidationError):
584
+ """Raised when annotation column format is invalid or malformed."""
585
+
394
586
  def __repr__(self) -> str:
587
+ """Return a string representation for debugging and logging."""
395
588
  return "Invalid_Annotation_Column_Format"
396
589
 
397
590
  def __init__(
398
- self, invalid_format_cols: List[str], expected_format: str
591
+ self, invalid_format_cols: list[str], expected_format: str
399
592
  ) -> None:
593
+ """Initialize the exception with annotation column format validation context.
594
+
595
+ Args:
596
+ invalid_format_cols: List of columns with invalid annotation format.
597
+ expected_format: Expected format for annotation columns.
598
+ """
400
599
  self.invalid_format_cols = invalid_format_cols
401
600
  self.expected_format = expected_format
402
601
 
403
602
  def error_message(self) -> str:
603
+ """Return the error message for this exception."""
404
604
  return (
405
605
  f"The following columns have an invalid annotation column format: "
406
606
  f"{log_a_list(self.invalid_format_cols, 'and')}. "
@@ -1,6 +1,7 @@
1
+ """Common value validation logic for span data."""
2
+
1
3
  import logging
2
- from datetime import datetime, timedelta
3
- from typing import List
4
+ from datetime import datetime, timedelta, timezone
4
5
 
5
6
  import numpy as np
6
7
  import pandas as pd
@@ -31,7 +32,15 @@ logger = logging.getLogger(__name__)
31
32
 
32
33
  def check_invalid_project_name(
33
34
  project_name: str | None,
34
- ) -> List[InvalidProjectName]:
35
+ ) -> list[InvalidProjectName]:
36
+ """Validates that the project name is a non-empty string.
37
+
38
+ Args:
39
+ project_name: The project name to validate.
40
+
41
+ Returns:
42
+ List of validation errors if project name is invalid (empty if valid).
43
+ """
35
44
  # assume it's been coerced to string beforehand
36
45
  if (not isinstance(project_name, str)) or len(project_name.strip()) == 0:
37
46
  return [InvalidProjectName()]
@@ -40,7 +49,15 @@ def check_invalid_project_name(
40
49
 
41
50
  def check_invalid_model_version(
42
51
  model_version: str | None = None,
43
- ) -> List[InvalidModelVersion]:
52
+ ) -> list[InvalidModelVersion]:
53
+ """Validates that the model version, if provided, is a non-empty string.
54
+
55
+ Args:
56
+ model_version: The optional model version to validate.
57
+
58
+ Returns:
59
+ List of validation errors if model version is invalid (empty if valid or None).
60
+ """
44
61
  if model_version is None:
45
62
  return []
46
63
  if not isinstance(model_version, str) or len(model_version.strip()) == 0:
@@ -56,7 +73,20 @@ def check_string_column_value_length(
56
73
  max_len: int,
57
74
  is_required: bool,
58
75
  must_be_json: bool = False,
59
- ) -> List[InvalidMissingValueInColumn | InvalidStringLengthInColumn]:
76
+ ) -> list[InvalidMissingValueInColumn | InvalidStringLengthInColumn]:
77
+ """Validate string column values are within length bounds and optionally valid JSON.
78
+
79
+ Args:
80
+ df: The DataFrame to validate.
81
+ col_name: Name of the column to check.
82
+ min_len: Minimum allowed string length.
83
+ max_len: Maximum allowed string length.
84
+ is_required: Whether the column must have non-null values.
85
+ must_be_json: Whether values must be valid JSON strings. Defaults to False.
86
+
87
+ Returns:
88
+ List of validation errors for missing values, invalid lengths, or invalid JSON.
89
+ """
60
90
  if col_name not in df.columns:
61
91
  return []
62
92
 
@@ -97,9 +127,20 @@ def check_string_column_value_length(
97
127
  def check_string_column_allowed_values(
98
128
  df: pd.DataFrame,
99
129
  col_name: str,
100
- allowed_values: List[str],
130
+ allowed_values: list[str],
101
131
  is_required: bool,
102
- ) -> List[InvalidMissingValueInColumn | InvalidStringValueNotAllowedInColumn]:
132
+ ) -> list[InvalidMissingValueInColumn | InvalidStringValueNotAllowedInColumn]:
133
+ """Validate that string column values are within allowed values.
134
+
135
+ Args:
136
+ df: The DataFrame to validate.
137
+ col_name: The column name to check.
138
+ allowed_values: List of allowed string values (case-insensitive).
139
+ is_required: Whether the column must not have missing values.
140
+
141
+ Returns:
142
+ List of validation errors found.
143
+ """
103
144
  if col_name not in df.columns:
104
145
  return []
105
146
 
@@ -136,7 +177,16 @@ def check_string_column_allowed_values(
136
177
  def check_float_column_valid_numbers(
137
178
  df: pd.DataFrame,
138
179
  col_name: str,
139
- ) -> List[InvalidFloatValueInColumn]:
180
+ ) -> list[InvalidFloatValueInColumn]:
181
+ """Check that float column contains only finite numbers, no infinity values.
182
+
183
+ Args:
184
+ df: The DataFrame to validate.
185
+ col_name: The column name to check.
186
+
187
+ Returns:
188
+ List containing InvalidFloatValueInColumn error if infinite values found.
189
+ """
140
190
  if col_name not in df.columns:
141
191
  return []
142
192
  # np.isinf will fail on None values, change Nones to np.nan and check on that
@@ -145,18 +195,25 @@ def check_float_column_valid_numbers(
145
195
  invalid_exists = invalid_mask.any()
146
196
 
147
197
  if invalid_exists:
148
- error = [InvalidFloatValueInColumn(col_name=col_name)]
149
- return error
198
+ return [InvalidFloatValueInColumn(col_name=col_name)]
150
199
  return []
151
200
 
152
201
 
153
202
  def check_value_columns_start_end_time(
154
203
  df: pd.DataFrame,
155
- ) -> List[
204
+ ) -> list[
156
205
  InvalidMissingValueInColumn
157
206
  | InvalidTimestampValueInColumn
158
207
  | InvalidStartAndEndTimeValuesInColumn
159
208
  ]:
209
+ """Validate start and end time columns for timestamps and logical ordering.
210
+
211
+ Args:
212
+ df: The DataFrame containing start and end time columns.
213
+
214
+ Returns:
215
+ List of validation errors for missing values, invalid timestamps, or start > end.
216
+ """
160
217
  errors = []
161
218
  errors += check_value_timestamp(
162
219
  df=df,
@@ -186,7 +243,17 @@ def check_value_timestamp(
186
243
  df: pd.DataFrame,
187
244
  col_name: str,
188
245
  is_required: bool,
189
- ) -> List[InvalidMissingValueInColumn | InvalidTimestampValueInColumn]:
246
+ ) -> list[InvalidMissingValueInColumn | InvalidTimestampValueInColumn]:
247
+ """Validate timestamp column values are within reasonable bounds.
248
+
249
+ Args:
250
+ df: The DataFrame to validate.
251
+ col_name: The column name containing timestamps in nanoseconds.
252
+ is_required: Whether missing values should be flagged as errors.
253
+
254
+ Returns:
255
+ List of validation errors for missing or out-of-bounds timestamps.
256
+ """
190
257
  # This check expects that timestamps have previously been converted to nanoseconds
191
258
  if col_name not in df.columns:
192
259
  return []
@@ -199,7 +266,7 @@ def check_value_timestamp(
199
266
  )
200
267
  )
201
268
 
202
- now_t = datetime.now()
269
+ now_t = datetime.now(tz=timezone.utc)
203
270
  lbound, ubound = (
204
271
  (
205
272
  now_t - timedelta(days=MAX_PAST_YEARS_FROM_CURRENT_TIME * 365)
@@ -0,0 +1 @@
1
+ """Evaluation data validation for LLM tracing spans."""