arize 8.0.0a21__py3-none-any.whl → 8.0.0a23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arize/__init__.py +17 -9
- arize/_exporter/client.py +55 -36
- arize/_exporter/parsers/tracing_data_parser.py +41 -30
- arize/_exporter/validation.py +3 -3
- arize/_flight/client.py +208 -77
- arize/_generated/api_client/__init__.py +30 -6
- arize/_generated/api_client/api/__init__.py +1 -0
- arize/_generated/api_client/api/datasets_api.py +864 -190
- arize/_generated/api_client/api/experiments_api.py +167 -131
- arize/_generated/api_client/api/projects_api.py +1197 -0
- arize/_generated/api_client/api_client.py +2 -2
- arize/_generated/api_client/configuration.py +42 -34
- arize/_generated/api_client/exceptions.py +2 -2
- arize/_generated/api_client/models/__init__.py +15 -4
- arize/_generated/api_client/models/dataset.py +10 -10
- arize/_generated/api_client/models/dataset_example.py +111 -0
- arize/_generated/api_client/models/dataset_example_update.py +100 -0
- arize/_generated/api_client/models/dataset_version.py +13 -13
- arize/_generated/api_client/models/datasets_create_request.py +16 -8
- arize/_generated/api_client/models/datasets_examples_insert_request.py +100 -0
- arize/_generated/api_client/models/datasets_examples_list200_response.py +106 -0
- arize/_generated/api_client/models/datasets_examples_update_request.py +102 -0
- arize/_generated/api_client/models/datasets_list200_response.py +10 -4
- arize/_generated/api_client/models/experiment.py +14 -16
- arize/_generated/api_client/models/experiment_run.py +108 -0
- arize/_generated/api_client/models/experiment_run_create.py +102 -0
- arize/_generated/api_client/models/experiments_create_request.py +16 -10
- arize/_generated/api_client/models/experiments_list200_response.py +10 -4
- arize/_generated/api_client/models/experiments_runs_list200_response.py +19 -5
- arize/_generated/api_client/models/{error.py → pagination_metadata.py} +13 -11
- arize/_generated/api_client/models/primitive_value.py +172 -0
- arize/_generated/api_client/models/problem.py +100 -0
- arize/_generated/api_client/models/project.py +99 -0
- arize/_generated/api_client/models/{datasets_list_examples200_response.py → projects_create_request.py} +13 -11
- arize/_generated/api_client/models/projects_list200_response.py +106 -0
- arize/_generated/api_client/rest.py +2 -2
- arize/_generated/api_client/test/test_dataset.py +4 -2
- arize/_generated/api_client/test/test_dataset_example.py +56 -0
- arize/_generated/api_client/test/test_dataset_example_update.py +52 -0
- arize/_generated/api_client/test/test_dataset_version.py +7 -2
- arize/_generated/api_client/test/test_datasets_api.py +27 -13
- arize/_generated/api_client/test/test_datasets_create_request.py +8 -4
- arize/_generated/api_client/test/{test_datasets_list_examples200_response.py → test_datasets_examples_insert_request.py} +19 -15
- arize/_generated/api_client/test/test_datasets_examples_list200_response.py +66 -0
- arize/_generated/api_client/test/test_datasets_examples_update_request.py +61 -0
- arize/_generated/api_client/test/test_datasets_list200_response.py +9 -3
- arize/_generated/api_client/test/test_experiment.py +2 -4
- arize/_generated/api_client/test/test_experiment_run.py +56 -0
- arize/_generated/api_client/test/test_experiment_run_create.py +54 -0
- arize/_generated/api_client/test/test_experiments_api.py +6 -6
- arize/_generated/api_client/test/test_experiments_create_request.py +9 -6
- arize/_generated/api_client/test/test_experiments_list200_response.py +9 -5
- arize/_generated/api_client/test/test_experiments_runs_list200_response.py +15 -5
- arize/_generated/api_client/test/test_pagination_metadata.py +53 -0
- arize/_generated/api_client/test/{test_error.py → test_primitive_value.py} +13 -14
- arize/_generated/api_client/test/test_problem.py +57 -0
- arize/_generated/api_client/test/test_project.py +58 -0
- arize/_generated/api_client/test/test_projects_api.py +59 -0
- arize/_generated/api_client/test/test_projects_create_request.py +54 -0
- arize/_generated/api_client/test/test_projects_list200_response.py +70 -0
- arize/_generated/api_client_README.md +43 -29
- arize/_generated/protocol/flight/flight_pb2.py +400 -0
- arize/_lazy.py +27 -19
- arize/client.py +269 -55
- arize/config.py +365 -116
- arize/constants/__init__.py +1 -0
- arize/constants/config.py +11 -4
- arize/constants/ml.py +6 -4
- arize/constants/openinference.py +2 -0
- arize/constants/pyarrow.py +2 -0
- arize/constants/spans.py +3 -1
- arize/datasets/__init__.py +1 -0
- arize/datasets/client.py +299 -84
- arize/datasets/errors.py +32 -2
- arize/datasets/validation.py +18 -8
- arize/embeddings/__init__.py +2 -0
- arize/embeddings/auto_generator.py +23 -19
- arize/embeddings/base_generators.py +89 -36
- arize/embeddings/constants.py +2 -0
- arize/embeddings/cv_generators.py +26 -4
- arize/embeddings/errors.py +27 -5
- arize/embeddings/nlp_generators.py +31 -12
- arize/embeddings/tabular_generators.py +32 -20
- arize/embeddings/usecases.py +12 -2
- arize/exceptions/__init__.py +1 -0
- arize/exceptions/auth.py +11 -1
- arize/exceptions/base.py +29 -4
- arize/exceptions/models.py +21 -2
- arize/exceptions/parameters.py +31 -0
- arize/exceptions/spaces.py +12 -1
- arize/exceptions/types.py +86 -7
- arize/exceptions/values.py +220 -20
- arize/experiments/__init__.py +1 -0
- arize/experiments/client.py +390 -286
- arize/experiments/evaluators/__init__.py +1 -0
- arize/experiments/evaluators/base.py +74 -41
- arize/experiments/evaluators/exceptions.py +6 -3
- arize/experiments/evaluators/executors.py +121 -73
- arize/experiments/evaluators/rate_limiters.py +106 -57
- arize/experiments/evaluators/types.py +34 -7
- arize/experiments/evaluators/utils.py +65 -27
- arize/experiments/functions.py +103 -101
- arize/experiments/tracing.py +52 -44
- arize/experiments/types.py +56 -31
- arize/logging.py +54 -22
- arize/models/__init__.py +1 -0
- arize/models/batch_validation/__init__.py +1 -0
- arize/models/batch_validation/errors.py +543 -65
- arize/models/batch_validation/validator.py +339 -300
- arize/models/bounded_executor.py +20 -7
- arize/models/casting.py +75 -29
- arize/models/client.py +326 -107
- arize/models/proto.py +95 -40
- arize/models/stream_validation.py +42 -14
- arize/models/surrogate_explainer/__init__.py +1 -0
- arize/models/surrogate_explainer/mimic.py +24 -13
- arize/pre_releases.py +43 -0
- arize/projects/__init__.py +1 -0
- arize/projects/client.py +129 -0
- arize/regions.py +40 -0
- arize/spans/__init__.py +1 -0
- arize/spans/client.py +130 -106
- arize/spans/columns.py +13 -0
- arize/spans/conversion.py +54 -38
- arize/spans/validation/__init__.py +1 -0
- arize/spans/validation/annotations/__init__.py +1 -0
- arize/spans/validation/annotations/annotations_validation.py +6 -4
- arize/spans/validation/annotations/dataframe_form_validation.py +13 -11
- arize/spans/validation/annotations/value_validation.py +35 -11
- arize/spans/validation/common/__init__.py +1 -0
- arize/spans/validation/common/argument_validation.py +33 -8
- arize/spans/validation/common/dataframe_form_validation.py +35 -9
- arize/spans/validation/common/errors.py +211 -11
- arize/spans/validation/common/value_validation.py +80 -13
- arize/spans/validation/evals/__init__.py +1 -0
- arize/spans/validation/evals/dataframe_form_validation.py +28 -8
- arize/spans/validation/evals/evals_validation.py +34 -4
- arize/spans/validation/evals/value_validation.py +26 -3
- arize/spans/validation/metadata/__init__.py +1 -1
- arize/spans/validation/metadata/argument_validation.py +14 -5
- arize/spans/validation/metadata/dataframe_form_validation.py +26 -10
- arize/spans/validation/metadata/value_validation.py +24 -10
- arize/spans/validation/spans/__init__.py +1 -0
- arize/spans/validation/spans/dataframe_form_validation.py +34 -13
- arize/spans/validation/spans/spans_validation.py +35 -4
- arize/spans/validation/spans/value_validation.py +76 -7
- arize/types.py +293 -157
- arize/utils/__init__.py +1 -0
- arize/utils/arrow.py +31 -15
- arize/utils/cache.py +34 -6
- arize/utils/dataframe.py +19 -2
- arize/utils/online_tasks/__init__.py +2 -0
- arize/utils/online_tasks/dataframe_preprocessor.py +53 -41
- arize/utils/openinference_conversion.py +44 -5
- arize/utils/proto.py +10 -0
- arize/utils/size.py +5 -3
- arize/version.py +3 -1
- {arize-8.0.0a21.dist-info → arize-8.0.0a23.dist-info}/METADATA +4 -3
- arize-8.0.0a23.dist-info/RECORD +174 -0
- {arize-8.0.0a21.dist-info → arize-8.0.0a23.dist-info}/WHEEL +1 -1
- arize-8.0.0a23.dist-info/licenses/LICENSE +176 -0
- arize-8.0.0a23.dist-info/licenses/NOTICE +13 -0
- arize/_generated/protocol/flight/export_pb2.py +0 -61
- arize/_generated/protocol/flight/ingest_pb2.py +0 -365
- arize-8.0.0a21.dist-info/RECORD +0 -146
- arize-8.0.0a21.dist-info/licenses/LICENSE.md +0 -12
|
@@ -1,8 +1,9 @@
|
|
|
1
|
+
"""Batch validation error classes for ML model data."""
|
|
2
|
+
|
|
1
3
|
from __future__ import annotations
|
|
2
4
|
|
|
3
5
|
from abc import ABC, abstractmethod
|
|
4
|
-
from
|
|
5
|
-
from typing import TYPE_CHECKING, Dict, List, Optional
|
|
6
|
+
from typing import TYPE_CHECKING
|
|
6
7
|
|
|
7
8
|
from arize.constants.ml import (
|
|
8
9
|
MAX_EMBEDDING_DIMENSIONALITY,
|
|
@@ -18,24 +19,36 @@ from arize.logging import log_a_list
|
|
|
18
19
|
from arize.types import Environments, ModelTypes
|
|
19
20
|
|
|
20
21
|
if TYPE_CHECKING:
|
|
22
|
+
from collections.abc import Iterable
|
|
23
|
+
|
|
21
24
|
from arize.types import Metrics
|
|
22
25
|
|
|
23
26
|
|
|
24
27
|
class ValidationError(Exception, ABC):
|
|
28
|
+
"""Base class for validation errors during batch data ingestion."""
|
|
29
|
+
|
|
25
30
|
def __str__(self) -> str:
|
|
31
|
+
"""Return a human-readable error message."""
|
|
26
32
|
return self.error_message()
|
|
27
33
|
|
|
28
34
|
@abstractmethod
|
|
29
35
|
def __repr__(self) -> str:
|
|
30
|
-
|
|
36
|
+
"""Return a string representation for debugging and logging."""
|
|
31
37
|
|
|
32
38
|
@abstractmethod
|
|
33
39
|
def error_message(self) -> str:
|
|
34
|
-
|
|
40
|
+
"""Return the error message for this exception."""
|
|
35
41
|
|
|
36
42
|
|
|
37
43
|
class ValidationFailure(Exception):
|
|
38
|
-
|
|
44
|
+
"""Raised when validation encounters multiple errors during processing."""
|
|
45
|
+
|
|
46
|
+
def __init__(self, errors: list[ValidationError]) -> None:
|
|
47
|
+
"""Initialize the exception with a list of validation errors.
|
|
48
|
+
|
|
49
|
+
Args:
|
|
50
|
+
errors: List of validation errors encountered during processing.
|
|
51
|
+
"""
|
|
39
52
|
self.errors = errors
|
|
40
53
|
|
|
41
54
|
|
|
@@ -43,11 +56,14 @@ class ValidationFailure(Exception):
|
|
|
43
56
|
# Minimum required checks
|
|
44
57
|
# ----------------------
|
|
45
58
|
class InvalidColumnNameEmptyString(ValidationError):
|
|
59
|
+
"""Raised when a schema contains an empty string as a column name."""
|
|
60
|
+
|
|
46
61
|
def __repr__(self) -> str:
|
|
62
|
+
"""Return a string representation for debugging and logging."""
|
|
47
63
|
return "Invalid_Column_Name_Empty_String"
|
|
48
64
|
|
|
49
|
-
|
|
50
|
-
|
|
65
|
+
def error_message(self) -> str:
|
|
66
|
+
"""Return the error message for this exception."""
|
|
51
67
|
return (
|
|
52
68
|
"Empty column name found: ''. The schema cannot point to columns in the "
|
|
53
69
|
"dataframe denoted by an empty string. You can see the columns used in the "
|
|
@@ -56,14 +72,24 @@ class InvalidColumnNameEmptyString(ValidationError):
|
|
|
56
72
|
|
|
57
73
|
|
|
58
74
|
class InvalidFieldTypeConversion(ValidationError):
|
|
75
|
+
"""Raised when field values cannot be converted to the required type."""
|
|
76
|
+
|
|
59
77
|
def __repr__(self) -> str:
|
|
78
|
+
"""Return a string representation for debugging and logging."""
|
|
60
79
|
return "Invalid_Input_Type_Conversion"
|
|
61
80
|
|
|
62
81
|
def __init__(self, fields: Iterable, type: str) -> None:
|
|
82
|
+
"""Initialize the exception with field type conversion context.
|
|
83
|
+
|
|
84
|
+
Args:
|
|
85
|
+
fields: Fields that failed type conversion.
|
|
86
|
+
type: Expected type for the fields.
|
|
87
|
+
"""
|
|
63
88
|
self.fields = fields
|
|
64
89
|
self.type = type
|
|
65
90
|
|
|
66
91
|
def error_message(self) -> str:
|
|
92
|
+
"""Return the error message for this exception."""
|
|
67
93
|
return (
|
|
68
94
|
f"The following fields must be convertible to {self.type}: "
|
|
69
95
|
f"{', '.join(map(str, self.fields))}."
|
|
@@ -71,13 +97,17 @@ class InvalidFieldTypeConversion(ValidationError):
|
|
|
71
97
|
|
|
72
98
|
|
|
73
99
|
class InvalidFieldTypeEmbeddingFeatures(ValidationError):
|
|
100
|
+
"""Raised when embedding feature column names are not properly formatted."""
|
|
101
|
+
|
|
74
102
|
def __repr__(self) -> str:
|
|
103
|
+
"""Return a string representation for debugging and logging."""
|
|
75
104
|
return "Invalid_Input_Type_Embedding_Features"
|
|
76
105
|
|
|
77
106
|
def __init__(self) -> None:
|
|
78
|
-
|
|
107
|
+
"""Initialize the exception."""
|
|
79
108
|
|
|
80
109
|
def error_message(self) -> str:
|
|
110
|
+
"""Return the error message for this exception."""
|
|
81
111
|
return (
|
|
82
112
|
"schema.embedding_feature_column_names should be a dictionary mapping strings "
|
|
83
113
|
"to EmbeddingColumnNames objects"
|
|
@@ -85,21 +115,34 @@ class InvalidFieldTypeEmbeddingFeatures(ValidationError):
|
|
|
85
115
|
|
|
86
116
|
|
|
87
117
|
class InvalidFieldTypePromptResponse(ValidationError):
|
|
118
|
+
"""Raised when prompt response field is not of correct type."""
|
|
119
|
+
|
|
88
120
|
def __repr__(self) -> str:
|
|
121
|
+
"""Return a string representation for debugging and logging."""
|
|
89
122
|
return "Invalid_Input_Type_Prompt_Response"
|
|
90
123
|
|
|
91
124
|
def __init__(self, name: str) -> None:
|
|
125
|
+
"""Initialize the exception with field name context.
|
|
126
|
+
|
|
127
|
+
Args:
|
|
128
|
+
name: Name of the field with invalid prompt response type.
|
|
129
|
+
"""
|
|
92
130
|
self.name = name
|
|
93
131
|
|
|
94
132
|
def error_message(self) -> str:
|
|
133
|
+
"""Return the error message for this exception."""
|
|
95
134
|
return f"'{self.name}' must be of type str or EmbeddingColumnNames"
|
|
96
135
|
|
|
97
136
|
|
|
98
137
|
class InvalidDataFrameIndex(ValidationError):
|
|
138
|
+
"""Raised when the dataframe index is invalid and needs to be reset."""
|
|
139
|
+
|
|
99
140
|
def __repr__(self) -> str:
|
|
141
|
+
"""Return a string representation for debugging and logging."""
|
|
100
142
|
return "Invalid_Index"
|
|
101
143
|
|
|
102
144
|
def error_message(self) -> str:
|
|
145
|
+
"""Return the error message for this exception."""
|
|
103
146
|
return (
|
|
104
147
|
"The index of the dataframe is invalid; "
|
|
105
148
|
"reset the index by using df.reset_index(drop=True, inplace=True)"
|
|
@@ -107,14 +150,24 @@ class InvalidDataFrameIndex(ValidationError):
|
|
|
107
150
|
|
|
108
151
|
|
|
109
152
|
class InvalidSchemaType(ValidationError):
|
|
153
|
+
"""Raised when schema type is incompatible with the model environment."""
|
|
154
|
+
|
|
110
155
|
def __repr__(self) -> str:
|
|
156
|
+
"""Return a string representation for debugging and logging."""
|
|
111
157
|
return "Invalid_Schema_Type"
|
|
112
158
|
|
|
113
159
|
def __init__(self, schema_type: str, environment: Environments) -> None:
|
|
160
|
+
"""Initialize the exception with schema type and environment context.
|
|
161
|
+
|
|
162
|
+
Args:
|
|
163
|
+
schema_type: Type of schema that is invalid.
|
|
164
|
+
environment: Model environment where schema is being used.
|
|
165
|
+
"""
|
|
114
166
|
self.schema_type = schema_type
|
|
115
167
|
self.environment = environment
|
|
116
168
|
|
|
117
169
|
def error_message(self) -> str:
|
|
170
|
+
"""Return the error message for this exception."""
|
|
118
171
|
return f"Cannot use a {self.schema_type} for a model with environment: {self.environment}"
|
|
119
172
|
|
|
120
173
|
|
|
@@ -124,14 +177,26 @@ class InvalidSchemaType(ValidationError):
|
|
|
124
177
|
|
|
125
178
|
|
|
126
179
|
class MissingPredictionIdColumnForDelayedRecords(ValidationError):
|
|
180
|
+
"""Raised when prediction ID is missing for delayed actuals or feature importance."""
|
|
181
|
+
|
|
127
182
|
def __repr__(self) -> str:
|
|
183
|
+
"""Return a string representation for debugging and logging."""
|
|
128
184
|
return "Missing_Prediction_Id_Column_For_Delayed_Records"
|
|
129
185
|
|
|
130
|
-
def __init__(
|
|
186
|
+
def __init__(
|
|
187
|
+
self, has_actual_info: bool, has_feature_importance_info: bool
|
|
188
|
+
) -> None:
|
|
189
|
+
"""Initialize the exception with delayed record context.
|
|
190
|
+
|
|
191
|
+
Args:
|
|
192
|
+
has_actual_info: Whether actual information is present.
|
|
193
|
+
has_feature_importance_info: Whether feature importance information is present.
|
|
194
|
+
"""
|
|
131
195
|
self.has_actual_info = has_actual_info
|
|
132
196
|
self.has_feature_importance_info = has_feature_importance_info
|
|
133
197
|
|
|
134
198
|
def error_message(self) -> str:
|
|
199
|
+
"""Return the error message for this exception."""
|
|
135
200
|
actual = "actual" if self.has_actual_info else ""
|
|
136
201
|
feat_imp = (
|
|
137
202
|
"feature importance" if self.has_feature_importance_info else ""
|
|
@@ -151,13 +216,22 @@ class MissingPredictionIdColumnForDelayedRecords(ValidationError):
|
|
|
151
216
|
|
|
152
217
|
|
|
153
218
|
class MissingColumns(ValidationError):
|
|
219
|
+
"""Raised when columns declared in schema are not found in dataframe."""
|
|
220
|
+
|
|
154
221
|
def __repr__(self) -> str:
|
|
222
|
+
"""Return a string representation for debugging and logging."""
|
|
155
223
|
return "Missing_Columns"
|
|
156
224
|
|
|
157
225
|
def __init__(self, cols: Iterable) -> None:
|
|
226
|
+
"""Initialize the exception with missing columns context.
|
|
227
|
+
|
|
228
|
+
Args:
|
|
229
|
+
cols: Columns declared in schema but not found in dataframe.
|
|
230
|
+
"""
|
|
158
231
|
self.missing_cols = set(cols)
|
|
159
232
|
|
|
160
233
|
def error_message(self) -> str:
|
|
234
|
+
"""Return the error message for this exception."""
|
|
161
235
|
return (
|
|
162
236
|
"The following columns are declared in the schema "
|
|
163
237
|
"but are not found in the dataframe: "
|
|
@@ -166,21 +240,28 @@ class MissingColumns(ValidationError):
|
|
|
166
240
|
|
|
167
241
|
|
|
168
242
|
class MissingRequiredColumnsMetricsValidation(ValidationError):
|
|
169
|
-
"""
|
|
170
|
-
This error is used only for model mapping validations.
|
|
171
|
-
"""
|
|
243
|
+
"""This error is used only for model mapping validations."""
|
|
172
244
|
|
|
173
245
|
def __repr__(self) -> str:
|
|
246
|
+
"""Return a string representation for debugging and logging."""
|
|
174
247
|
return "Missing_Columns_Required_By_Metrics_Validation"
|
|
175
248
|
|
|
176
249
|
def __init__(
|
|
177
|
-
self, model_type: ModelTypes, metrics:
|
|
250
|
+
self, model_type: ModelTypes, metrics: list[Metrics], cols: Iterable
|
|
178
251
|
) -> None:
|
|
252
|
+
"""Initialize the exception with model metrics validation context.
|
|
253
|
+
|
|
254
|
+
Args:
|
|
255
|
+
model_type: Type of model being validated.
|
|
256
|
+
metrics: List of metrics requiring validation.
|
|
257
|
+
cols: Required columns that are missing.
|
|
258
|
+
"""
|
|
179
259
|
self.model_type = model_type
|
|
180
260
|
self.metrics = metrics
|
|
181
261
|
self.missing_cols = cols
|
|
182
262
|
|
|
183
263
|
def error_message(self) -> str:
|
|
264
|
+
"""Return the error message for this exception."""
|
|
184
265
|
return (
|
|
185
266
|
f"For logging data for a {self.model_type.name} model with support for metrics "
|
|
186
267
|
f"{', '.join(m.name for m in self.metrics)}, "
|
|
@@ -189,13 +270,22 @@ class MissingRequiredColumnsMetricsValidation(ValidationError):
|
|
|
189
270
|
|
|
190
271
|
|
|
191
272
|
class ReservedColumns(ValidationError):
|
|
273
|
+
"""Raised when reserved column names are used in schema fields."""
|
|
274
|
+
|
|
192
275
|
def __repr__(self) -> str:
|
|
276
|
+
"""Return a string representation for debugging and logging."""
|
|
193
277
|
return "Reserved_Columns"
|
|
194
278
|
|
|
195
279
|
def __init__(self, cols: Iterable) -> None:
|
|
280
|
+
"""Initialize the exception with reserved columns context.
|
|
281
|
+
|
|
282
|
+
Args:
|
|
283
|
+
cols: Reserved columns that cannot be used in schema fields.
|
|
284
|
+
"""
|
|
196
285
|
self.reserved_columns = cols
|
|
197
286
|
|
|
198
287
|
def error_message(self) -> str:
|
|
288
|
+
"""Return the error message for this exception."""
|
|
199
289
|
return (
|
|
200
290
|
"The following columns are reserved and can only be specified "
|
|
201
291
|
"in the proper fields of the schema: "
|
|
@@ -204,24 +294,31 @@ class ReservedColumns(ValidationError):
|
|
|
204
294
|
|
|
205
295
|
|
|
206
296
|
class InvalidModelTypeAndMetricsCombination(ValidationError):
|
|
207
|
-
"""
|
|
208
|
-
This error is used only for model mapping validations.
|
|
209
|
-
"""
|
|
297
|
+
"""This error is used only for model mapping validations."""
|
|
210
298
|
|
|
211
299
|
def __repr__(self) -> str:
|
|
300
|
+
"""Return a string representation for debugging and logging."""
|
|
212
301
|
return "Invalid_ModelType_And_Metrics_Combination"
|
|
213
302
|
|
|
214
303
|
def __init__(
|
|
215
304
|
self,
|
|
216
305
|
model_type: ModelTypes,
|
|
217
|
-
metrics:
|
|
218
|
-
suggested_model_metric_combinations:
|
|
306
|
+
metrics: list[Metrics],
|
|
307
|
+
suggested_model_metric_combinations: list[list[str]],
|
|
219
308
|
) -> None:
|
|
309
|
+
"""Initialize the exception with model type and metrics combination context.
|
|
310
|
+
|
|
311
|
+
Args:
|
|
312
|
+
model_type: Type of model being validated.
|
|
313
|
+
metrics: List of metrics that form invalid combination with model type.
|
|
314
|
+
suggested_model_metric_combinations: Valid metric combinations for the model type.
|
|
315
|
+
"""
|
|
220
316
|
self.model_type = model_type
|
|
221
317
|
self.metrics = metrics
|
|
222
318
|
self.suggested_combinations = suggested_model_metric_combinations
|
|
223
319
|
|
|
224
320
|
def error_message(self) -> str:
|
|
321
|
+
"""Return the error message for this exception."""
|
|
225
322
|
valid_combos = ", or \n".join(
|
|
226
323
|
"[" + ", ".join(combo) + "]"
|
|
227
324
|
for combo in self.suggested_combinations
|
|
@@ -234,13 +331,22 @@ class InvalidModelTypeAndMetricsCombination(ValidationError):
|
|
|
234
331
|
|
|
235
332
|
|
|
236
333
|
class InvalidShapSuffix(ValidationError):
|
|
334
|
+
"""Raised when feature or tag names use the reserved '_shap' suffix."""
|
|
335
|
+
|
|
237
336
|
def __repr__(self) -> str:
|
|
337
|
+
"""Return a string representation for debugging and logging."""
|
|
238
338
|
return "Invalid_SHAP_Suffix"
|
|
239
339
|
|
|
240
340
|
def __init__(self, cols: Iterable) -> None:
|
|
341
|
+
"""Initialize the exception with invalid SHAP suffix columns.
|
|
342
|
+
|
|
343
|
+
Args:
|
|
344
|
+
cols: Feature or tag columns using the reserved '_shap' suffix.
|
|
345
|
+
"""
|
|
241
346
|
self.invalid_column_names = cols
|
|
242
347
|
|
|
243
348
|
def error_message(self) -> str:
|
|
349
|
+
"""Return the error message for this exception."""
|
|
244
350
|
return (
|
|
245
351
|
"The following features or tags must not be named with a `_shap` suffix: "
|
|
246
352
|
f"{', '.join(map(str, self.invalid_column_names))}."
|
|
@@ -248,10 +354,14 @@ class InvalidShapSuffix(ValidationError):
|
|
|
248
354
|
|
|
249
355
|
|
|
250
356
|
class InvalidModelType(ValidationError):
|
|
357
|
+
"""Raised when an invalid model type is specified."""
|
|
358
|
+
|
|
251
359
|
def __repr__(self) -> str:
|
|
360
|
+
"""Return a string representation for debugging and logging."""
|
|
252
361
|
return "Invalid_Model_Type"
|
|
253
362
|
|
|
254
363
|
def error_message(self) -> str:
|
|
364
|
+
"""Return the error message for this exception."""
|
|
255
365
|
return (
|
|
256
366
|
"Model type not valid. Choose one of the following: "
|
|
257
367
|
f"{', '.join('ModelTypes.' + mt.name for mt in ModelTypes)}. "
|
|
@@ -259,10 +369,14 @@ class InvalidModelType(ValidationError):
|
|
|
259
369
|
|
|
260
370
|
|
|
261
371
|
class InvalidEnvironment(ValidationError):
|
|
372
|
+
"""Raised when an invalid environment is specified."""
|
|
373
|
+
|
|
262
374
|
def __repr__(self) -> str:
|
|
375
|
+
"""Return a string representation for debugging and logging."""
|
|
263
376
|
return "Invalid_Environment"
|
|
264
377
|
|
|
265
378
|
def error_message(self) -> str:
|
|
379
|
+
"""Return the error message for this exception."""
|
|
266
380
|
return (
|
|
267
381
|
"Environment not valid. Choose one of the following: "
|
|
268
382
|
f"{', '.join('Environments.' + env.name for env in Environments)}. "
|
|
@@ -270,34 +384,50 @@ class InvalidEnvironment(ValidationError):
|
|
|
270
384
|
|
|
271
385
|
|
|
272
386
|
class InvalidBatchId(ValidationError):
|
|
387
|
+
"""Raised when batch ID is missing or invalid for validation environment."""
|
|
388
|
+
|
|
273
389
|
def __repr__(self) -> str:
|
|
390
|
+
"""Return a string representation for debugging and logging."""
|
|
274
391
|
return "Invalid_Batch_ID"
|
|
275
392
|
|
|
276
393
|
def error_message(self) -> str:
|
|
394
|
+
"""Return the error message for this exception."""
|
|
277
395
|
return "Batch ID must be a nonempty string if logging to validation environment."
|
|
278
396
|
|
|
279
397
|
|
|
280
398
|
class InvalidModelVersion(ValidationError):
|
|
399
|
+
"""Raised when model version is empty or invalid."""
|
|
400
|
+
|
|
281
401
|
def __repr__(self) -> str:
|
|
402
|
+
"""Return a string representation for debugging and logging."""
|
|
282
403
|
return "Invalid_Model_Version"
|
|
283
404
|
|
|
284
405
|
def error_message(self) -> str:
|
|
406
|
+
"""Return the error message for this exception."""
|
|
285
407
|
return "Model version must be a nonempty string."
|
|
286
408
|
|
|
287
409
|
|
|
288
410
|
class InvalidModelId(ValidationError):
|
|
411
|
+
"""Raised when model ID is empty or invalid."""
|
|
412
|
+
|
|
289
413
|
def __repr__(self) -> str:
|
|
414
|
+
"""Return a string representation for debugging and logging."""
|
|
290
415
|
return "Invalid_Model_ID"
|
|
291
416
|
|
|
292
417
|
def error_message(self) -> str:
|
|
418
|
+
"""Return the error message for this exception."""
|
|
293
419
|
return "Model ID must be a nonempty string."
|
|
294
420
|
|
|
295
421
|
|
|
296
422
|
class InvalidProjectName(ValidationError):
|
|
423
|
+
"""Raised when project name is empty or invalid."""
|
|
424
|
+
|
|
297
425
|
def __repr__(self) -> str:
|
|
426
|
+
"""Return a string representation for debugging and logging."""
|
|
298
427
|
return "Invalid_Project_Name"
|
|
299
428
|
|
|
300
429
|
def error_message(self) -> str:
|
|
430
|
+
"""Return the error message for this exception."""
|
|
301
431
|
return (
|
|
302
432
|
"Project Name must be a nonempty string. "
|
|
303
433
|
"If Model ID was used instead of Project Name, "
|
|
@@ -306,10 +436,14 @@ class InvalidProjectName(ValidationError):
|
|
|
306
436
|
|
|
307
437
|
|
|
308
438
|
class MissingPredActShap(ValidationError):
|
|
439
|
+
"""Raised when schema is missing prediction, actual, or SHAP values."""
|
|
440
|
+
|
|
309
441
|
def __repr__(self) -> str:
|
|
442
|
+
"""Return a string representation for debugging and logging."""
|
|
310
443
|
return "Missing_Pred_or_Act_or_SHAP"
|
|
311
444
|
|
|
312
445
|
def error_message(self) -> str:
|
|
446
|
+
"""Return the error message for this exception."""
|
|
313
447
|
return (
|
|
314
448
|
"The schema must specify at least one of the following: "
|
|
315
449
|
"prediction label, actual label, or SHAP value column names"
|
|
@@ -317,27 +451,41 @@ class MissingPredActShap(ValidationError):
|
|
|
317
451
|
|
|
318
452
|
|
|
319
453
|
class MissingPreprodPredAct(ValidationError):
|
|
454
|
+
"""Raised when pre-production data is missing both prediction and actual labels."""
|
|
455
|
+
|
|
320
456
|
def __repr__(self) -> str:
|
|
457
|
+
"""Return a string representation for debugging and logging."""
|
|
321
458
|
return "Missing_Preproduction_Pred_and_Act"
|
|
322
459
|
|
|
323
460
|
def error_message(self) -> str:
|
|
324
|
-
|
|
325
|
-
|
|
461
|
+
"""Return the error message for this exception."""
|
|
462
|
+
return (
|
|
463
|
+
"For logging pre-production data, the schema must specify both "
|
|
464
|
+
"prediction and actual label columns."
|
|
465
|
+
)
|
|
326
466
|
|
|
327
467
|
|
|
328
468
|
class MissingPreprodAct(ValidationError):
|
|
469
|
+
"""Raised when pre-production data is missing actual label column."""
|
|
470
|
+
|
|
329
471
|
def __repr__(self) -> str:
|
|
472
|
+
"""Return a string representation for debugging and logging."""
|
|
330
473
|
return "Missing_Preproduction_Act"
|
|
331
474
|
|
|
332
475
|
def error_message(self) -> str:
|
|
476
|
+
"""Return the error message for this exception."""
|
|
333
477
|
return "For logging pre-production data, the schema must specify actual label column."
|
|
334
478
|
|
|
335
479
|
|
|
336
480
|
class MissingPreprodPredActNumericAndCategorical(ValidationError):
|
|
481
|
+
"""Raised when pre-production numeric/categorical model is missing prediction or actual columns."""
|
|
482
|
+
|
|
337
483
|
def __repr__(self) -> str:
|
|
484
|
+
"""Return a string representation for debugging and logging."""
|
|
338
485
|
return "Missing_Preproduction_Pred_and_Act_Numeric_and_Categorical"
|
|
339
486
|
|
|
340
487
|
def error_message(self) -> str:
|
|
488
|
+
"""Return the error message for this exception."""
|
|
341
489
|
return (
|
|
342
490
|
"For logging pre-production data for a numeric or a categorical model, "
|
|
343
491
|
"the schema must specify both prediction and actual label or score columns."
|
|
@@ -345,10 +493,14 @@ class MissingPreprodPredActNumericAndCategorical(ValidationError):
|
|
|
345
493
|
|
|
346
494
|
|
|
347
495
|
class MissingRequiredColumnsForRankingModel(ValidationError):
|
|
496
|
+
"""Raised when ranking model is missing required group ID or rank columns."""
|
|
497
|
+
|
|
348
498
|
def __repr__(self) -> str:
|
|
499
|
+
"""Return a string representation for debugging and logging."""
|
|
349
500
|
return "Missing_Required_Columns_For_Ranking_Model"
|
|
350
501
|
|
|
351
502
|
def error_message(self) -> str:
|
|
503
|
+
"""Return the error message for this exception."""
|
|
352
504
|
return (
|
|
353
505
|
"For logging data for a ranking model, schema must specify: "
|
|
354
506
|
"prediction_group_id_column_name and rank_column_name"
|
|
@@ -356,13 +508,22 @@ class MissingRequiredColumnsForRankingModel(ValidationError):
|
|
|
356
508
|
|
|
357
509
|
|
|
358
510
|
class MissingCVPredAct(ValidationError):
|
|
511
|
+
"""Raised when computer vision model is missing prediction or actual columns."""
|
|
512
|
+
|
|
359
513
|
def __repr__(self) -> str:
|
|
514
|
+
"""Return a string representation for debugging and logging."""
|
|
360
515
|
return "Missing_CV_Prediction_or_Actual"
|
|
361
516
|
|
|
362
|
-
def __init__(self, environment: Environments):
|
|
517
|
+
def __init__(self, environment: Environments) -> None:
|
|
518
|
+
"""Initialize the exception with environment context.
|
|
519
|
+
|
|
520
|
+
Args:
|
|
521
|
+
environment: Model environment (training, validation, or production).
|
|
522
|
+
"""
|
|
363
523
|
self.environment = environment
|
|
364
524
|
|
|
365
525
|
def error_message(self) -> str:
|
|
526
|
+
"""Return the error message for this exception."""
|
|
366
527
|
if self.environment in (Environments.TRAINING, Environments.VALIDATION):
|
|
367
528
|
env = "pre-production"
|
|
368
529
|
opt = "and"
|
|
@@ -372,7 +533,7 @@ class MissingCVPredAct(ValidationError):
|
|
|
372
533
|
else:
|
|
373
534
|
raise TypeError("Invalid environment")
|
|
374
535
|
return (
|
|
375
|
-
f"For logging {env} data for an Object Detection model,"
|
|
536
|
+
f"For logging {env} data for an Object Detection model, "
|
|
376
537
|
"the schema must specify one of: "
|
|
377
538
|
f"('object_detection_prediction_column_names' {opt} "
|
|
378
539
|
f"'object_detection_actual_column_names') "
|
|
@@ -384,32 +545,50 @@ class MissingCVPredAct(ValidationError):
|
|
|
384
545
|
|
|
385
546
|
|
|
386
547
|
class MultipleCVPredAct(ValidationError):
|
|
548
|
+
"""Raised when multiple computer vision prediction/actual types are specified."""
|
|
549
|
+
|
|
387
550
|
def __repr__(self) -> str:
|
|
551
|
+
"""Return a string representation for debugging and logging."""
|
|
388
552
|
return "Multiple_CV_Prediction_or_Actual"
|
|
389
553
|
|
|
390
|
-
def __init__(self, environment: Environments):
|
|
554
|
+
def __init__(self, environment: Environments) -> None:
|
|
555
|
+
"""Initialize the exception with environment context.
|
|
556
|
+
|
|
557
|
+
Args:
|
|
558
|
+
environment: Model environment where multiple CV types were specified.
|
|
559
|
+
"""
|
|
391
560
|
self.environment = environment
|
|
392
561
|
|
|
393
562
|
def error_message(self) -> str:
|
|
563
|
+
"""Return the error message for this exception."""
|
|
394
564
|
return (
|
|
395
565
|
"The schema must only specify one of the following: "
|
|
396
|
-
"'object_detection_prediction_column_names'/'object_detection_actual_column_names'"
|
|
397
|
-
"'semantic_segmentation_prediction_column_names'/'semantic_segmentation_actual_column_names'"
|
|
398
|
-
"'instance_segmentation_prediction_column_names'/'instance_segmentation_actual_column_names'"
|
|
566
|
+
"'object_detection_prediction_column_names'/'object_detection_actual_column_names', "
|
|
567
|
+
"'semantic_segmentation_prediction_column_names'/'semantic_segmentation_actual_column_names', "
|
|
568
|
+
"'instance_segmentation_prediction_column_names'/'instance_segmentation_actual_column_names'."
|
|
399
569
|
)
|
|
400
570
|
|
|
401
571
|
|
|
402
572
|
class InvalidPredActCVColumnNamesForModelType(ValidationError):
|
|
573
|
+
"""Raised when CV columns are used for non-OBJECT_DETECTION model types."""
|
|
574
|
+
|
|
403
575
|
def __repr__(self) -> str:
|
|
576
|
+
"""Return a string representation for debugging and logging."""
|
|
404
577
|
return "Invalid_CV_Prediction_or_Actual_Column_Names_for_Model_Type"
|
|
405
578
|
|
|
406
579
|
def __init__(
|
|
407
580
|
self,
|
|
408
581
|
invalid_model_type: ModelTypes,
|
|
409
582
|
) -> None:
|
|
583
|
+
"""Initialize the exception with model type context.
|
|
584
|
+
|
|
585
|
+
Args:
|
|
586
|
+
invalid_model_type: Model type that cannot use CV columns.
|
|
587
|
+
"""
|
|
410
588
|
self.invalid_model_type = invalid_model_type
|
|
411
589
|
|
|
412
590
|
def error_message(self) -> str:
|
|
591
|
+
"""Return the error message for this exception."""
|
|
413
592
|
return (
|
|
414
593
|
f"Cannot use 'object_detection_prediction_column_names' or "
|
|
415
594
|
f"'object_detection_actual_column_names' or "
|
|
@@ -422,51 +601,75 @@ class InvalidPredActCVColumnNamesForModelType(ValidationError):
|
|
|
422
601
|
|
|
423
602
|
|
|
424
603
|
class MissingReqPredActColumnNamesForMultiClass(ValidationError):
|
|
604
|
+
"""Raised when multi-class model is missing required score columns."""
|
|
605
|
+
|
|
425
606
|
def __repr__(self) -> str:
|
|
607
|
+
"""Return a string representation for debugging and logging."""
|
|
426
608
|
return "Missing_Required_Prediction_or_Actual_Column_Names_for_Multi_Class_Model_Type"
|
|
427
609
|
|
|
428
610
|
def error_message(self) -> str:
|
|
611
|
+
"""Return the error message for this exception."""
|
|
429
612
|
return (
|
|
430
613
|
"For logging data for a multi class model, schema must specify: "
|
|
431
614
|
"prediction_scores_column_name and/or actual_score_column_name. "
|
|
432
|
-
"Optionally, you may include multi_class_threshold_scores_column_name"
|
|
433
|
-
"
|
|
615
|
+
"Optionally, you may include multi_class_threshold_scores_column_name "
|
|
616
|
+
"(must include prediction_scores_column_name)"
|
|
434
617
|
)
|
|
435
618
|
|
|
436
619
|
|
|
437
620
|
class InvalidPredActColumnNamesForModelType(ValidationError):
|
|
621
|
+
"""Raised when prediction/actual columns are invalid for the model type."""
|
|
622
|
+
|
|
438
623
|
def __repr__(self) -> str:
|
|
624
|
+
"""Return a string representation for debugging and logging."""
|
|
439
625
|
return "Invalid_Prediction_or_Actual_Column_Names_for_Model_Type"
|
|
440
626
|
|
|
441
627
|
def __init__(
|
|
442
628
|
self,
|
|
443
629
|
invalid_model_type: ModelTypes,
|
|
444
|
-
allowed_fields:
|
|
445
|
-
wrong_columns:
|
|
630
|
+
allowed_fields: list[str],
|
|
631
|
+
wrong_columns: list[str],
|
|
446
632
|
) -> None:
|
|
633
|
+
"""Initialize the exception with model type and column validation context.
|
|
634
|
+
|
|
635
|
+
Args:
|
|
636
|
+
invalid_model_type: Model type with invalid columns.
|
|
637
|
+
allowed_fields: List of allowed schema fields for the model type.
|
|
638
|
+
wrong_columns: Columns that are invalid for the model type.
|
|
639
|
+
"""
|
|
447
640
|
self.invalid_model_type = invalid_model_type
|
|
448
641
|
self.allowed_fields = allowed_fields
|
|
449
642
|
self.wrong_columns = wrong_columns
|
|
450
643
|
|
|
451
644
|
def error_message(self) -> str:
|
|
645
|
+
"""Return the error message for this exception."""
|
|
452
646
|
allowed_col_msg = ""
|
|
453
647
|
if self.allowed_fields is not None:
|
|
454
648
|
allowed_col_msg = f" Allowed Schema fields are {log_a_list(self.allowed_fields, 'and')}"
|
|
455
649
|
return (
|
|
456
|
-
f"Invalid Schema fields for {self.invalid_model_type} model type. {allowed_col_msg}"
|
|
650
|
+
f"Invalid Schema fields for {self.invalid_model_type} model type. {allowed_col_msg}. "
|
|
457
651
|
"The following columns of your dataframe are sent as an invalid schema field: "
|
|
458
652
|
f"{log_a_list(self.wrong_columns, 'and')}"
|
|
459
653
|
)
|
|
460
654
|
|
|
461
655
|
|
|
462
656
|
class DuplicateColumnsInDataframe(ValidationError):
|
|
657
|
+
"""Raised when dataframe contains duplicate column names used in schema."""
|
|
658
|
+
|
|
463
659
|
def __repr__(self) -> str:
|
|
660
|
+
"""Return a string representation for debugging and logging."""
|
|
464
661
|
return "Duplicate_Columns_In_Dataframe"
|
|
465
662
|
|
|
466
663
|
def __init__(self, cols: Iterable) -> None:
|
|
664
|
+
"""Initialize the exception with duplicate columns context.
|
|
665
|
+
|
|
666
|
+
Args:
|
|
667
|
+
cols: Columns that have duplicates in the dataframe.
|
|
668
|
+
"""
|
|
467
669
|
self.duplicate_cols = cols
|
|
468
670
|
|
|
469
671
|
def error_message(self) -> str:
|
|
672
|
+
"""Return the error message for this exception."""
|
|
470
673
|
return (
|
|
471
674
|
"The following columns are present in the schema and have duplicates in the dataframe: "
|
|
472
675
|
f"{self.duplicate_cols}. "
|
|
@@ -474,13 +677,22 @@ class DuplicateColumnsInDataframe(ValidationError):
|
|
|
474
677
|
|
|
475
678
|
|
|
476
679
|
class InvalidNumberOfEmbeddings(ValidationError):
|
|
680
|
+
"""Raised when the number of embeddings exceeds the maximum allowed."""
|
|
681
|
+
|
|
477
682
|
def __repr__(self) -> str:
|
|
683
|
+
"""Return a string representation for debugging and logging."""
|
|
478
684
|
return "Invalid_Number_Of_Embeddings"
|
|
479
685
|
|
|
480
686
|
def __init__(self, number_of_embeddings: int) -> None:
|
|
687
|
+
"""Initialize the exception with embedding count context.
|
|
688
|
+
|
|
689
|
+
Args:
|
|
690
|
+
number_of_embeddings: Number of embeddings found in the schema.
|
|
691
|
+
"""
|
|
481
692
|
self.number_of_embeddings = number_of_embeddings
|
|
482
693
|
|
|
483
694
|
def error_message(self) -> str:
|
|
695
|
+
"""Return the error message for this exception."""
|
|
484
696
|
return (
|
|
485
697
|
f"The schema contains {self.number_of_embeddings} different embeddings when a maximum of "
|
|
486
698
|
f"{MAX_NUMBER_OF_EMBEDDINGS} is allowed."
|
|
@@ -493,17 +705,28 @@ class InvalidNumberOfEmbeddings(ValidationError):
|
|
|
493
705
|
|
|
494
706
|
|
|
495
707
|
class InvalidType(ValidationError):
|
|
708
|
+
"""Raised when a field has an invalid data type."""
|
|
709
|
+
|
|
496
710
|
def __repr__(self) -> str:
|
|
711
|
+
"""Return a string representation for debugging and logging."""
|
|
497
712
|
return "Invalid_Type"
|
|
498
713
|
|
|
499
714
|
def __init__(
|
|
500
|
-
self, name: str, expected_types:
|
|
715
|
+
self, name: str, expected_types: list[str], found_data_type: str
|
|
501
716
|
) -> None:
|
|
717
|
+
"""Initialize the exception with type validation context.
|
|
718
|
+
|
|
719
|
+
Args:
|
|
720
|
+
name: Name of the field with invalid type.
|
|
721
|
+
expected_types: List of expected data types.
|
|
722
|
+
found_data_type: Actual data type found.
|
|
723
|
+
"""
|
|
502
724
|
self.name = name
|
|
503
725
|
self.expected_types = expected_types
|
|
504
726
|
self.found_data_type = found_data_type
|
|
505
727
|
|
|
506
728
|
def error_message(self) -> str:
|
|
729
|
+
"""Return the error message for this exception."""
|
|
507
730
|
type_list = (
|
|
508
731
|
self.expected_types[0]
|
|
509
732
|
if len(self.expected_types) == 1
|
|
@@ -517,16 +740,26 @@ class InvalidType(ValidationError):
|
|
|
517
740
|
|
|
518
741
|
|
|
519
742
|
class InvalidTypeColumns(ValidationError):
|
|
743
|
+
"""Raised when columns have invalid data types."""
|
|
744
|
+
|
|
520
745
|
def __repr__(self) -> str:
|
|
746
|
+
"""Return a string representation for debugging and logging."""
|
|
521
747
|
return "Invalid_Type_Columns"
|
|
522
748
|
|
|
523
749
|
def __init__(
|
|
524
|
-
self, wrong_type_columns:
|
|
750
|
+
self, wrong_type_columns: list[str], expected_types: list[str]
|
|
525
751
|
) -> None:
|
|
752
|
+
"""Initialize the exception with column type validation context.
|
|
753
|
+
|
|
754
|
+
Args:
|
|
755
|
+
wrong_type_columns: Columns with incorrect data types.
|
|
756
|
+
expected_types: List of expected data types for the columns.
|
|
757
|
+
"""
|
|
526
758
|
self.wrong_type_columns = wrong_type_columns
|
|
527
759
|
self.expected_types = expected_types
|
|
528
760
|
|
|
529
761
|
def error_message(self) -> str:
|
|
762
|
+
"""Return the error message for this exception."""
|
|
530
763
|
col_list = (
|
|
531
764
|
self.wrong_type_columns[0]
|
|
532
765
|
if len(self.wrong_type_columns) == 1
|
|
@@ -541,14 +774,24 @@ class InvalidTypeColumns(ValidationError):
|
|
|
541
774
|
|
|
542
775
|
|
|
543
776
|
class InvalidTypeFeatures(ValidationError):
|
|
777
|
+
"""Raised when feature columns have unrecognized data types."""
|
|
778
|
+
|
|
544
779
|
def __repr__(self) -> str:
|
|
780
|
+
"""Return a string representation for debugging and logging."""
|
|
545
781
|
return "Invalid_Type_Features"
|
|
546
782
|
|
|
547
|
-
def __init__(self, cols: Iterable, expected_types:
|
|
783
|
+
def __init__(self, cols: Iterable, expected_types: list[str]) -> None:
|
|
784
|
+
"""Initialize the exception with feature type validation context.
|
|
785
|
+
|
|
786
|
+
Args:
|
|
787
|
+
cols: Feature columns with unrecognized data types.
|
|
788
|
+
expected_types: List of expected data types for features.
|
|
789
|
+
"""
|
|
548
790
|
self.wrong_type_columns = cols
|
|
549
791
|
self.expected_types = expected_types
|
|
550
792
|
|
|
551
793
|
def error_message(self) -> str:
|
|
794
|
+
"""Return the error message for this exception."""
|
|
552
795
|
type_list = (
|
|
553
796
|
self.expected_types[0]
|
|
554
797
|
if len(self.expected_types) == 1
|
|
@@ -562,30 +805,48 @@ class InvalidTypeFeatures(ValidationError):
|
|
|
562
805
|
|
|
563
806
|
|
|
564
807
|
class InvalidFieldTypePromptTemplates(ValidationError):
|
|
808
|
+
"""Raised when prompt template column names are not of correct type."""
|
|
809
|
+
|
|
565
810
|
def __repr__(self) -> str:
|
|
811
|
+
"""Return a string representation for debugging and logging."""
|
|
566
812
|
return "Invalid_Input_Type_Prompt_Templates"
|
|
567
813
|
|
|
568
814
|
def error_message(self) -> str:
|
|
815
|
+
"""Return the error message for this exception."""
|
|
569
816
|
return "prompt_template_column_names must be of type PromptTemplateColumnNames"
|
|
570
817
|
|
|
571
818
|
|
|
572
819
|
class InvalidFieldTypeLlmConfig(ValidationError):
|
|
820
|
+
"""Raised when LLM config column names are not of correct type."""
|
|
821
|
+
|
|
573
822
|
def __repr__(self) -> str:
|
|
823
|
+
"""Return a string representation for debugging and logging."""
|
|
574
824
|
return "Invalid_Input_Type_LLM_Config"
|
|
575
825
|
|
|
576
826
|
def error_message(self) -> str:
|
|
827
|
+
"""Return the error message for this exception."""
|
|
577
828
|
return "llm_config_column_names must be of type LLMConfigColumnNames"
|
|
578
829
|
|
|
579
830
|
|
|
580
831
|
class InvalidTypeTags(ValidationError):
|
|
832
|
+
"""Raised when tag columns have unrecognized data types."""
|
|
833
|
+
|
|
581
834
|
def __repr__(self) -> str:
|
|
835
|
+
"""Return a string representation for debugging and logging."""
|
|
582
836
|
return "Invalid_Type_Tags"
|
|
583
837
|
|
|
584
|
-
def __init__(self, cols: Iterable, expected_types:
|
|
838
|
+
def __init__(self, cols: Iterable, expected_types: list[str]) -> None:
|
|
839
|
+
"""Initialize the exception with tag type validation context.
|
|
840
|
+
|
|
841
|
+
Args:
|
|
842
|
+
cols: Tag columns with unrecognized data types.
|
|
843
|
+
expected_types: List of expected data types for tags.
|
|
844
|
+
"""
|
|
585
845
|
self.wrong_type_columns = cols
|
|
586
846
|
self.expected_types = expected_types
|
|
587
847
|
|
|
588
848
|
def error_message(self) -> str:
|
|
849
|
+
"""Return the error message for this exception."""
|
|
589
850
|
type_list = (
|
|
590
851
|
self.expected_types[0]
|
|
591
852
|
if len(self.expected_types) == 1
|
|
@@ -599,14 +860,24 @@ class InvalidTypeTags(ValidationError):
|
|
|
599
860
|
|
|
600
861
|
|
|
601
862
|
class InvalidValueEmbeddingVectorDimensionality(ValidationError):
|
|
863
|
+
"""Raised when embedding vector dimensionality is out of valid range."""
|
|
864
|
+
|
|
602
865
|
def __repr__(self) -> str:
|
|
866
|
+
"""Return a string representation for debugging and logging."""
|
|
603
867
|
return "Invalid_Value_Embedding_Vector_Dimensionality"
|
|
604
868
|
|
|
605
|
-
def __init__(self, dim_1_cols:
|
|
869
|
+
def __init__(self, dim_1_cols: list[str], high_dim_cols: list[str]) -> None:
|
|
870
|
+
"""Initialize the exception with embedding dimensionality context.
|
|
871
|
+
|
|
872
|
+
Args:
|
|
873
|
+
dim_1_cols: Columns with dimensionality of 1.
|
|
874
|
+
high_dim_cols: Columns with dimensionality exceeding the maximum.
|
|
875
|
+
"""
|
|
606
876
|
self.dim_1_cols = dim_1_cols
|
|
607
877
|
self.high_dim_cols = high_dim_cols
|
|
608
878
|
|
|
609
879
|
def error_message(self) -> str:
|
|
880
|
+
"""Return the error message for this exception."""
|
|
610
881
|
msg = (
|
|
611
882
|
"Embedding vectors cannot have length (dimensionality) of 1 or higher "
|
|
612
883
|
f"than {MAX_EMBEDDING_DIMENSIONALITY}. "
|
|
@@ -623,13 +894,22 @@ class InvalidValueEmbeddingVectorDimensionality(ValidationError):
|
|
|
623
894
|
|
|
624
895
|
|
|
625
896
|
class InvalidValueEmbeddingRawDataTooLong(ValidationError):
|
|
897
|
+
"""Raised when embedding raw data exceeds maximum character limit."""
|
|
898
|
+
|
|
626
899
|
def __repr__(self) -> str:
|
|
900
|
+
"""Return a string representation for debugging and logging."""
|
|
627
901
|
return "Invalid_Value_Embedding_Raw_Data_Too_Long"
|
|
628
902
|
|
|
629
903
|
def __init__(self, cols: Iterable) -> None:
|
|
904
|
+
"""Initialize the exception with raw data length validation context.
|
|
905
|
+
|
|
906
|
+
Args:
|
|
907
|
+
cols: Columns with embedding raw data exceeding maximum characters.
|
|
908
|
+
"""
|
|
630
909
|
self.invalid_cols = cols
|
|
631
910
|
|
|
632
911
|
def error_message(self) -> str:
|
|
912
|
+
"""Return the error message for this exception."""
|
|
633
913
|
return (
|
|
634
914
|
f"Embedding raw data cannot have more than {MAX_RAW_DATA_CHARACTERS} characters. "
|
|
635
915
|
"The following columns do not satisfy this condition: "
|
|
@@ -638,14 +918,24 @@ class InvalidValueEmbeddingRawDataTooLong(ValidationError):
|
|
|
638
918
|
|
|
639
919
|
|
|
640
920
|
class InvalidTypeShapValues(ValidationError):
|
|
921
|
+
"""Raised when SHAP value columns have unrecognized data types."""
|
|
922
|
+
|
|
641
923
|
def __repr__(self) -> str:
|
|
924
|
+
"""Return a string representation for debugging and logging."""
|
|
642
925
|
return "Invalid_Type_SHAP_Values"
|
|
643
926
|
|
|
644
|
-
def __init__(self, cols: Iterable, expected_types:
|
|
927
|
+
def __init__(self, cols: Iterable, expected_types: list[str]) -> None:
|
|
928
|
+
"""Initialize the exception with SHAP value type validation context.
|
|
929
|
+
|
|
930
|
+
Args:
|
|
931
|
+
cols: SHAP value columns with unrecognized data types.
|
|
932
|
+
expected_types: List of expected data types for SHAP values.
|
|
933
|
+
"""
|
|
645
934
|
self.wrong_type_columns = cols
|
|
646
935
|
self.expected_types = expected_types
|
|
647
936
|
|
|
648
937
|
def error_message(self) -> str:
|
|
938
|
+
"""Return the error message for this exception."""
|
|
649
939
|
type_list = (
|
|
650
940
|
self.expected_types[0]
|
|
651
941
|
if len(self.expected_types) == 1
|
|
@@ -664,13 +954,22 @@ class InvalidTypeShapValues(ValidationError):
|
|
|
664
954
|
|
|
665
955
|
|
|
666
956
|
class InvalidValueTimestamp(ValidationError):
|
|
957
|
+
"""Raised when timestamp values are outside acceptable time range."""
|
|
958
|
+
|
|
667
959
|
def __repr__(self) -> str:
|
|
960
|
+
"""Return a string representation for debugging and logging."""
|
|
668
961
|
return "Invalid_Timestamp_Value"
|
|
669
962
|
|
|
670
963
|
def __init__(self, timestamp_col_name: str) -> None:
|
|
964
|
+
"""Initialize the exception with timestamp validation context.
|
|
965
|
+
|
|
966
|
+
Args:
|
|
967
|
+
timestamp_col_name: Name of the column containing invalid timestamp values.
|
|
968
|
+
"""
|
|
671
969
|
self.timestamp_col_name = timestamp_col_name
|
|
672
970
|
|
|
673
971
|
def error_message(self) -> str:
|
|
972
|
+
"""Return the error message for this exception."""
|
|
674
973
|
return (
|
|
675
974
|
f"Prediction timestamp in {self.timestamp_col_name} is out of range. "
|
|
676
975
|
f"Prediction timestamps must be within {MAX_FUTURE_YEARS_FROM_CURRENT_TIME} year "
|
|
@@ -681,51 +980,83 @@ class InvalidValueTimestamp(ValidationError):
|
|
|
681
980
|
|
|
682
981
|
|
|
683
982
|
class InvalidValueMissingValue(ValidationError):
|
|
983
|
+
"""Raised when required fields contain null or missing values."""
|
|
984
|
+
|
|
684
985
|
def __repr__(self) -> str:
|
|
986
|
+
"""Return a string representation for debugging and logging."""
|
|
685
987
|
return "Invalid_Missing_Value"
|
|
686
988
|
|
|
687
989
|
def __init__(
|
|
688
|
-
self, name: str, wrong_values: str, column:
|
|
990
|
+
self, name: str, wrong_values: str, column: str | None = None
|
|
689
991
|
) -> None:
|
|
992
|
+
"""Initialize the exception with missing value validation context.
|
|
993
|
+
|
|
994
|
+
Args:
|
|
995
|
+
name: Name of the field with missing values.
|
|
996
|
+
wrong_values: Description of the wrong values found (e.g., "null", "NaN").
|
|
997
|
+
column: Optional column name where missing values were found.
|
|
998
|
+
"""
|
|
690
999
|
self.name = name
|
|
691
1000
|
self.wrong_values = wrong_values
|
|
692
1001
|
self.column = column
|
|
693
1002
|
|
|
694
1003
|
def error_message(self) -> str:
|
|
1004
|
+
"""Return the error message for this exception."""
|
|
695
1005
|
if self.name in ["Prediction ID", "Prediction Group ID", "Rank"]:
|
|
696
1006
|
return f"{self.name} column '{self.column}' must not contain {self.wrong_values} values."
|
|
697
|
-
|
|
698
|
-
return f"{self.name} must not contain {self.wrong_values} values."
|
|
1007
|
+
return f"{self.name} must not contain {self.wrong_values} values."
|
|
699
1008
|
|
|
700
1009
|
|
|
701
1010
|
class InvalidRankValue(ValidationError):
|
|
1011
|
+
"""Raised when ranking column values are outside acceptable range."""
|
|
1012
|
+
|
|
702
1013
|
def __repr__(self) -> str:
|
|
1014
|
+
"""Return a string representation for debugging and logging."""
|
|
703
1015
|
return "Invalid_Rank_Value"
|
|
704
1016
|
|
|
705
1017
|
def __init__(self, name: str, acceptable_range: str) -> None:
|
|
1018
|
+
"""Initialize the exception with rank validation context.
|
|
1019
|
+
|
|
1020
|
+
Args:
|
|
1021
|
+
name: Name of the ranking column.
|
|
1022
|
+
acceptable_range: Description of the acceptable value range.
|
|
1023
|
+
"""
|
|
706
1024
|
self.name = name
|
|
707
1025
|
self.acceptable_range = acceptable_range
|
|
708
1026
|
|
|
709
1027
|
def error_message(self) -> str:
|
|
1028
|
+
"""Return the error message for this exception."""
|
|
710
1029
|
return (
|
|
711
1030
|
f"ranking column {self.name} is out of range. "
|
|
712
|
-
f"Only values within {self.acceptable_range}
|
|
1031
|
+
f"Only values within {self.acceptable_range} are accepted."
|
|
713
1032
|
)
|
|
714
1033
|
|
|
715
1034
|
|
|
716
1035
|
class InvalidStringLengthInColumn(ValidationError):
|
|
1036
|
+
"""Raised when string values in a column exceed length limits."""
|
|
1037
|
+
|
|
717
1038
|
def __repr__(self) -> str:
|
|
1039
|
+
"""Return a string representation for debugging and logging."""
|
|
718
1040
|
return "Invalid_String_Length_In_Column"
|
|
719
1041
|
|
|
720
1042
|
def __init__(
|
|
721
1043
|
self, schema_name: str, col_name: str, min_length: int, max_length: int
|
|
722
1044
|
) -> None:
|
|
1045
|
+
"""Initialize the exception with string length validation context.
|
|
1046
|
+
|
|
1047
|
+
Args:
|
|
1048
|
+
schema_name: Name of the schema field.
|
|
1049
|
+
col_name: Name of the column with invalid string lengths.
|
|
1050
|
+
min_length: Minimum acceptable string length.
|
|
1051
|
+
max_length: Maximum acceptable string length.
|
|
1052
|
+
"""
|
|
723
1053
|
self.schema_name = schema_name
|
|
724
1054
|
self.col_name = col_name
|
|
725
1055
|
self.min_length = min_length
|
|
726
1056
|
self.max_length = max_length
|
|
727
1057
|
|
|
728
1058
|
def error_message(self) -> str:
|
|
1059
|
+
"""Return the error message for this exception."""
|
|
729
1060
|
return (
|
|
730
1061
|
f"{self.schema_name} column '{self.col_name}' contains invalid values. "
|
|
731
1062
|
f"Only string values of length between {self.min_length} and {self.max_length} are accepted."
|
|
@@ -733,13 +1064,22 @@ class InvalidStringLengthInColumn(ValidationError):
|
|
|
733
1064
|
|
|
734
1065
|
|
|
735
1066
|
class InvalidTagLength(ValidationError):
|
|
1067
|
+
"""Raised when tag values exceed maximum character length."""
|
|
1068
|
+
|
|
736
1069
|
def __repr__(self) -> str:
|
|
1070
|
+
"""Return a string representation for debugging and logging."""
|
|
737
1071
|
return "Invalid_Tag_Length"
|
|
738
1072
|
|
|
739
1073
|
def __init__(self, cols: Iterable) -> None:
|
|
1074
|
+
"""Initialize the exception with tag length validation context.
|
|
1075
|
+
|
|
1076
|
+
Args:
|
|
1077
|
+
cols: Tag columns with values exceeding maximum character length.
|
|
1078
|
+
"""
|
|
740
1079
|
self.wrong_value_columns = cols
|
|
741
1080
|
|
|
742
1081
|
def error_message(self) -> str:
|
|
1082
|
+
"""Return the error message for this exception."""
|
|
743
1083
|
return (
|
|
744
1084
|
f"Only tag values with less than or equal to {MAX_TAG_LENGTH} characters are supported. "
|
|
745
1085
|
f"The following tag columns have more than {MAX_TAG_LENGTH} characters: "
|
|
@@ -748,29 +1088,47 @@ class InvalidTagLength(ValidationError):
|
|
|
748
1088
|
|
|
749
1089
|
|
|
750
1090
|
class InvalidRankingCategoryValue(ValidationError):
|
|
1091
|
+
"""Raised when ranking relevance labels contain invalid values."""
|
|
1092
|
+
|
|
751
1093
|
def __repr__(self) -> str:
|
|
1094
|
+
"""Return a string representation for debugging and logging."""
|
|
752
1095
|
return "Invalid_Ranking_Relevance_Labels_Value"
|
|
753
1096
|
|
|
754
1097
|
def __init__(self, name: str) -> None:
|
|
1098
|
+
"""Initialize the exception with ranking category validation context.
|
|
1099
|
+
|
|
1100
|
+
Args:
|
|
1101
|
+
name: Name of the ranking relevance labels column.
|
|
1102
|
+
"""
|
|
755
1103
|
self.name = name
|
|
756
1104
|
|
|
757
1105
|
def error_message(self) -> str:
|
|
1106
|
+
"""Return the error message for this exception."""
|
|
758
1107
|
return (
|
|
759
|
-
f"
|
|
760
|
-
f"
|
|
1108
|
+
f"Ranking relevance labels '{self.name}' column contains invalid value. "
|
|
1109
|
+
f"Make sure empty string is not present"
|
|
761
1110
|
)
|
|
762
1111
|
|
|
763
1112
|
|
|
764
1113
|
class InvalidBoundingBoxesCoordinates(ValidationError, Exception):
|
|
1114
|
+
"""Raised when bounding box coordinates are invalid or incorrectly formatted."""
|
|
1115
|
+
|
|
765
1116
|
def __repr__(self) -> str:
|
|
1117
|
+
"""Return a string representation for debugging and logging."""
|
|
766
1118
|
return "Invalid_Bounding_Boxes_Coordinates"
|
|
767
1119
|
|
|
768
|
-
def __init__(self, reason) -> None:
|
|
1120
|
+
def __init__(self, reason: str) -> None:
|
|
1121
|
+
"""Initialize the exception with bounding box coordinate validation context.
|
|
1122
|
+
|
|
1123
|
+
Args:
|
|
1124
|
+
reason: Specific reason for invalid coordinates (e.g., "none_boxes",
|
|
1125
|
+
"none_or_empty_box", "boxes_coordinates_wrong_format").
|
|
1126
|
+
"""
|
|
769
1127
|
self._check_valid_reason(reason)
|
|
770
1128
|
self.reason = reason
|
|
771
1129
|
|
|
772
1130
|
@staticmethod
|
|
773
|
-
def _check_valid_reason(reason):
|
|
1131
|
+
def _check_valid_reason(reason: str) -> None:
|
|
774
1132
|
possible_reasons = (
|
|
775
1133
|
"none_boxes",
|
|
776
1134
|
"none_or_empty_box",
|
|
@@ -783,6 +1141,7 @@ class InvalidBoundingBoxesCoordinates(ValidationError, Exception):
|
|
|
783
1141
|
)
|
|
784
1142
|
|
|
785
1143
|
def error_message(self) -> str:
|
|
1144
|
+
"""Return the error message for this exception."""
|
|
786
1145
|
msg = "Invalid bounding boxes coordinates found. "
|
|
787
1146
|
if self.reason == "none_boxes":
|
|
788
1147
|
msg += (
|
|
@@ -805,15 +1164,24 @@ class InvalidBoundingBoxesCoordinates(ValidationError, Exception):
|
|
|
805
1164
|
|
|
806
1165
|
|
|
807
1166
|
class InvalidBoundingBoxesCategories(ValidationError, Exception):
|
|
1167
|
+
"""Raised when bounding box categories are invalid or missing."""
|
|
1168
|
+
|
|
808
1169
|
def __repr__(self) -> str:
|
|
1170
|
+
"""Return a string representation for debugging and logging."""
|
|
809
1171
|
return "Invalid_Bounding_Boxes_Categories"
|
|
810
1172
|
|
|
811
|
-
def __init__(self, reason) -> None:
|
|
1173
|
+
def __init__(self, reason: str) -> None:
|
|
1174
|
+
"""Initialize the exception with bounding box category validation context.
|
|
1175
|
+
|
|
1176
|
+
Args:
|
|
1177
|
+
reason: Specific reason for invalid categories (e.g., "none_category_list",
|
|
1178
|
+
"none_category").
|
|
1179
|
+
"""
|
|
812
1180
|
self._check_valid_reason(reason)
|
|
813
1181
|
self.reason = reason
|
|
814
1182
|
|
|
815
1183
|
@staticmethod
|
|
816
|
-
def _check_valid_reason(reason):
|
|
1184
|
+
def _check_valid_reason(reason: str) -> None:
|
|
817
1185
|
possible_reasons = (
|
|
818
1186
|
"none_category_list",
|
|
819
1187
|
"none_category",
|
|
@@ -825,6 +1193,7 @@ class InvalidBoundingBoxesCategories(ValidationError, Exception):
|
|
|
825
1193
|
)
|
|
826
1194
|
|
|
827
1195
|
def error_message(self) -> str:
|
|
1196
|
+
"""Return the error message for this exception."""
|
|
828
1197
|
msg = "Invalid bounding boxes categories found. "
|
|
829
1198
|
if self.reason == "none_category_list":
|
|
830
1199
|
msg += (
|
|
@@ -840,15 +1209,24 @@ class InvalidBoundingBoxesCategories(ValidationError, Exception):
|
|
|
840
1209
|
|
|
841
1210
|
|
|
842
1211
|
class InvalidBoundingBoxesScores(ValidationError, Exception):
|
|
1212
|
+
"""Raised when bounding box confidence scores are invalid or out of bounds."""
|
|
1213
|
+
|
|
843
1214
|
def __repr__(self) -> str:
|
|
1215
|
+
"""Return a string representation for debugging and logging."""
|
|
844
1216
|
return "Invalid_Bounding_Boxes_Scores"
|
|
845
1217
|
|
|
846
|
-
def __init__(self, reason) -> None:
|
|
1218
|
+
def __init__(self, reason: str) -> None:
|
|
1219
|
+
"""Initialize the exception with bounding box score validation context.
|
|
1220
|
+
|
|
1221
|
+
Args:
|
|
1222
|
+
reason: Specific reason for invalid scores (e.g., "none_score_list",
|
|
1223
|
+
"scores_out_of_bounds").
|
|
1224
|
+
"""
|
|
847
1225
|
self._check_valid_reason(reason)
|
|
848
1226
|
self.reason = reason
|
|
849
1227
|
|
|
850
1228
|
@staticmethod
|
|
851
|
-
def _check_valid_reason(reason):
|
|
1229
|
+
def _check_valid_reason(reason: str) -> None:
|
|
852
1230
|
possible_reasons = (
|
|
853
1231
|
"none_score_list",
|
|
854
1232
|
"scores_out_of_bounds",
|
|
@@ -860,6 +1238,7 @@ class InvalidBoundingBoxesScores(ValidationError, Exception):
|
|
|
860
1238
|
)
|
|
861
1239
|
|
|
862
1240
|
def error_message(self) -> str:
|
|
1241
|
+
"""Return the error message for this exception."""
|
|
863
1242
|
msg = "Invalid bounding boxes scores found. "
|
|
864
1243
|
if self.reason == "none_score_list":
|
|
865
1244
|
msg += (
|
|
@@ -875,18 +1254,28 @@ class InvalidBoundingBoxesScores(ValidationError, Exception):
|
|
|
875
1254
|
|
|
876
1255
|
|
|
877
1256
|
class InvalidPolygonCoordinates(ValidationError, Exception):
|
|
1257
|
+
"""Raised when polygon coordinates are invalid or incorrectly formatted."""
|
|
1258
|
+
|
|
878
1259
|
def __repr__(self) -> str:
|
|
1260
|
+
"""Return a string representation for debugging and logging."""
|
|
879
1261
|
return "Invalid_Polygon_Coordinates"
|
|
880
1262
|
|
|
881
1263
|
def __init__(
|
|
882
|
-
self, reason: str, coordinates:
|
|
1264
|
+
self, reason: str, coordinates: list[float] | None = None
|
|
883
1265
|
) -> None:
|
|
1266
|
+
"""Initialize the exception with polygon coordinate validation context.
|
|
1267
|
+
|
|
1268
|
+
Args:
|
|
1269
|
+
reason: Specific reason for invalid coordinates (e.g., "none_polygons",
|
|
1270
|
+
"none_or_empty_polygon", "polygon_coordinates_wrong_format").
|
|
1271
|
+
coordinates: Optional list of invalid coordinates for error reporting.
|
|
1272
|
+
"""
|
|
884
1273
|
self._check_valid_reason(reason)
|
|
885
1274
|
self.reason = reason
|
|
886
1275
|
self.coordinates = coordinates
|
|
887
1276
|
|
|
888
1277
|
@staticmethod
|
|
889
|
-
def _check_valid_reason(reason):
|
|
1278
|
+
def _check_valid_reason(reason: str) -> None:
|
|
890
1279
|
possible_reasons = (
|
|
891
1280
|
"none_polygons",
|
|
892
1281
|
"none_or_empty_polygon",
|
|
@@ -901,6 +1290,7 @@ class InvalidPolygonCoordinates(ValidationError, Exception):
|
|
|
901
1290
|
)
|
|
902
1291
|
|
|
903
1292
|
def error_message(self) -> str:
|
|
1293
|
+
"""Return the error message for this exception."""
|
|
904
1294
|
msg = "Invalid polygon coordinates found. "
|
|
905
1295
|
if self.reason == "none_polygons":
|
|
906
1296
|
msg += (
|
|
@@ -923,28 +1313,37 @@ class InvalidPolygonCoordinates(ValidationError, Exception):
|
|
|
923
1313
|
elif self.reason == "polygon_coordinates_repeated_vertices":
|
|
924
1314
|
msg += (
|
|
925
1315
|
"Found at least one polygon with repeated vertices. "
|
|
926
|
-
"No polygon can have repeated vertices."
|
|
1316
|
+
"No polygon can have repeated vertices. "
|
|
927
1317
|
f"The following coordinates are invalid: {self.coordinates}"
|
|
928
1318
|
)
|
|
929
1319
|
elif self.reason == "polygon_coordinates_self_intersecting_vertices":
|
|
930
1320
|
msg += (
|
|
931
1321
|
"Found at least one polygon with self-intersecting vertices. "
|
|
932
|
-
"Each polygon must not have self-intersecting vertices."
|
|
1322
|
+
"Each polygon must not have self-intersecting vertices. "
|
|
933
1323
|
f"The following coordinates are invalid: {self.coordinates}"
|
|
934
1324
|
)
|
|
935
1325
|
return msg
|
|
936
1326
|
|
|
937
1327
|
|
|
938
1328
|
class InvalidPolygonCategories(ValidationError, Exception):
|
|
1329
|
+
"""Raised when polygon categories are invalid or missing."""
|
|
1330
|
+
|
|
939
1331
|
def __repr__(self) -> str:
|
|
1332
|
+
"""Return a string representation for debugging and logging."""
|
|
940
1333
|
return "Invalid_Polygon_Categories"
|
|
941
1334
|
|
|
942
|
-
def __init__(self, reason) -> None:
|
|
1335
|
+
def __init__(self, reason: str) -> None:
|
|
1336
|
+
"""Initialize the exception with polygon category validation context.
|
|
1337
|
+
|
|
1338
|
+
Args:
|
|
1339
|
+
reason: Specific reason for invalid categories (e.g., "none_category_list",
|
|
1340
|
+
"none_category").
|
|
1341
|
+
"""
|
|
943
1342
|
self._check_valid_reason(reason)
|
|
944
1343
|
self.reason = reason
|
|
945
1344
|
|
|
946
1345
|
@staticmethod
|
|
947
|
-
def _check_valid_reason(reason):
|
|
1346
|
+
def _check_valid_reason(reason: str) -> None:
|
|
948
1347
|
possible_reasons = (
|
|
949
1348
|
"none_category_list",
|
|
950
1349
|
"none_category",
|
|
@@ -956,6 +1355,7 @@ class InvalidPolygonCategories(ValidationError, Exception):
|
|
|
956
1355
|
)
|
|
957
1356
|
|
|
958
1357
|
def error_message(self) -> str:
|
|
1358
|
+
"""Return the error message for this exception."""
|
|
959
1359
|
msg = "Invalid polygon categories found. "
|
|
960
1360
|
if self.reason == "none_category_list":
|
|
961
1361
|
msg += (
|
|
@@ -971,15 +1371,24 @@ class InvalidPolygonCategories(ValidationError, Exception):
|
|
|
971
1371
|
|
|
972
1372
|
|
|
973
1373
|
class InvalidPolygonScores(ValidationError, Exception):
|
|
1374
|
+
"""Raised when polygon confidence scores are invalid or out of bounds."""
|
|
1375
|
+
|
|
974
1376
|
def __repr__(self) -> str:
|
|
1377
|
+
"""Return a string representation for debugging and logging."""
|
|
975
1378
|
return "Invalid_Polygon_Scores"
|
|
976
1379
|
|
|
977
|
-
def __init__(self, reason) -> None:
|
|
1380
|
+
def __init__(self, reason: str) -> None:
|
|
1381
|
+
"""Initialize the exception with polygon score validation context.
|
|
1382
|
+
|
|
1383
|
+
Args:
|
|
1384
|
+
reason: Specific reason for invalid scores (e.g., "none_score_list",
|
|
1385
|
+
"scores_out_of_bounds").
|
|
1386
|
+
"""
|
|
978
1387
|
self._check_valid_reason(reason)
|
|
979
1388
|
self.reason = reason
|
|
980
1389
|
|
|
981
1390
|
@staticmethod
|
|
982
|
-
def _check_valid_reason(reason):
|
|
1391
|
+
def _check_valid_reason(reason: str) -> None:
|
|
983
1392
|
possible_reasons = (
|
|
984
1393
|
"none_score_list",
|
|
985
1394
|
"scores_out_of_bounds",
|
|
@@ -991,6 +1400,7 @@ class InvalidPolygonScores(ValidationError, Exception):
|
|
|
991
1400
|
)
|
|
992
1401
|
|
|
993
1402
|
def error_message(self) -> str:
|
|
1403
|
+
"""Return the error message for this exception."""
|
|
994
1404
|
msg = "Invalid polygon scores found. "
|
|
995
1405
|
if self.reason == "none_score_list":
|
|
996
1406
|
msg += (
|
|
@@ -1006,15 +1416,25 @@ class InvalidPolygonScores(ValidationError, Exception):
|
|
|
1006
1416
|
|
|
1007
1417
|
|
|
1008
1418
|
class InvalidNumClassesMultiClassMap(ValidationError):
|
|
1419
|
+
"""Raised when multi-class dictionary contains invalid number of classes."""
|
|
1420
|
+
|
|
1009
1421
|
def __repr__(self) -> str:
|
|
1422
|
+
"""Return a string representation for debugging and logging."""
|
|
1010
1423
|
return "Invalid_Num_classes_Multi_Class_Map"
|
|
1011
1424
|
|
|
1012
1425
|
def __init__(
|
|
1013
|
-
self, dict_col_to_list_of_invalid_num_classes:
|
|
1426
|
+
self, dict_col_to_list_of_invalid_num_classes: dict[str, list[str]]
|
|
1014
1427
|
) -> None:
|
|
1428
|
+
"""Initialize the exception with multi-class number validation context.
|
|
1429
|
+
|
|
1430
|
+
Args:
|
|
1431
|
+
dict_col_to_list_of_invalid_num_classes: Mapping of columns to lists of
|
|
1432
|
+
invalid number of classes found.
|
|
1433
|
+
"""
|
|
1015
1434
|
self.invalid_col_num_classes = dict_col_to_list_of_invalid_num_classes
|
|
1016
1435
|
|
|
1017
1436
|
def error_message(self) -> str:
|
|
1437
|
+
"""Return the error message for this exception."""
|
|
1018
1438
|
err_msg = ""
|
|
1019
1439
|
for (
|
|
1020
1440
|
col,
|
|
@@ -1025,8 +1445,8 @@ class InvalidNumClassesMultiClassMap(ValidationError):
|
|
|
1025
1445
|
list_invalid_num_classes
|
|
1026
1446
|
) # to de-duplicate
|
|
1027
1447
|
err_msg += (
|
|
1028
|
-
f"Multi-Class dictionary for the following column: {col} had {num_invalid_num_classes} rows"
|
|
1029
|
-
f"containing an invalid number of classes. The dictionary must contain at least 1 class"
|
|
1448
|
+
f"Multi-Class dictionary for the following column: {col} had {num_invalid_num_classes} rows "
|
|
1449
|
+
f"containing an invalid number of classes. The dictionary must contain at least 1 class "
|
|
1030
1450
|
f"and at most {MAX_NUMBER_OF_MULTI_CLASS_CLASSES} classes. Found rows with the following "
|
|
1031
1451
|
f"invalid number of classes: {log_a_list(list(set_invalid_num_classes), 'and')}\n"
|
|
1032
1452
|
)
|
|
@@ -1034,13 +1454,22 @@ class InvalidNumClassesMultiClassMap(ValidationError):
|
|
|
1034
1454
|
|
|
1035
1455
|
|
|
1036
1456
|
class InvalidMultiClassClassNameLength(ValidationError):
|
|
1457
|
+
"""Raised when multi-class class names exceed maximum length."""
|
|
1458
|
+
|
|
1037
1459
|
def __repr__(self) -> str:
|
|
1460
|
+
"""Return a string representation for debugging and logging."""
|
|
1038
1461
|
return "Invalid_Multi_Class_Class_Name_Length"
|
|
1039
1462
|
|
|
1040
|
-
def __init__(self, invalid_col_class_name:
|
|
1463
|
+
def __init__(self, invalid_col_class_name: dict[str, set]) -> None:
|
|
1464
|
+
"""Initialize the exception with multi-class name length validation context.
|
|
1465
|
+
|
|
1466
|
+
Args:
|
|
1467
|
+
invalid_col_class_name: Mapping of columns to sets of invalid class names.
|
|
1468
|
+
"""
|
|
1041
1469
|
self.invalid_col_class_name = invalid_col_class_name
|
|
1042
1470
|
|
|
1043
1471
|
def error_message(self) -> str:
|
|
1472
|
+
"""Return the error message for this exception."""
|
|
1044
1473
|
err_msg = ""
|
|
1045
1474
|
for col, class_names in self.invalid_col_class_name.items():
|
|
1046
1475
|
# limit to 10
|
|
@@ -1050,20 +1479,30 @@ class InvalidMultiClassClassNameLength(ValidationError):
|
|
|
1050
1479
|
else list(class_names)
|
|
1051
1480
|
)
|
|
1052
1481
|
err_msg += (
|
|
1053
|
-
f"Found some invalid class names: {log_a_list(class_names, 'and')}
|
|
1054
|
-
f" names must have at least one character
|
|
1482
|
+
f"Found some invalid class names: {log_a_list(class_names, 'and')} "
|
|
1483
|
+
f"in the {col} column. Class names must have at least one character "
|
|
1484
|
+
f"and less than {MAX_MULTI_CLASS_NAME_LENGTH}.\n"
|
|
1055
1485
|
)
|
|
1056
1486
|
return err_msg
|
|
1057
1487
|
|
|
1058
1488
|
|
|
1059
1489
|
class InvalidMultiClassPredScoreValue(ValidationError):
|
|
1490
|
+
"""Raised when multi-class prediction scores are outside valid range."""
|
|
1491
|
+
|
|
1060
1492
|
def __repr__(self) -> str:
|
|
1493
|
+
"""Return a string representation for debugging and logging."""
|
|
1061
1494
|
return "Invalid_Multi_Class_Pred_Score_Value"
|
|
1062
1495
|
|
|
1063
|
-
def __init__(self, invalid_col_class_scores:
|
|
1496
|
+
def __init__(self, invalid_col_class_scores: dict[str, set]) -> None:
|
|
1497
|
+
"""Initialize the exception with multi-class prediction score validation context.
|
|
1498
|
+
|
|
1499
|
+
Args:
|
|
1500
|
+
invalid_col_class_scores: Mapping of columns to sets of invalid scores.
|
|
1501
|
+
"""
|
|
1064
1502
|
self.invalid_col_class_scores = invalid_col_class_scores
|
|
1065
1503
|
|
|
1066
1504
|
def error_message(self) -> str:
|
|
1505
|
+
"""Return the error message for this exception."""
|
|
1067
1506
|
err_msg = ""
|
|
1068
1507
|
for col, scores in self.invalid_col_class_scores.items():
|
|
1069
1508
|
# limit to 10
|
|
@@ -1076,13 +1515,22 @@ class InvalidMultiClassPredScoreValue(ValidationError):
|
|
|
1076
1515
|
|
|
1077
1516
|
|
|
1078
1517
|
class InvalidMultiClassActScoreValue(ValidationError):
|
|
1518
|
+
"""Raised when multi-class actual scores are not 0 or 1."""
|
|
1519
|
+
|
|
1079
1520
|
def __repr__(self) -> str:
|
|
1521
|
+
"""Return a string representation for debugging and logging."""
|
|
1080
1522
|
return "Invalid_Multi_Class_Act_Score_Value"
|
|
1081
1523
|
|
|
1082
1524
|
def __init__(self, name: str) -> None:
|
|
1525
|
+
"""Initialize the exception with multi-class actual score validation context.
|
|
1526
|
+
|
|
1527
|
+
Args:
|
|
1528
|
+
name: Name of the column with invalid actual scores.
|
|
1529
|
+
"""
|
|
1083
1530
|
self.name = name
|
|
1084
1531
|
|
|
1085
1532
|
def error_message(self) -> str:
|
|
1533
|
+
"""Return the error message for this exception."""
|
|
1086
1534
|
return (
|
|
1087
1535
|
f"Found at least one score in the '{self.name}' column that was invalid. "
|
|
1088
1536
|
f"All scores (values) must be either 0 or 1."
|
|
@@ -1090,17 +1538,28 @@ class InvalidMultiClassActScoreValue(ValidationError):
|
|
|
1090
1538
|
|
|
1091
1539
|
|
|
1092
1540
|
class InvalidMultiClassThresholdClasses(ValidationError):
|
|
1541
|
+
"""Raised when prediction and threshold score dictionaries have mismatched classes."""
|
|
1542
|
+
|
|
1093
1543
|
def __repr__(self) -> str:
|
|
1544
|
+
"""Return a string representation for debugging and logging."""
|
|
1094
1545
|
return "Invalid_Multi_Class_Threshold_Classes"
|
|
1095
1546
|
|
|
1096
1547
|
def __init__(
|
|
1097
1548
|
self, name: str, prediction_class_set: set, threshold_class_set: set
|
|
1098
1549
|
) -> None:
|
|
1550
|
+
"""Initialize the exception with multi-class threshold validation context.
|
|
1551
|
+
|
|
1552
|
+
Args:
|
|
1553
|
+
name: Name of the field being validated.
|
|
1554
|
+
prediction_class_set: Set of classes in prediction scores dictionary.
|
|
1555
|
+
threshold_class_set: Set of classes in threshold scores dictionary.
|
|
1556
|
+
"""
|
|
1099
1557
|
self.name = name
|
|
1100
1558
|
self.prediction_class_set = prediction_class_set
|
|
1101
1559
|
self.threshold_class_set = threshold_class_set
|
|
1102
1560
|
|
|
1103
1561
|
def error_message(self) -> str:
|
|
1562
|
+
"""Return the error message for this exception."""
|
|
1104
1563
|
return (
|
|
1105
1564
|
"Multi-Class Prediction Scores and Threshold Scores Dictionaries must contain the same "
|
|
1106
1565
|
f"classes. The following classes of the Prediction Scores Dictionary are not in the Threshold "
|
|
@@ -1111,13 +1570,22 @@ class InvalidMultiClassThresholdClasses(ValidationError):
|
|
|
1111
1570
|
|
|
1112
1571
|
|
|
1113
1572
|
class InvalidAdditionalHeaders(ValidationError):
|
|
1573
|
+
"""Raised when additional headers use reserved header names."""
|
|
1574
|
+
|
|
1114
1575
|
def __repr__(self) -> str:
|
|
1576
|
+
"""Return a string representation for debugging and logging."""
|
|
1115
1577
|
return "Invalid_Additional_Headers"
|
|
1116
1578
|
|
|
1117
1579
|
def __init__(self, invalid_headers: Iterable) -> None:
|
|
1580
|
+
"""Initialize the exception with invalid headers context.
|
|
1581
|
+
|
|
1582
|
+
Args:
|
|
1583
|
+
invalid_headers: Headers that use reserved names.
|
|
1584
|
+
"""
|
|
1118
1585
|
self.invalid_header_names = invalid_headers
|
|
1119
1586
|
|
|
1120
1587
|
def error_message(self) -> str:
|
|
1588
|
+
"""Return the error message for this exception."""
|
|
1121
1589
|
return (
|
|
1122
1590
|
"Found invalid additional header, cannot use reserved headers named: "
|
|
1123
1591
|
f"{', '.join(map(str, self.invalid_header_names))}."
|
|
@@ -1125,14 +1593,24 @@ class InvalidAdditionalHeaders(ValidationError):
|
|
|
1125
1593
|
|
|
1126
1594
|
|
|
1127
1595
|
class InvalidRecord(ValidationError):
|
|
1596
|
+
"""Raised when records contain invalid or all-null column sets."""
|
|
1597
|
+
|
|
1128
1598
|
def __repr__(self) -> str:
|
|
1599
|
+
"""Return a string representation for debugging and logging."""
|
|
1129
1600
|
return "Invalid_Record"
|
|
1130
1601
|
|
|
1131
|
-
def __init__(self, columns:
|
|
1602
|
+
def __init__(self, columns: list[str], indexes: list[int]) -> None:
|
|
1603
|
+
"""Initialize the exception with invalid record context.
|
|
1604
|
+
|
|
1605
|
+
Args:
|
|
1606
|
+
columns: Columns that form an invalid all-null set.
|
|
1607
|
+
indexes: Row indexes containing the invalid records.
|
|
1608
|
+
"""
|
|
1132
1609
|
self.columns = columns
|
|
1133
1610
|
self.indexes = indexes
|
|
1134
1611
|
|
|
1135
1612
|
def error_message(self) -> str:
|
|
1613
|
+
"""Return the error message for this exception."""
|
|
1136
1614
|
return (
|
|
1137
1615
|
f"Invalid column set full of null values in one or more rows.\n"
|
|
1138
1616
|
f"\nProblematic Column Set:\n{log_a_list(self.columns, 'and')}\n"
|