arize 8.0.0a2__py3-none-any.whl → 8.0.0a4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arize/_exporter/client.py +28 -8
- arize/_exporter/parsers/tracing_data_parser.py +7 -4
- arize/_exporter/validation.py +7 -3
- arize/_flight/client.py +11 -14
- arize/_lazy.py +38 -36
- arize/client.py +36 -4
- arize/config.py +37 -3
- arize/constants/config.py +6 -0
- arize/constants/ml.py +33 -31
- arize/constants/model_mapping.json +199 -0
- arize/exceptions/base.py +47 -42
- arize/exceptions/models.py +12 -0
- arize/exceptions/parameters.py +342 -324
- arize/exceptions/values.py +16 -0
- arize/logging.py +6 -6
- arize/models/__init__.py +0 -0
- arize/models/batch_validation/__init__.py +0 -0
- arize/models/batch_validation/errors.py +1145 -0
- arize/models/batch_validation/validator.py +3711 -0
- arize/models/bounded_executor.py +34 -0
- arize/models/client.py +807 -0
- arize/models/stream_validation.py +214 -0
- arize/spans/client.py +55 -188
- arize/spans/validation/annotations/annotations_validation.py +8 -4
- arize/spans/validation/annotations/dataframe_form_validation.py +6 -2
- arize/spans/validation/annotations/value_validation.py +6 -3
- arize/spans/validation/common/argument_validation.py +5 -2
- arize/spans/validation/common/dataframe_form_validation.py +5 -2
- arize/spans/validation/evals/evals_validation.py +8 -4
- arize/spans/validation/evals/value_validation.py +8 -4
- arize/spans/validation/metadata/argument_validation.py +5 -2
- arize/spans/validation/spans/spans_validation.py +8 -4
- arize/spans/validation/spans/value_validation.py +8 -5
- arize/types.py +1421 -1366
- arize/utils/arrow.py +143 -2
- arize/utils/casting.py +396 -0
- arize/utils/proto.py +751 -310
- arize/version.py +1 -1
- {arize-8.0.0a2.dist-info → arize-8.0.0a4.dist-info}/METADATA +165 -9
- {arize-8.0.0a2.dist-info → arize-8.0.0a4.dist-info}/RECORD +43 -34
- /arize/utils/{pandas.py → dataframe.py} +0 -0
- {arize-8.0.0a2.dist-info → arize-8.0.0a4.dist-info}/WHEEL +0 -0
- {arize-8.0.0a2.dist-info → arize-8.0.0a4.dist-info}/licenses/LICENSE.md +0 -0
arize/exceptions/parameters.py
CHANGED
|
@@ -1,164 +1,158 @@
|
|
|
1
|
-
from collections.abc import Iterable
|
|
2
|
-
from typing import List
|
|
3
|
-
|
|
4
1
|
from arize.constants.ml import MAX_NUMBER_OF_EMBEDDINGS
|
|
5
2
|
from arize.exceptions.base import ValidationError
|
|
6
|
-
from arize.logging import log_a_list
|
|
7
|
-
from arize.types import Environments, Metrics, ModelTypes
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
class MissingPredictionIdColumnForDelayedRecords(ValidationError):
|
|
11
|
-
def __repr__(self) -> str:
|
|
12
|
-
return "Missing_Prediction_Id_Column_For_Delayed_Records"
|
|
13
|
-
|
|
14
|
-
def __init__(self, has_actual_info, has_feature_importance_info) -> None:
|
|
15
|
-
self.has_actual_info = has_actual_info
|
|
16
|
-
self.has_feature_importance_info = has_feature_importance_info
|
|
17
|
-
|
|
18
|
-
def error_message(self) -> str:
|
|
19
|
-
actual = "actual" if self.has_actual_info else ""
|
|
20
|
-
feat_imp = (
|
|
21
|
-
"feature importance" if self.has_feature_importance_info else ""
|
|
22
|
-
)
|
|
23
|
-
if self.has_actual_info and self.has_feature_importance_info:
|
|
24
|
-
msg = " and ".join([actual, feat_imp])
|
|
25
|
-
else:
|
|
26
|
-
msg = "".join([actual, feat_imp])
|
|
27
|
-
|
|
28
|
-
return (
|
|
29
|
-
"Missing 'prediction_id_column_name'. While prediction id is optional for most cases, "
|
|
30
|
-
"it is required when sending delayed actuals, i.e. when sending actual or feature importances "
|
|
31
|
-
f"without predictions. In this case, {msg} information was found (without predictions). "
|
|
32
|
-
"To learn more about delayed joins, please see the docs at "
|
|
33
|
-
"https://docs.arize.com/arize/sending-data-guides/how-to-send-delayed-actuals"
|
|
34
|
-
)
|
|
35
3
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
4
|
+
# class MissingPredictionIdColumnForDelayedRecords(ValidationError):
|
|
5
|
+
# def __repr__(self) -> str:
|
|
6
|
+
# return "Missing_Prediction_Id_Column_For_Delayed_Records"
|
|
7
|
+
#
|
|
8
|
+
# def __init__(self, has_actual_info, has_feature_importance_info) -> None:
|
|
9
|
+
# self.has_actual_info = has_actual_info
|
|
10
|
+
# self.has_feature_importance_info = has_feature_importance_info
|
|
11
|
+
#
|
|
12
|
+
# def error_message(self) -> str:
|
|
13
|
+
# actual = "actual" if self.has_actual_info else ""
|
|
14
|
+
# feat_imp = (
|
|
15
|
+
# "feature importance" if self.has_feature_importance_info else ""
|
|
16
|
+
# )
|
|
17
|
+
# if self.has_actual_info and self.has_feature_importance_info:
|
|
18
|
+
# msg = " and ".join([actual, feat_imp])
|
|
19
|
+
# else:
|
|
20
|
+
# msg = "".join([actual, feat_imp])
|
|
21
|
+
#
|
|
22
|
+
# return (
|
|
23
|
+
# "Missing 'prediction_id_column_name'. While prediction id is optional for most cases, "
|
|
24
|
+
# "it is required when sending delayed actuals, i.e. when sending actual or feature importances "
|
|
25
|
+
# f"without predictions. In this case, {msg} information was found (without predictions). "
|
|
26
|
+
# "To learn more about delayed joins, please see the docs at "
|
|
27
|
+
# "https://docs.arize.com/arize/sending-data-guides/how-to-send-delayed-actuals"
|
|
28
|
+
# )
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# class MissingColumns(ValidationError):
|
|
32
|
+
# def __repr__(self) -> str:
|
|
33
|
+
# return "Missing_Columns"
|
|
34
|
+
#
|
|
35
|
+
# def __init__(self, cols: Iterable) -> None:
|
|
36
|
+
# self.missing_cols = set(cols)
|
|
37
|
+
#
|
|
38
|
+
# def error_message(self) -> str:
|
|
39
|
+
# return (
|
|
40
|
+
# "The following columns are declared in the schema "
|
|
41
|
+
# "but are not found in the dataframe: "
|
|
42
|
+
# f"{', '.join(map(str, self.missing_cols))}."
|
|
43
|
+
# )
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# class MissingRequiredColumnsMetricsValidation(ValidationError):
|
|
47
|
+
# """
|
|
48
|
+
# This error is used only for model mapping validations.
|
|
49
|
+
# """
|
|
50
|
+
#
|
|
51
|
+
# def __repr__(self) -> str:
|
|
52
|
+
# return "Missing_Columns_Required_By_Metrics_Validation"
|
|
53
|
+
#
|
|
54
|
+
# def __init__(
|
|
55
|
+
# self, model_type: ModelTypes, metrics: List[Metrics], cols: Iterable
|
|
56
|
+
# ) -> None:
|
|
57
|
+
# self.model_type = model_type
|
|
58
|
+
# self.metrics = metrics
|
|
59
|
+
# self.missing_cols = cols
|
|
60
|
+
#
|
|
61
|
+
# def error_message(self) -> str:
|
|
62
|
+
# return (
|
|
63
|
+
# f"For logging data for a {self.model_type.name} model with support for metrics "
|
|
64
|
+
# f"{', '.join(m.name for m in self.metrics)}, "
|
|
65
|
+
# f"schema must include: {', '.join(map(str, self.missing_cols))}."
|
|
66
|
+
# )
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# class ReservedColumns(ValidationError):
|
|
70
|
+
# def __repr__(self) -> str:
|
|
71
|
+
# return "Reserved_Columns"
|
|
72
|
+
#
|
|
73
|
+
# def __init__(self, cols: Iterable) -> None:
|
|
74
|
+
# self.reserved_columns = cols
|
|
75
|
+
#
|
|
76
|
+
# def error_message(self) -> str:
|
|
77
|
+
# return (
|
|
78
|
+
# "The following columns are reserved and can only be specified "
|
|
79
|
+
# "in the proper fields of the schema: "
|
|
80
|
+
# f"{', '.join(map(str, self.reserved_columns))}."
|
|
81
|
+
# )
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
# class InvalidModelTypeAndMetricsCombination(ValidationError):
|
|
85
|
+
# """
|
|
86
|
+
# This error is used only for model mapping validations.
|
|
87
|
+
# """
|
|
88
|
+
#
|
|
89
|
+
# def __repr__(self) -> str:
|
|
90
|
+
# return "Invalid_ModelType_And_Metrics_Combination"
|
|
91
|
+
#
|
|
92
|
+
# def __init__(
|
|
93
|
+
# self,
|
|
94
|
+
# model_type: ModelTypes,
|
|
95
|
+
# metrics: List[Metrics],
|
|
96
|
+
# suggested_model_metric_combinations: List[List[str]],
|
|
97
|
+
# ) -> None:
|
|
98
|
+
# self.model_type = model_type
|
|
99
|
+
# self.metrics = metrics
|
|
100
|
+
# self.suggested_combinations = suggested_model_metric_combinations
|
|
101
|
+
#
|
|
102
|
+
# def error_message(self) -> str:
|
|
103
|
+
# valid_combos = ", or \n".join(
|
|
104
|
+
# "[" + ", ".join(combo) + "]"
|
|
105
|
+
# for combo in self.suggested_combinations
|
|
106
|
+
# )
|
|
107
|
+
# return (
|
|
108
|
+
# f"Invalid combination of model type {self.model_type.name} and metrics: "
|
|
109
|
+
# f"{', '.join(m.name for m in self.metrics)}. "
|
|
110
|
+
# f"Valid Metric combinations for this model type:\n{valid_combos}.\n"
|
|
111
|
+
# )
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
# class InvalidShapSuffix(ValidationError):
|
|
115
|
+
# def __repr__(self) -> str:
|
|
116
|
+
# return "Invalid_SHAP_Suffix"
|
|
117
|
+
#
|
|
118
|
+
# def __init__(self, cols: Iterable) -> None:
|
|
119
|
+
# self.invalid_column_names = cols
|
|
120
|
+
#
|
|
121
|
+
# def error_message(self) -> str:
|
|
122
|
+
# return (
|
|
123
|
+
# "The following features or tags must not be named with a `_shap` suffix: "
|
|
124
|
+
# f"{', '.join(map(str, self.invalid_column_names))}."
|
|
125
|
+
# )
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
# class InvalidModelType(ValidationError):
|
|
129
|
+
# def __repr__(self) -> str:
|
|
130
|
+
# return "Invalid_Model_Type"
|
|
131
|
+
#
|
|
132
|
+
# def error_message(self) -> str:
|
|
133
|
+
# return (
|
|
134
|
+
# "Model type not valid. Choose one of the following: "
|
|
135
|
+
# f"{', '.join('ModelTypes.' + mt.name for mt in ModelTypes)}. "
|
|
136
|
+
# )
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
# class InvalidEnvironment(ValidationError):
|
|
140
|
+
# def __repr__(self) -> str:
|
|
141
|
+
# return "Invalid_Environment"
|
|
142
|
+
#
|
|
143
|
+
# def error_message(self) -> str:
|
|
144
|
+
# return (
|
|
145
|
+
# "Environment not valid. Choose one of the following: "
|
|
146
|
+
# f"{', '.join('Environments.' + env.name for env in Environments)}. "
|
|
147
|
+
# )
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
# class InvalidBatchId(ValidationError):
|
|
151
|
+
# def __repr__(self) -> str:
|
|
152
|
+
# return "Invalid_Batch_ID"
|
|
153
|
+
#
|
|
154
|
+
# def error_message(self) -> str:
|
|
155
|
+
# return "Batch ID must be a nonempty string if logging to validation environment."
|
|
162
156
|
|
|
163
157
|
|
|
164
158
|
class InvalidModelVersion(ValidationError):
|
|
@@ -169,12 +163,12 @@ class InvalidModelVersion(ValidationError):
|
|
|
169
163
|
return "Model version must be a nonempty string."
|
|
170
164
|
|
|
171
165
|
|
|
172
|
-
class InvalidModelId(ValidationError):
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
166
|
+
# class InvalidModelId(ValidationError):
|
|
167
|
+
# def __repr__(self) -> str:
|
|
168
|
+
# return "Invalid_Model_ID"
|
|
169
|
+
#
|
|
170
|
+
# def error_message(self) -> str:
|
|
171
|
+
# return "Model ID must be a nonempty string."
|
|
178
172
|
|
|
179
173
|
|
|
180
174
|
class InvalidProjectName(ValidationError):
|
|
@@ -189,183 +183,207 @@ class InvalidProjectName(ValidationError):
|
|
|
189
183
|
)
|
|
190
184
|
|
|
191
185
|
|
|
192
|
-
class MissingPredActShap(ValidationError):
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
class MissingPreprodPredAct(ValidationError):
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
class MissingPreprodAct(ValidationError):
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
class MissingPreprodPredActNumericAndCategorical(ValidationError):
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
186
|
+
# class MissingPredActShap(ValidationError):
|
|
187
|
+
# def __repr__(self) -> str:
|
|
188
|
+
# return "Missing_Pred_or_Act_or_SHAP"
|
|
189
|
+
#
|
|
190
|
+
# def error_message(self) -> str:
|
|
191
|
+
# return (
|
|
192
|
+
# "The schema must specify at least one of the following: "
|
|
193
|
+
# "prediction label, actual label, or SHAP value column names"
|
|
194
|
+
# )
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
# class MissingPreprodPredAct(ValidationError):
|
|
198
|
+
# def __repr__(self) -> str:
|
|
199
|
+
# return "Missing_Preproduction_Pred_and_Act"
|
|
200
|
+
#
|
|
201
|
+
# def error_message(self) -> str:
|
|
202
|
+
# return "For logging pre-production data, the schema must specify both "
|
|
203
|
+
# "prediction and actual label columns."
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
# class MissingPreprodAct(ValidationError):
|
|
207
|
+
# def __repr__(self) -> str:
|
|
208
|
+
# return "Missing_Preproduction_Act"
|
|
209
|
+
#
|
|
210
|
+
# def error_message(self) -> str:
|
|
211
|
+
# return "For logging pre-production data, the schema must specify actual label column."
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
# class MissingPreprodPredActNumericAndCategorical(ValidationError):
|
|
215
|
+
# def __repr__(self) -> str:
|
|
216
|
+
# return "Missing_Preproduction_Pred_and_Act_Numeric_and_Categorical"
|
|
217
|
+
#
|
|
218
|
+
# def error_message(self) -> str:
|
|
219
|
+
# return (
|
|
220
|
+
# "For logging pre-production data for a numeric or a categorical model, "
|
|
221
|
+
# "the schema must specify both prediction and actual label or score columns."
|
|
222
|
+
# )
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
# class MissingRequiredColumnsForRankingModel(ValidationError):
|
|
226
|
+
# def __repr__(self) -> str:
|
|
227
|
+
# return "Missing_Required_Columns_For_Ranking_Model"
|
|
228
|
+
#
|
|
229
|
+
# def error_message(self) -> str:
|
|
230
|
+
# return (
|
|
231
|
+
# "For logging data for a ranking model, schema must specify: "
|
|
232
|
+
# "prediction_group_id_column_name and rank_column_name"
|
|
233
|
+
# )
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
# class MissingCVPredAct(ValidationError):
|
|
237
|
+
# def __repr__(self) -> str:
|
|
238
|
+
# return "Missing_CV_Prediction_or_Actual"
|
|
239
|
+
#
|
|
240
|
+
# def __init__(self, environment: Environments):
|
|
241
|
+
# self.environment = environment
|
|
242
|
+
#
|
|
243
|
+
# def error_message(self) -> str:
|
|
244
|
+
# if self.environment in (Environments.TRAINING, Environments.VALIDATION):
|
|
245
|
+
# env = "pre-production"
|
|
246
|
+
# opt = "and"
|
|
247
|
+
# elif self.environment == Environments.PRODUCTION:
|
|
248
|
+
# env = "production"
|
|
249
|
+
# opt = "or"
|
|
250
|
+
# else:
|
|
251
|
+
# raise TypeError("Invalid environment")
|
|
252
|
+
# return (
|
|
253
|
+
# f"For logging {env} data for an Object Detection model,"
|
|
254
|
+
# "the schema must specify one of: "
|
|
255
|
+
# f"('object_detection_prediction_column_names' {opt} "
|
|
256
|
+
# f"'object_detection_actual_column_names') "
|
|
257
|
+
# f"or ('semantic_segmentation_prediction_column_names' {opt} "
|
|
258
|
+
# f"'semantic_segmentation_actual_column_names') "
|
|
259
|
+
# f"or ('instance_segmentation_prediction_column_names' {opt} "
|
|
260
|
+
# f"'instance_segmentation_actual_column_names')"
|
|
261
|
+
# )
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
# class MultipleCVPredAct(ValidationError):
|
|
265
|
+
# def __repr__(self) -> str:
|
|
266
|
+
# return "Multiple_CV_Prediction_or_Actual"
|
|
267
|
+
#
|
|
268
|
+
# def __init__(self, environment: Environments):
|
|
269
|
+
# self.environment = environment
|
|
270
|
+
#
|
|
271
|
+
# def error_message(self) -> str:
|
|
272
|
+
# return (
|
|
273
|
+
# "The schema must only specify one of the following: "
|
|
274
|
+
# "'object_detection_prediction_column_names'/'object_detection_actual_column_names'"
|
|
275
|
+
# "'semantic_segmentation_prediction_column_names'/'semantic_segmentation_actual_column_names'"
|
|
276
|
+
# "'instance_segmentation_prediction_column_names'/'instance_segmentation_actual_column_names'"
|
|
277
|
+
# )
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
# class InvalidPredActCVColumnNamesForModelType(ValidationError):
|
|
281
|
+
# def __repr__(self) -> str:
|
|
282
|
+
# return "Invalid_CV_Prediction_or_Actual_Column_Names_for_Model_Type"
|
|
283
|
+
#
|
|
284
|
+
# def __init__(
|
|
285
|
+
# self,
|
|
286
|
+
# invalid_model_type: ModelTypes,
|
|
287
|
+
# ) -> None:
|
|
288
|
+
# self.invalid_model_type = invalid_model_type
|
|
289
|
+
#
|
|
290
|
+
# def error_message(self) -> str:
|
|
291
|
+
# return (
|
|
292
|
+
# f"Cannot use 'object_detection_prediction_column_names' or "
|
|
293
|
+
# f"'object_detection_actual_column_names' or "
|
|
294
|
+
# f"'semantic_segmentation_prediction_column_names' or "
|
|
295
|
+
# f"'semantic_segmentation_actual_column_names' or "
|
|
296
|
+
# f"'instance_segmentation_prediction_column_names' or "
|
|
297
|
+
# f"'instance_segmentation_actual_column_names' for {self.invalid_model_type} model "
|
|
298
|
+
# f"type. They are only allowed for ModelTypes.OBJECT_DETECTION models"
|
|
299
|
+
# )
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
# class MissingReqPredActColumnNamesForMultiClass(ValidationError):
|
|
303
|
+
# def __repr__(self) -> str:
|
|
304
|
+
# return "Missing_Required_Prediction_or_Actual_Column_Names_for_Multi_Class_Model_Type"
|
|
305
|
+
#
|
|
306
|
+
# def error_message(self) -> str:
|
|
307
|
+
# return (
|
|
308
|
+
# "For logging data for a multi class model, schema must specify: "
|
|
309
|
+
# "prediction_scores_column_name and/or actual_score_column_name. "
|
|
310
|
+
# "Optionally, you may include multi_class_threshold_scores_column_name"
|
|
311
|
+
# " (must include prediction_scores_column_name)"
|
|
312
|
+
# )
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
# class InvalidPredActColumnNamesForModelType(ValidationError):
|
|
316
|
+
# def __repr__(self) -> str:
|
|
317
|
+
# return "Invalid_Prediction_or_Actual_Column_Names_for_Model_Type"
|
|
318
|
+
#
|
|
319
|
+
# def __init__(
|
|
320
|
+
# self,
|
|
321
|
+
# invalid_model_type: ModelTypes,
|
|
322
|
+
# allowed_fields: List[str],
|
|
323
|
+
# wrong_columns: List[str],
|
|
324
|
+
# ) -> None:
|
|
325
|
+
# self.invalid_model_type = invalid_model_type
|
|
326
|
+
# self.allowed_fields = allowed_fields
|
|
327
|
+
# self.wrong_columns = wrong_columns
|
|
328
|
+
#
|
|
329
|
+
# def error_message(self) -> str:
|
|
330
|
+
# allowed_col_msg = ""
|
|
331
|
+
# if self.allowed_fields is not None:
|
|
332
|
+
# allowed_col_msg = f" Allowed Schema fields are {log_a_list(self.allowed_fields, 'and')}"
|
|
333
|
+
# return (
|
|
334
|
+
# f"Invalid Schema fields for {self.invalid_model_type} model type. {allowed_col_msg}"
|
|
335
|
+
# "The following columns of your dataframe are sent as an invalid schema field: "
|
|
336
|
+
# f"{log_a_list(self.wrong_columns, 'and')}"
|
|
337
|
+
# )
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
# class DuplicateColumnsInDataframe(ValidationError):
|
|
341
|
+
# def __repr__(self) -> str:
|
|
342
|
+
# return "Duplicate_Columns_In_Dataframe"
|
|
343
|
+
#
|
|
344
|
+
# def __init__(self, cols: Iterable) -> None:
|
|
345
|
+
# self.duplicate_cols = cols
|
|
346
|
+
#
|
|
347
|
+
# def error_message(self) -> str:
|
|
348
|
+
# return (
|
|
349
|
+
# "The following columns are present in the schema and have duplicates in the dataframe: "
|
|
350
|
+
# f"{self.duplicate_cols}. "
|
|
351
|
+
# )
|
|
229
352
|
|
|
230
353
|
|
|
231
|
-
class
|
|
232
|
-
def __repr__(self) -> str:
|
|
233
|
-
return "Missing_Required_Columns_For_Ranking_Model"
|
|
234
|
-
|
|
235
|
-
def error_message(self) -> str:
|
|
236
|
-
return (
|
|
237
|
-
"For logging data for a ranking model, schema must specify: "
|
|
238
|
-
"prediction_group_id_column_name and rank_column_name"
|
|
239
|
-
)
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
class MissingCVPredAct(ValidationError):
|
|
243
|
-
def __repr__(self) -> str:
|
|
244
|
-
return "Missing_CV_Prediction_or_Actual"
|
|
245
|
-
|
|
246
|
-
def __init__(self, environment: Environments):
|
|
247
|
-
self.environment = environment
|
|
248
|
-
|
|
249
|
-
def error_message(self) -> str:
|
|
250
|
-
if self.environment in (Environments.TRAINING, Environments.VALIDATION):
|
|
251
|
-
env = "pre-production"
|
|
252
|
-
opt = "and"
|
|
253
|
-
elif self.environment == Environments.PRODUCTION:
|
|
254
|
-
env = "production"
|
|
255
|
-
opt = "or"
|
|
256
|
-
else:
|
|
257
|
-
raise TypeError("Invalid environment")
|
|
258
|
-
return (
|
|
259
|
-
f"For logging {env} data for an Object Detection model,"
|
|
260
|
-
"the schema must specify one of: "
|
|
261
|
-
f"('object_detection_prediction_column_names' {opt} "
|
|
262
|
-
f"'object_detection_actual_column_names') "
|
|
263
|
-
f"or ('semantic_segmentation_prediction_column_names' {opt} "
|
|
264
|
-
f"'semantic_segmentation_actual_column_names') "
|
|
265
|
-
f"or ('instance_segmentation_prediction_column_names' {opt} "
|
|
266
|
-
f"'instance_segmentation_actual_column_names')"
|
|
267
|
-
)
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
class MultipleCVPredAct(ValidationError):
|
|
271
|
-
def __repr__(self) -> str:
|
|
272
|
-
return "Multiple_CV_Prediction_or_Actual"
|
|
273
|
-
|
|
274
|
-
def __init__(self, environment: Environments):
|
|
275
|
-
self.environment = environment
|
|
276
|
-
|
|
277
|
-
def error_message(self) -> str:
|
|
278
|
-
return (
|
|
279
|
-
"The schema must only specify one of the following: "
|
|
280
|
-
"'object_detection_prediction_column_names'/'object_detection_actual_column_names'"
|
|
281
|
-
"'semantic_segmentation_prediction_column_names'/'semantic_segmentation_actual_column_names'"
|
|
282
|
-
"'instance_segmentation_prediction_column_names'/'instance_segmentation_actual_column_names'"
|
|
283
|
-
)
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
class InvalidPredActCVColumnNamesForModelType(ValidationError):
|
|
354
|
+
class InvalidNumberOfEmbeddings(ValidationError):
|
|
287
355
|
def __repr__(self) -> str:
|
|
288
|
-
return "
|
|
289
|
-
|
|
290
|
-
def __init__(
|
|
291
|
-
self,
|
|
292
|
-
invalid_model_type: ModelTypes,
|
|
293
|
-
) -> None:
|
|
294
|
-
self.invalid_model_type = invalid_model_type
|
|
295
|
-
|
|
296
|
-
def error_message(self) -> str:
|
|
297
|
-
return (
|
|
298
|
-
f"Cannot use 'object_detection_prediction_column_names' or "
|
|
299
|
-
f"'object_detection_actual_column_names' or "
|
|
300
|
-
f"'semantic_segmentation_prediction_column_names' or "
|
|
301
|
-
f"'semantic_segmentation_actual_column_names' or "
|
|
302
|
-
f"'instance_segmentation_prediction_column_names' or "
|
|
303
|
-
f"'instance_segmentation_actual_column_names' for {self.invalid_model_type} model "
|
|
304
|
-
f"type. They are only allowed for ModelTypes.OBJECT_DETECTION models"
|
|
305
|
-
)
|
|
306
|
-
|
|
356
|
+
return "Invalid_Number_Of_Embeddings"
|
|
307
357
|
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
return "Missing_Required_Prediction_or_Actual_Column_Names_for_Multi_Class_Model_Type"
|
|
358
|
+
def __init__(self, number_of_embeddings: int) -> None:
|
|
359
|
+
self.number_of_embeddings = number_of_embeddings
|
|
311
360
|
|
|
312
361
|
def error_message(self) -> str:
|
|
313
362
|
return (
|
|
314
|
-
"
|
|
315
|
-
"
|
|
316
|
-
"Optionally, you may include multi_class_threshold_scores_column_name"
|
|
317
|
-
" (must include prediction_scores_column_name)"
|
|
363
|
+
f"The schema contains {self.number_of_embeddings} different embeddings when a maximum of "
|
|
364
|
+
f"{MAX_NUMBER_OF_EMBEDDINGS} is allowed."
|
|
318
365
|
)
|
|
319
366
|
|
|
320
367
|
|
|
321
|
-
class
|
|
322
|
-
def __repr__(self) -> str:
|
|
323
|
-
return "Invalid_Prediction_or_Actual_Column_Names_for_Model_Type"
|
|
324
|
-
|
|
368
|
+
class InvalidValueType(Exception):
|
|
325
369
|
def __init__(
|
|
326
370
|
self,
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
371
|
+
value_name: str,
|
|
372
|
+
value: bool | int | float | str,
|
|
373
|
+
correct_type: str,
|
|
330
374
|
) -> None:
|
|
331
|
-
self.
|
|
332
|
-
self.
|
|
333
|
-
self.
|
|
334
|
-
|
|
335
|
-
def error_message(self) -> str:
|
|
336
|
-
allowed_col_msg = ""
|
|
337
|
-
if self.allowed_fields is not None:
|
|
338
|
-
allowed_col_msg = f" Allowed Schema fields are {log_a_list(self.allowed_fields, 'and')}"
|
|
339
|
-
return (
|
|
340
|
-
f"Invalid Schema fields for {self.invalid_model_type} model type. {allowed_col_msg}"
|
|
341
|
-
"The following columns of your dataframe are sent as an invalid schema field: "
|
|
342
|
-
f"{log_a_list(self.wrong_columns, 'and')}"
|
|
343
|
-
)
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
class DuplicateColumnsInDataframe(ValidationError):
|
|
347
|
-
def __repr__(self) -> str:
|
|
348
|
-
return "Duplicate_Columns_In_Dataframe"
|
|
349
|
-
|
|
350
|
-
def __init__(self, cols: Iterable) -> None:
|
|
351
|
-
self.duplicate_cols = cols
|
|
352
|
-
|
|
353
|
-
def error_message(self) -> str:
|
|
354
|
-
return (
|
|
355
|
-
"The following columns are present in the schema and have duplicates in the dataframe: "
|
|
356
|
-
f"{self.duplicate_cols}. "
|
|
357
|
-
)
|
|
375
|
+
self.value_name = value_name
|
|
376
|
+
self.value = value
|
|
377
|
+
self.correct_type = correct_type
|
|
358
378
|
|
|
359
|
-
|
|
360
|
-
class InvalidNumberOfEmbeddings(ValidationError):
|
|
361
379
|
def __repr__(self) -> str:
|
|
362
|
-
return "
|
|
380
|
+
return "Invalid_Value_Type"
|
|
363
381
|
|
|
364
|
-
def
|
|
365
|
-
self.
|
|
382
|
+
def __str__(self) -> str:
|
|
383
|
+
return self.error_message()
|
|
366
384
|
|
|
367
385
|
def error_message(self) -> str:
|
|
368
386
|
return (
|
|
369
|
-
f"
|
|
370
|
-
f"
|
|
387
|
+
f"{self.value_name} with value {self.value} is of type {type(self.value).__name__}, "
|
|
388
|
+
f"but expected {self.correct_type}"
|
|
371
389
|
)
|