arize 8.0.0a2__py3-none-any.whl → 8.0.0a4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. arize/_exporter/client.py +28 -8
  2. arize/_exporter/parsers/tracing_data_parser.py +7 -4
  3. arize/_exporter/validation.py +7 -3
  4. arize/_flight/client.py +11 -14
  5. arize/_lazy.py +38 -36
  6. arize/client.py +36 -4
  7. arize/config.py +37 -3
  8. arize/constants/config.py +6 -0
  9. arize/constants/ml.py +33 -31
  10. arize/constants/model_mapping.json +199 -0
  11. arize/exceptions/base.py +47 -42
  12. arize/exceptions/models.py +12 -0
  13. arize/exceptions/parameters.py +342 -324
  14. arize/exceptions/values.py +16 -0
  15. arize/logging.py +6 -6
  16. arize/models/__init__.py +0 -0
  17. arize/models/batch_validation/__init__.py +0 -0
  18. arize/models/batch_validation/errors.py +1145 -0
  19. arize/models/batch_validation/validator.py +3711 -0
  20. arize/models/bounded_executor.py +34 -0
  21. arize/models/client.py +807 -0
  22. arize/models/stream_validation.py +214 -0
  23. arize/spans/client.py +55 -188
  24. arize/spans/validation/annotations/annotations_validation.py +8 -4
  25. arize/spans/validation/annotations/dataframe_form_validation.py +6 -2
  26. arize/spans/validation/annotations/value_validation.py +6 -3
  27. arize/spans/validation/common/argument_validation.py +5 -2
  28. arize/spans/validation/common/dataframe_form_validation.py +5 -2
  29. arize/spans/validation/evals/evals_validation.py +8 -4
  30. arize/spans/validation/evals/value_validation.py +8 -4
  31. arize/spans/validation/metadata/argument_validation.py +5 -2
  32. arize/spans/validation/spans/spans_validation.py +8 -4
  33. arize/spans/validation/spans/value_validation.py +8 -5
  34. arize/types.py +1421 -1366
  35. arize/utils/arrow.py +143 -2
  36. arize/utils/casting.py +396 -0
  37. arize/utils/proto.py +751 -310
  38. arize/version.py +1 -1
  39. {arize-8.0.0a2.dist-info → arize-8.0.0a4.dist-info}/METADATA +165 -9
  40. {arize-8.0.0a2.dist-info → arize-8.0.0a4.dist-info}/RECORD +43 -34
  41. /arize/utils/{pandas.py → dataframe.py} +0 -0
  42. {arize-8.0.0a2.dist-info → arize-8.0.0a4.dist-info}/WHEEL +0 -0
  43. {arize-8.0.0a2.dist-info → arize-8.0.0a4.dist-info}/licenses/LICENSE.md +0 -0
@@ -0,0 +1,199 @@
1
+ {
2
+ "external_model_types": [
3
+ "binary_classification",
4
+ "regression",
5
+ "ranking",
6
+ "object_detection"
7
+ ],
8
+ "metric_families": [
9
+ "classification",
10
+ "regression",
11
+ "ranking",
12
+ "auc_log_loss",
13
+ "ranking_label"
14
+ ],
15
+ "required_columns_map": [
16
+ {
17
+ "external_model_type": "binary_classification",
18
+ "mappings": [
19
+ {
20
+ "metrics": [["classification"]],
21
+ "required_columns": {
22
+ "arrow": {
23
+ "required": [["prediction_label_column_name"]],
24
+ "latent": [["actual_label_column_name"]]
25
+ },
26
+ "generic": {
27
+ "required": [["prediction_label"]],
28
+ "latent": [["actual_label"]]
29
+ }
30
+ }
31
+ },
32
+ {
33
+ "metrics": [["regression"]],
34
+ "required_columns": {
35
+ "arrow": {
36
+ "required": [["prediction_score_column_name"]],
37
+ "latent": [["actual_score_column_name"]]
38
+ },
39
+ "generic": {
40
+ "required": [["prediction_score"]],
41
+ "latent": [["actual_score"]]
42
+ }
43
+ }
44
+ },
45
+ {
46
+ "metrics": [
47
+ ["auc_log_loss", "classification"],
48
+ ["regression", "classification"]
49
+ ],
50
+ "required_columns": {
51
+ "arrow": {
52
+ "required": [
53
+ ["prediction_label_column_name", "prediction_score_column_name"]
54
+ ],
55
+ "latent": [["actual_label_column_name"]]
56
+ },
57
+ "generic": {
58
+ "required": [["prediction_label", "prediction_score"]],
59
+ "latent": [["actual_label"]]
60
+ }
61
+ }
62
+ },
63
+ {
64
+ "metrics": [["auc_log_loss"]],
65
+ "required_columns": {
66
+ "arrow": {
67
+ "required": [["prediction_score_column_name"]],
68
+ "latent": [["actual_label_column_name"]]
69
+ },
70
+ "generic": {
71
+ "required": [["prediction_score"]],
72
+ "latent": [["actual_label"]]
73
+ }
74
+ }
75
+ },
76
+ {
77
+ "metrics": [["regression", "auc_log_loss"]],
78
+ "required_columns": {
79
+ "arrow": {
80
+ "required": [["prediction_score_column_name"]],
81
+ "latent": [
82
+ ["actual_score_column_name", "actual_label_column_name"]
83
+ ]
84
+ },
85
+ "generic": {
86
+ "required": [["prediction_score"]],
87
+ "latent": [["actual_score", "actual_label"]]
88
+ }
89
+ }
90
+ },
91
+ {
92
+ "metrics": [["auc_log_loss", "classification", "regression"]],
93
+ "required_columns": {
94
+ "arrow": {
95
+ "required": [
96
+ ["prediction_label_column_name", "prediction_score_column_name"]
97
+ ],
98
+ "latent": [
99
+ ["actual_label_column_name", "actual_score_column_name"]
100
+ ]
101
+ },
102
+ "generic": {
103
+ "required": [["prediction_label", "prediction_score"]],
104
+ "latent": [["actual_label", "actual_score"]]
105
+ }
106
+ }
107
+ }
108
+ ]
109
+ },
110
+ {
111
+ "external_model_type": "regression",
112
+ "mappings": [
113
+ {
114
+ "metrics": [["regression"]],
115
+ "required_columns": {
116
+ "arrow": {
117
+ "required": [
118
+ ["prediction_score_column_name"],
119
+ ["prediction_label_column_name"]
120
+ ],
121
+ "latent": [["actual_score_column_name"]]
122
+ },
123
+ "generic": {
124
+ "required": [["prediction_score"]],
125
+ "latent": [["actual_score"]]
126
+ }
127
+ }
128
+ }
129
+ ]
130
+ },
131
+ {
132
+ "external_model_type": "ranking",
133
+ "mappings": [
134
+ {
135
+ "metrics": [["ranking"]],
136
+ "required_columns": {
137
+ "arrow": {
138
+ "required": [["rank_column_name"]],
139
+ "latent": [
140
+ ["relevance_labels_column_name"],
141
+ ["relevance_score_column_name"]
142
+ ]
143
+ },
144
+ "generic": {
145
+ "required": [["rank"]],
146
+ "latent": [["actual_score"], ["actual_label"]]
147
+ }
148
+ }
149
+ },
150
+ {
151
+ "metrics": [["ranking_label"], ["ranking", "ranking_label"]],
152
+ "required_columns": {
153
+ "arrow": {
154
+ "required": [["rank_column_name"]],
155
+ "latent": [["relevance_labels_column_name"]]
156
+ },
157
+ "generic": {
158
+ "required": [["rank", "prediction_label"]],
159
+ "latent": [["actual_label"]]
160
+ }
161
+ }
162
+ },
163
+ {
164
+ "metrics": [["ranking", "auc_log_loss"], ["auc_log_loss"]],
165
+ "required_columns": {
166
+ "arrow": {
167
+ "required": [
168
+ ["rank_column_name", "prediction_score_column_name"]
169
+ ],
170
+ "latent": [["relevance_labels_column_name"]]
171
+ },
172
+ "generic": {
173
+ "required": [["rank", "prediction_score"]],
174
+ "latent": [["actual_label"]]
175
+ }
176
+ }
177
+ }
178
+ ]
179
+ },
180
+ {
181
+ "external_model_type": "object_detection",
182
+ "mappings": [
183
+ {
184
+ "metrics": [["classification"]],
185
+ "required_columns": {
186
+ "arrow": {
187
+ "required": [["object_detection_prediction_column_names"]],
188
+ "latent": [["object_detection_actual_column_names"]]
189
+ },
190
+ "generic": {
191
+ "required": [["object_detection_prediction"]],
192
+ "latent": [["object_detection_actual"]]
193
+ }
194
+ }
195
+ }
196
+ ]
197
+ }
198
+ ]
199
+ }
arize/exceptions/base.py CHANGED
@@ -2,7 +2,12 @@ from abc import ABC, abstractmethod
2
2
  from collections.abc import Iterable
3
3
  from typing import List
4
4
 
5
- from arize.types import Environments
5
+ INVALID_ARROW_CONVERSION_MSG = (
6
+ "The dataframe needs to convert to pyarrow but has failed to do so. "
7
+ "There may be unrecognized data types in the dataframe. "
8
+ "Another reason may be that a column in the dataframe has a mix of strings and "
9
+ "numbers, in which case you may want to convert the strings in that column to NaN. "
10
+ )
6
11
 
7
12
 
8
13
  class ValidationError(Exception, ABC):
@@ -26,16 +31,16 @@ class ValidationFailure(Exception):
26
31
  # ----------------------
27
32
  # Minimum required checks
28
33
  # ----------------------
29
- class InvalidColumnNameEmptyString(ValidationError):
30
- def __repr__(self) -> str:
31
- return "Invalid_Column_Name_Empty_String"
32
-
33
- def error_message(self) -> str:
34
- return (
35
- "Empty column name found: ''. The schema cannot point to columns in the "
36
- "dataframe denoted by an empty string. You can see the columns used in the "
37
- "schema by running schema.get_used_columns()"
38
- )
34
+ # class InvalidColumnNameEmptyString(ValidationError):
35
+ # def __repr__(self) -> str:
36
+ # return "Invalid_Column_Name_Empty_String"
37
+ #
38
+ # def error_message(self) -> str:
39
+ # return (
40
+ # "Empty column name found: ''. The schema cannot point to columns in the "
41
+ # "dataframe denoted by an empty string. You can see the columns used in the "
42
+ # "schema by running schema.get_used_columns()"
43
+ # )
39
44
 
40
45
 
41
46
  class InvalidFieldTypeConversion(ValidationError):
@@ -53,29 +58,29 @@ class InvalidFieldTypeConversion(ValidationError):
53
58
  )
54
59
 
55
60
 
56
- class InvalidFieldTypeEmbeddingFeatures(ValidationError):
57
- def __repr__(self) -> str:
58
- return "Invalid_Input_Type_Embedding_Features"
59
-
60
- def __init__(self) -> None:
61
- pass
62
-
63
- def error_message(self) -> str:
64
- return (
65
- "schema.embedding_feature_column_names should be a dictionary mapping strings "
66
- "to EmbeddingColumnNames objects"
67
- )
68
-
69
-
70
- class InvalidFieldTypePromptResponse(ValidationError):
71
- def __repr__(self) -> str:
72
- return "Invalid_Input_Type_Prompt_Response"
61
+ # class InvalidFieldTypeEmbeddingFeatures(ValidationError):
62
+ # def __repr__(self) -> str:
63
+ # return "Invalid_Input_Type_Embedding_Features"
64
+ #
65
+ # def __init__(self) -> None:
66
+ # pass
67
+ #
68
+ # def error_message(self) -> str:
69
+ # return (
70
+ # "schema.embedding_feature_column_names should be a dictionary mapping strings "
71
+ # "to EmbeddingColumnNames objects"
72
+ # )
73
73
 
74
- def __init__(self, name: str) -> None:
75
- self.name = name
76
74
 
77
- def error_message(self) -> str:
78
- return f"'{self.name}' must be of type str or EmbeddingColumnNames"
75
+ # class InvalidFieldTypePromptResponse(ValidationError):
76
+ # def __repr__(self) -> str:
77
+ # return "Invalid_Input_Type_Prompt_Response"
78
+ #
79
+ # def __init__(self, name: str) -> None:
80
+ # self.name = name
81
+ #
82
+ # def error_message(self) -> str:
83
+ # return f"'{self.name}' must be of type str or EmbeddingColumnNames"
79
84
 
80
85
 
81
86
  class InvalidDataFrameIndex(ValidationError):
@@ -89,13 +94,13 @@ class InvalidDataFrameIndex(ValidationError):
89
94
  )
90
95
 
91
96
 
92
- class InvalidSchemaType(ValidationError):
93
- def __repr__(self) -> str:
94
- return "Invalid_Schema_Type"
95
-
96
- def __init__(self, schema_type: str, environment: Environments) -> None:
97
- self.schema_type = schema_type
98
- self.environment = environment
99
-
100
- def error_message(self) -> str:
101
- return f"Cannot use a {self.schema_type} for a model with environment: {self.environment}"
97
+ # class InvalidSchemaType(ValidationError):
98
+ # def __repr__(self) -> str:
99
+ # return "Invalid_Schema_Type"
100
+ #
101
+ # def __init__(self, schema_type: str, environment: Environments) -> None:
102
+ # self.schema_type = schema_type
103
+ # self.environment = environment
104
+ #
105
+ # def error_message(self) -> str:
106
+ # return f"Cannot use a {self.schema_type} for a model with environment: {self.environment}"
@@ -8,3 +8,15 @@ class MissingProjectNameError(Exception):
8
8
  @staticmethod
9
9
  def _default_message() -> str:
10
10
  return "Missing Project Name: pass project_name explicitly"
11
+
12
+
13
+ class MissingModelNameError(Exception):
14
+ def __init__(self, message: str = ""):
15
+ self.message = message
16
+
17
+ def __str__(self) -> str:
18
+ return self.message or self._default_message()
19
+
20
+ @staticmethod
21
+ def _default_message() -> str:
22
+ return "Missing Model Name: pass model name explicitly"