arize 8.0.0a23__py3-none-any.whl → 8.0.0b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arize/__init__.py +11 -10
- arize/_exporter/client.py +1 -1
- arize/_generated/api_client/__init__.py +0 -2
- arize/_generated/api_client/models/__init__.py +0 -1
- arize/_generated/api_client/models/datasets_create_request.py +2 -10
- arize/_generated/api_client/models/datasets_examples_insert_request.py +2 -10
- arize/_generated/api_client/test/test_datasets_create_request.py +2 -6
- arize/_generated/api_client/test/test_datasets_examples_insert_request.py +2 -6
- arize/_generated/api_client/test/test_datasets_examples_list200_response.py +2 -6
- arize/_generated/api_client/test/test_datasets_examples_update_request.py +2 -6
- arize/_generated/api_client/test/test_experiments_create_request.py +2 -6
- arize/_generated/api_client/test/test_experiments_runs_list200_response.py +2 -6
- arize/_generated/api_client_README.md +0 -1
- arize/client.py +47 -163
- arize/config.py +59 -100
- arize/datasets/client.py +11 -6
- arize/embeddings/nlp_generators.py +12 -6
- arize/embeddings/tabular_generators.py +14 -11
- arize/experiments/__init__.py +12 -0
- arize/experiments/client.py +13 -9
- arize/experiments/functions.py +6 -6
- arize/experiments/types.py +3 -3
- arize/{models → ml}/batch_validation/errors.py +2 -2
- arize/{models → ml}/batch_validation/validator.py +5 -3
- arize/{models → ml}/casting.py +42 -78
- arize/{models → ml}/client.py +19 -17
- arize/{models → ml}/proto.py +2 -2
- arize/{models → ml}/stream_validation.py +1 -1
- arize/{models → ml}/surrogate_explainer/mimic.py +6 -2
- arize/{types.py → ml/types.py} +99 -234
- arize/pre_releases.py +2 -1
- arize/projects/client.py +11 -6
- arize/spans/client.py +91 -86
- arize/spans/conversion.py +11 -4
- arize/spans/validation/common/value_validation.py +1 -1
- arize/spans/validation/spans/dataframe_form_validation.py +1 -1
- arize/spans/validation/spans/value_validation.py +2 -1
- arize/utils/dataframe.py +1 -1
- arize/utils/online_tasks/dataframe_preprocessor.py +5 -6
- arize/utils/types.py +105 -0
- arize/version.py +1 -1
- {arize-8.0.0a23.dist-info → arize-8.0.0b1.dist-info}/METADATA +56 -59
- {arize-8.0.0a23.dist-info → arize-8.0.0b1.dist-info}/RECORD +50 -51
- arize/_generated/api_client/models/primitive_value.py +0 -172
- arize/_generated/api_client/test/test_primitive_value.py +0 -50
- /arize/{models → ml}/__init__.py +0 -0
- /arize/{models → ml}/batch_validation/__init__.py +0 -0
- /arize/{models → ml}/bounded_executor.py +0 -0
- /arize/{models → ml}/surrogate_explainer/__init__.py +0 -0
- {arize-8.0.0a23.dist-info → arize-8.0.0b1.dist-info}/WHEEL +0 -0
- {arize-8.0.0a23.dist-info → arize-8.0.0b1.dist-info}/licenses/LICENSE +0 -0
- {arize-8.0.0a23.dist-info → arize-8.0.0b1.dist-info}/licenses/NOTICE +0 -0
arize/{types.py → ml/types.py}
RENAMED
|
@@ -1,9 +1,8 @@
|
|
|
1
|
-
"""Common type definitions and data models used across the
|
|
1
|
+
"""Common type definitions and data models used across the ML Client."""
|
|
2
2
|
|
|
3
|
-
import json
|
|
4
3
|
import logging
|
|
5
4
|
import math
|
|
6
|
-
from collections.abc import
|
|
5
|
+
from collections.abc import Iterator
|
|
7
6
|
from dataclasses import asdict, dataclass, replace
|
|
8
7
|
from datetime import datetime
|
|
9
8
|
from decimal import Decimal
|
|
@@ -12,7 +11,6 @@ from itertools import chain
|
|
|
12
11
|
from typing import (
|
|
13
12
|
NamedTuple,
|
|
14
13
|
Self,
|
|
15
|
-
TypeVar,
|
|
16
14
|
)
|
|
17
15
|
|
|
18
16
|
import numpy as np
|
|
@@ -40,6 +38,7 @@ from arize.exceptions.parameters import InvalidValueType
|
|
|
40
38
|
# )
|
|
41
39
|
# from arize.utils.errors import InvalidValueType
|
|
42
40
|
from arize.logging import get_truncation_warning_message
|
|
41
|
+
from arize.utils.types import is_dict_of, is_iterable_of, is_list_of
|
|
43
42
|
|
|
44
43
|
logger = logging.getLogger(__name__)
|
|
45
44
|
|
|
@@ -162,15 +161,12 @@ class Embedding(NamedTuple):
|
|
|
162
161
|
|
|
163
162
|
Ensures validations are passed for vector, data, and link_to_data fields.
|
|
164
163
|
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
vector belongs to
|
|
164
|
+
Args:
|
|
165
|
+
emb_name: Name of the embedding feature the
|
|
166
|
+
vector belongs to.
|
|
169
167
|
|
|
170
168
|
Raises:
|
|
171
|
-
|
|
172
|
-
TypeError: If the embedding fields are of the wrong type
|
|
173
|
-
|
|
169
|
+
TypeError: If the embedding fields are of the wrong type.
|
|
174
170
|
"""
|
|
175
171
|
if self.vector is not None:
|
|
176
172
|
self._validate_embedding_vector(emb_name)
|
|
@@ -194,15 +190,12 @@ class Embedding(NamedTuple):
|
|
|
194
190
|
Requirements: 1) Type must be list or convertible to list (like numpy arrays,
|
|
195
191
|
pandas Series), 2) List must not be empty, 3) Elements in list must be floats.
|
|
196
192
|
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
belongs to
|
|
193
|
+
Args:
|
|
194
|
+
emb_name: Name of the embedding feature the vector
|
|
195
|
+
belongs to.
|
|
201
196
|
|
|
202
197
|
Raises:
|
|
203
|
-
|
|
204
|
-
TypeError: If the embedding does not satisfy requirements above
|
|
205
|
-
|
|
198
|
+
TypeError: If the embedding does not satisfy requirements above.
|
|
206
199
|
"""
|
|
207
200
|
if not Embedding._is_valid_iterable(self.vector):
|
|
208
201
|
raise TypeError(
|
|
@@ -228,15 +221,13 @@ class Embedding(NamedTuple):
|
|
|
228
221
|
|
|
229
222
|
Requirement: Must be string or list of strings (NLP case).
|
|
230
223
|
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
224
|
+
Args:
|
|
225
|
+
emb_name: Name of the embedding feature the vector belongs to.
|
|
226
|
+
data: Raw data associated with the embedding feature.
|
|
227
|
+
Typically raw text.
|
|
235
228
|
|
|
236
229
|
Raises:
|
|
237
|
-
|
|
238
|
-
TypeError: If the embedding does not satisfy requirements above
|
|
239
|
-
|
|
230
|
+
TypeError: If the embedding does not satisfy requirements above.
|
|
240
231
|
"""
|
|
241
232
|
# Validate that data is a string or iterable of strings
|
|
242
233
|
is_string = isinstance(data, str)
|
|
@@ -276,16 +267,13 @@ class Embedding(NamedTuple):
|
|
|
276
267
|
|
|
277
268
|
Requirement: Must be string.
|
|
278
269
|
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
cloud storage
|
|
270
|
+
Args:
|
|
271
|
+
emb_name: Name of the embedding feature the vector belongs to.
|
|
272
|
+
link_to_data: Link to source data of embedding feature, typically an
|
|
273
|
+
image file on cloud storage.
|
|
284
274
|
|
|
285
275
|
Raises:
|
|
286
|
-
|
|
287
|
-
TypeError: If the embedding does not satisfy requirements above
|
|
288
|
-
|
|
276
|
+
TypeError: If the embedding does not satisfy requirements above.
|
|
289
277
|
"""
|
|
290
278
|
if not isinstance(link_to_data, str):
|
|
291
279
|
raise TypeError(
|
|
@@ -301,14 +289,12 @@ class Embedding(NamedTuple):
|
|
|
301
289
|
|
|
302
290
|
Accepted types: 1) List, 2) numpy array, or 3) pandas Series.
|
|
303
291
|
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
data: input iterable
|
|
292
|
+
Args:
|
|
293
|
+
data: Input iterable.
|
|
307
294
|
|
|
308
295
|
Returns:
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
296
|
+
True if the data type is one of the accepted iterable types,
|
|
297
|
+
false otherwise.
|
|
312
298
|
"""
|
|
313
299
|
return any(isinstance(data, t) for t in (list, np.ndarray))
|
|
314
300
|
|
|
@@ -379,18 +365,16 @@ class ObjectDetectionColumnNames(NamedTuple):
|
|
|
379
365
|
|
|
380
366
|
These values are assigned to the prediction or actual schema parameter.
|
|
381
367
|
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
bounding_boxes_coordinates_column_name (str): Column name containing the coordinates of the
|
|
368
|
+
Args:
|
|
369
|
+
bounding_boxes_coordinates_column_name: Column name containing the coordinates of the
|
|
385
370
|
rectangular outline that locates an object within an image or video. Pascal VOC format
|
|
386
371
|
required. The contents of this column must be a List[List[float]].
|
|
387
|
-
categories_column_name
|
|
372
|
+
categories_column_name: Column name containing the predefined classes or labels used
|
|
388
373
|
by the model to classify the detected objects. The contents of this column must be List[str].
|
|
389
|
-
scores_column_names
|
|
374
|
+
scores_column_names: Column name containing the confidence scores that the
|
|
390
375
|
model assigns to its predictions, indicating how certain the model is that the predicted
|
|
391
376
|
class is contained within the bounding box. This argument is only applicable for prediction
|
|
392
377
|
values. The contents of this column must be List[float].
|
|
393
|
-
|
|
394
378
|
"""
|
|
395
379
|
|
|
396
380
|
bounding_boxes_coordinates_column_name: str
|
|
@@ -403,15 +387,13 @@ class SemanticSegmentationColumnNames(NamedTuple):
|
|
|
403
387
|
|
|
404
388
|
These values are assigned to the prediction or actual schema parameter.
|
|
405
389
|
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
polygon_coordinates_column_name (str): Column name containing the coordinates of the vertices
|
|
390
|
+
Args:
|
|
391
|
+
polygon_coordinates_column_name: Column name containing the coordinates of the vertices
|
|
409
392
|
of the polygon mask within an image or video. The first sublist contains the
|
|
410
393
|
coordinates of the outline of the polygon. The subsequent sublists contain the coordinates
|
|
411
394
|
of any cutouts within the polygon. The contents of this column must be a List[List[float]].
|
|
412
|
-
categories_column_name
|
|
395
|
+
categories_column_name: Column name containing the predefined classes or labels used
|
|
413
396
|
by the model to classify the detected objects. The contents of this column must be List[str].
|
|
414
|
-
|
|
415
397
|
"""
|
|
416
398
|
|
|
417
399
|
polygon_coordinates_column_name: str
|
|
@@ -421,22 +403,20 @@ class SemanticSegmentationColumnNames(NamedTuple):
|
|
|
421
403
|
class InstanceSegmentationPredictionColumnNames(NamedTuple):
|
|
422
404
|
"""Used to log instance segmentation prediction values for the prediction schema parameter.
|
|
423
405
|
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
polygon_coordinates_column_name (str): Column name containing the coordinates of the vertices
|
|
406
|
+
Args:
|
|
407
|
+
polygon_coordinates_column_name: Column name containing the coordinates of the vertices
|
|
427
408
|
of the polygon mask within an image or video. The first sublist contains the
|
|
428
409
|
coordinates of the outline of the polygon. The subsequent sublists contain the coordinates
|
|
429
410
|
of any cutouts within the polygon. The contents of this column must be a List[List[float]].
|
|
430
|
-
categories_column_name
|
|
411
|
+
categories_column_name: Column name containing the predefined classes or labels used
|
|
431
412
|
by the model to classify the detected objects. The contents of this column must be List[str].
|
|
432
|
-
scores_column_name
|
|
413
|
+
scores_column_name: Column name containing the confidence scores that the
|
|
433
414
|
model assigns to its predictions, indicating how certain the model is that the predicted
|
|
434
415
|
class is contained within the bounding box. This argument is only applicable for prediction
|
|
435
416
|
values. The contents of this column must be List[float].
|
|
436
|
-
bounding_boxes_coordinates_column_name
|
|
417
|
+
bounding_boxes_coordinates_column_name: Column name containing the coordinates of the
|
|
437
418
|
rectangular outline that locates an object within an image or video. Pascal VOC format
|
|
438
419
|
required. The contents of this column must be a List[List[float]].
|
|
439
|
-
|
|
440
420
|
"""
|
|
441
421
|
|
|
442
422
|
polygon_coordinates_column_name: str
|
|
@@ -448,17 +428,15 @@ class InstanceSegmentationPredictionColumnNames(NamedTuple):
|
|
|
448
428
|
class InstanceSegmentationActualColumnNames(NamedTuple):
|
|
449
429
|
"""Used to log instance segmentation actual values that are assigned to the actual schema parameter.
|
|
450
430
|
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
polygon_coordinates_column_name (str): Column name containing the coordinates of the
|
|
431
|
+
Args:
|
|
432
|
+
polygon_coordinates_column_name: Column name containing the coordinates of the
|
|
454
433
|
polygon that locates an object within an image or video. The contents of this column
|
|
455
434
|
must be a List[List[float]].
|
|
456
|
-
categories_column_name
|
|
435
|
+
categories_column_name: Column name containing the predefined classes or labels used
|
|
457
436
|
by the model to classify the detected objects. The contents of this column must be List[str].
|
|
458
|
-
bounding_boxes_coordinates_column_name
|
|
437
|
+
bounding_boxes_coordinates_column_name: Column name containing the coordinates of the
|
|
459
438
|
rectangular outline that locates an object within an image or video. Pascal VOC format
|
|
460
439
|
required. The contents of this column must be a List[List[float]].
|
|
461
|
-
|
|
462
440
|
"""
|
|
463
441
|
|
|
464
442
|
polygon_coordinates_column_name: str
|
|
@@ -743,13 +721,10 @@ class InstanceSegmentationActualLabel(NamedTuple):
|
|
|
743
721
|
class MultiClassPredictionLabel(NamedTuple):
|
|
744
722
|
"""Used to log multi class prediction label.
|
|
745
723
|
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
prediction_scores (Dict[str, Union[float, int]]): the prediction scores of the classes.
|
|
750
|
-
threshold_scores (Optional[Dict[str, Union[float, int]]]): the threshold scores of the classes.
|
|
724
|
+
Args:
|
|
725
|
+
prediction_scores: The prediction scores of the classes.
|
|
726
|
+
threshold_scores: The threshold scores of the classes.
|
|
751
727
|
Only Multi Label will have threshold scores.
|
|
752
|
-
|
|
753
728
|
"""
|
|
754
729
|
|
|
755
730
|
prediction_scores: dict[str, float | int]
|
|
@@ -848,12 +823,9 @@ class MultiClassPredictionLabel(NamedTuple):
|
|
|
848
823
|
class MultiClassActualLabel(NamedTuple):
|
|
849
824
|
"""Used to log multi class actual label.
|
|
850
825
|
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
actual_scores (Dict[str, Union[float, int]]): the actual scores of the classes.
|
|
855
|
-
Any class in actual_scores with a score of 1 will be sent to arize
|
|
856
|
-
|
|
826
|
+
Args:
|
|
827
|
+
actual_scores: The actual scores of the classes.
|
|
828
|
+
Any class in actual_scores with a score of 1 will be sent to arize.
|
|
857
829
|
"""
|
|
858
830
|
|
|
859
831
|
actual_scores: dict[str, float | int]
|
|
@@ -972,6 +944,15 @@ class RankingActualLabel(NamedTuple):
|
|
|
972
944
|
|
|
973
945
|
@staticmethod
|
|
974
946
|
def _validate_relevance_labels(relevance_labels: list[str]) -> None:
|
|
947
|
+
"""Validate relevance labels.
|
|
948
|
+
|
|
949
|
+
Args:
|
|
950
|
+
relevance_labels: List of relevance labels to validate.
|
|
951
|
+
|
|
952
|
+
Raises:
|
|
953
|
+
TypeError: If relevance_labels is not a list of strings.
|
|
954
|
+
ValueError: If any label is an empty string.
|
|
955
|
+
"""
|
|
975
956
|
if not is_list_of(relevance_labels, str):
|
|
976
957
|
raise TypeError("Actual Relevance Labels must be a list of strings")
|
|
977
958
|
if any(label == "" for label in relevance_labels):
|
|
@@ -981,6 +962,14 @@ class RankingActualLabel(NamedTuple):
|
|
|
981
962
|
|
|
982
963
|
@staticmethod
|
|
983
964
|
def _validate_relevance_score(relevance_score: float) -> None:
|
|
965
|
+
"""Validate relevance score.
|
|
966
|
+
|
|
967
|
+
Args:
|
|
968
|
+
relevance_score: Relevance score to validate.
|
|
969
|
+
|
|
970
|
+
Raises:
|
|
971
|
+
TypeError: If relevance_score is not a float or int.
|
|
972
|
+
"""
|
|
984
973
|
if not isinstance(relevance_score, (float, int)):
|
|
985
974
|
raise TypeError("Actual Relevance score must be a float or an int")
|
|
986
975
|
|
|
@@ -1133,33 +1122,20 @@ class BaseSchema:
|
|
|
1133
1122
|
class TypedColumns:
|
|
1134
1123
|
"""Optional class used for explicit type enforcement of feature and tag columns in the dataframe.
|
|
1135
1124
|
|
|
1136
|
-
|
|
1137
|
-
------
|
|
1138
|
-
When initializing a Schema, use TypedColumns in place of a list of string column names.
|
|
1139
|
-
e.g. feature_column_names=TypedColumns(
|
|
1140
|
-
inferred=["feature_1", "feature_2"],
|
|
1141
|
-
to_str=["feature_3"],
|
|
1142
|
-
to_int=["feature_4"]
|
|
1143
|
-
)
|
|
1125
|
+
When initializing a Schema, use TypedColumns in place of a list of string column names::
|
|
1144
1126
|
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
vs. not using TypedColumns at all.
|
|
1151
|
-
to_str (Optional[List[str]]): List of columns that should be cast to pandas StringDType.
|
|
1152
|
-
to_int (Optional[List[str]]): List of columns that should be cast to pandas Int64DType.
|
|
1153
|
-
to_float (Optional[List[str]]): List of columns that should be cast to pandas Float64DType.
|
|
1127
|
+
feature_column_names = TypedColumns(
|
|
1128
|
+
inferred=["feature_1", "feature_2"],
|
|
1129
|
+
to_str=["feature_3"],
|
|
1130
|
+
to_int=["feature_4"],
|
|
1131
|
+
)
|
|
1154
1132
|
|
|
1155
1133
|
Notes:
|
|
1156
|
-
-----
|
|
1157
1134
|
- If a TypedColumns object is included in a Schema, pandas version 1.0.0 or higher is required.
|
|
1158
1135
|
- Pandas StringDType is still considered an experimental field.
|
|
1159
1136
|
- Columns not present in any field will not be captured in the Schema.
|
|
1160
1137
|
- StringDType, Int64DType, and Float64DType are all nullable column types.
|
|
1161
|
-
|
|
1162
|
-
|
|
1138
|
+
Null values will be ingested and represented in Arize as empty values.
|
|
1163
1139
|
"""
|
|
1164
1140
|
|
|
1165
1141
|
inferred: list[str] | None = None
|
|
@@ -1188,92 +1164,80 @@ class TypedColumns:
|
|
|
1188
1164
|
class Schema(BaseSchema):
|
|
1189
1165
|
"""Used to organize and map column names containing model data within your Pandas dataframe to Arize.
|
|
1190
1166
|
|
|
1191
|
-
|
|
1192
|
-
|
|
1193
|
-
prediction_id_column_name (str, optional): Column name for the predictions unique identifier.
|
|
1167
|
+
Args:
|
|
1168
|
+
prediction_id_column_name: Column name for the predictions unique identifier.
|
|
1194
1169
|
Unique IDs are used to match a prediction to delayed actuals or feature importances in Arize.
|
|
1195
1170
|
If prediction ids are not provided, it will default to an empty string "" and, when possible,
|
|
1196
1171
|
Arize will create a random prediction id on the server side. Prediction id must be a string column
|
|
1197
1172
|
with each row indicating a unique prediction event.
|
|
1198
|
-
feature_column_names
|
|
1173
|
+
feature_column_names: Column names for features.
|
|
1199
1174
|
The content of feature columns can be int, float, string. If TypedColumns is used,
|
|
1200
1175
|
the columns will be cast to the provided types prior to logging.
|
|
1201
|
-
tag_column_names
|
|
1176
|
+
tag_column_names: Column names for tags. The content of tag
|
|
1202
1177
|
columns can be int, float, string. If TypedColumns is used,
|
|
1203
1178
|
the columns will be cast to the provided types prior to logging.
|
|
1204
|
-
timestamp_column_name
|
|
1179
|
+
timestamp_column_name: Column name for timestamps. The content of this
|
|
1205
1180
|
column must be int Unix Timestamps in seconds.
|
|
1206
|
-
prediction_label_column_name
|
|
1181
|
+
prediction_label_column_name: Column name for categorical prediction values.
|
|
1207
1182
|
The content of this column must be convertible to string.
|
|
1208
|
-
prediction_score_column_name
|
|
1183
|
+
prediction_score_column_name: Column name for numeric prediction values. The
|
|
1209
1184
|
content of this column must be int/float or list of dictionaries mapping class names to
|
|
1210
1185
|
int/float scores in the case of MULTI_CLASS model types.
|
|
1211
|
-
actual_label_column_name
|
|
1186
|
+
actual_label_column_name: Column name for categorical ground truth values.
|
|
1212
1187
|
The content of this column must be convertible to string.
|
|
1213
|
-
actual_score_column_name
|
|
1188
|
+
actual_score_column_name: Column name for numeric ground truth values. The
|
|
1214
1189
|
content of this column must be int/float or list of dictionaries mapping class names to
|
|
1215
1190
|
int/float scores in the case of MULTI_CLASS model types.
|
|
1216
|
-
shap_values_column_names
|
|
1191
|
+
shap_values_column_names: Dictionary mapping feature column name
|
|
1217
1192
|
and corresponding SHAP feature importance column name. e.g.
|
|
1218
1193
|
{{"feat_A": "feat_A_shap", "feat_B": "feat_B_shap"}}
|
|
1219
|
-
embedding_feature_column_names
|
|
1194
|
+
embedding_feature_column_names: Dictionary
|
|
1220
1195
|
mapping embedding display names to EmbeddingColumnNames objects.
|
|
1221
|
-
prediction_group_id_column_name
|
|
1196
|
+
prediction_group_id_column_name: Column name for ranking groups or lists in
|
|
1222
1197
|
ranking models. The content of this column must be string and is limited to 128 characters.
|
|
1223
|
-
rank_column_name
|
|
1198
|
+
rank_column_name: Column name for rank of each element within its group or
|
|
1224
1199
|
list. The content of this column must be integer between 1-100.
|
|
1225
|
-
relevance_score_column_name
|
|
1200
|
+
relevance_score_column_name: Column name for ranking model type numeric
|
|
1226
1201
|
ground truth values. The content of this column must be int/float.
|
|
1227
|
-
relevance_labels_column_name
|
|
1202
|
+
relevance_labels_column_name: Column name for ranking model type categorical
|
|
1228
1203
|
ground truth values. The content of this column must be a string.
|
|
1229
|
-
object_detection_prediction_column_names
|
|
1204
|
+
object_detection_prediction_column_names:
|
|
1230
1205
|
ObjectDetectionColumnNames object containing information defining the predicted bounding
|
|
1231
1206
|
boxes' coordinates, categories, and scores.
|
|
1232
|
-
object_detection_actual_column_names
|
|
1207
|
+
object_detection_actual_column_names:
|
|
1233
1208
|
ObjectDetectionColumnNames object containing information defining the actual bounding
|
|
1234
1209
|
boxes' coordinates, categories, and scores.
|
|
1235
|
-
prompt_column_names
|
|
1210
|
+
prompt_column_names: Column names for text that is passed
|
|
1236
1211
|
to the GENERATIVE_LLM model. It accepts a string (if sending only a text column) or
|
|
1237
1212
|
EmbeddingColumnNames object containing the embedding vector data (required) and raw text
|
|
1238
1213
|
(optional) for the input text your model acts on.
|
|
1239
|
-
response_column_names
|
|
1214
|
+
response_column_names: Column names for text generated by
|
|
1240
1215
|
the GENERATIVE_LLM model. It accepts a string (if sending only a text column) or
|
|
1241
1216
|
EmbeddingColumnNames object containing the embedding vector data (required) and raw text
|
|
1242
1217
|
(optional) for the text your model generates.
|
|
1243
|
-
prompt_template_column_names
|
|
1218
|
+
prompt_template_column_names: PromptTemplateColumnNames object
|
|
1244
1219
|
containing the prompt template and the prompt template version.
|
|
1245
|
-
llm_config_column_names
|
|
1220
|
+
llm_config_column_names: LLMConfigColumnNames object containing
|
|
1246
1221
|
the LLM's model name and its hyper parameters used at inference.
|
|
1247
|
-
llm_run_metadata_column_names
|
|
1222
|
+
llm_run_metadata_column_names: LLMRunMetadataColumnNames
|
|
1248
1223
|
object containing token counts and latency metrics.
|
|
1249
|
-
retrieved_document_ids_column_name
|
|
1224
|
+
retrieved_document_ids_column_name: Column name for retrieved document ids.
|
|
1250
1225
|
The content of this column must be lists with entries convertible to strings.
|
|
1251
|
-
multi_class_threshold_scores_column_name
|
|
1226
|
+
multi_class_threshold_scores_column_name:
|
|
1252
1227
|
Column name for dictionary that maps class names to threshold values. The
|
|
1253
1228
|
content of this column must be dictionary of str -> int/float.
|
|
1254
|
-
semantic_segmentation_prediction_column_names
|
|
1229
|
+
semantic_segmentation_prediction_column_names:
|
|
1255
1230
|
SemanticSegmentationColumnNames object containing information defining the predicted
|
|
1256
1231
|
polygon coordinates and categories.
|
|
1257
|
-
semantic_segmentation_actual_column_names
|
|
1232
|
+
semantic_segmentation_actual_column_names:
|
|
1258
1233
|
SemanticSegmentationColumnNames object containing information defining the actual
|
|
1259
1234
|
polygon coordinates and categories.
|
|
1260
|
-
instance_segmentation_prediction_column_names
|
|
1235
|
+
instance_segmentation_prediction_column_names:
|
|
1261
1236
|
InstanceSegmentationPredictionColumnNames object containing information defining the predicted
|
|
1262
1237
|
polygon coordinates, categories, scores, and bounding box coordinates.
|
|
1263
|
-
instance_segmentation_actual_column_names
|
|
1238
|
+
instance_segmentation_actual_column_names:
|
|
1264
1239
|
InstanceSegmentationActualColumnNames object containing information defining the actual
|
|
1265
1240
|
polygon coordinates, categories, scores, and bounding box coordinates.
|
|
1266
|
-
|
|
1267
|
-
Methods:
|
|
1268
|
-
-------
|
|
1269
|
-
replace(**changes):
|
|
1270
|
-
Replaces fields of the schema
|
|
1271
|
-
asdict():
|
|
1272
|
-
Returns the schema as a dictionary. Warning: the types are not maintained, fields are
|
|
1273
|
-
converted to strings.
|
|
1274
|
-
get_used_columns():
|
|
1275
|
-
Returns a set with the unique collection of columns to be used from the dataframe.
|
|
1276
|
-
|
|
1277
1241
|
"""
|
|
1278
1242
|
|
|
1279
1243
|
prediction_id_column_name: str | None = None
|
|
@@ -1545,105 +1509,6 @@ class TypedValue:
|
|
|
1545
1509
|
value: str | bool | float | int
|
|
1546
1510
|
|
|
1547
1511
|
|
|
1548
|
-
def is_json_str(s: str) -> bool:
|
|
1549
|
-
"""Check if a string is valid JSON.
|
|
1550
|
-
|
|
1551
|
-
Args:
|
|
1552
|
-
s: The string to validate.
|
|
1553
|
-
|
|
1554
|
-
Returns:
|
|
1555
|
-
True if the string is valid JSON, False otherwise.
|
|
1556
|
-
"""
|
|
1557
|
-
try:
|
|
1558
|
-
json.loads(s)
|
|
1559
|
-
except ValueError:
|
|
1560
|
-
return False
|
|
1561
|
-
except TypeError:
|
|
1562
|
-
return False
|
|
1563
|
-
return True
|
|
1564
|
-
|
|
1565
|
-
|
|
1566
|
-
T = TypeVar("T", bound=type)
|
|
1567
|
-
|
|
1568
|
-
|
|
1569
|
-
def is_array_of(arr: Sequence[object], tp: T) -> bool:
|
|
1570
|
-
"""Check if a value is a numpy array with all elements of a specific type.
|
|
1571
|
-
|
|
1572
|
-
Args:
|
|
1573
|
-
arr: The sequence to check.
|
|
1574
|
-
tp: The expected type for all elements.
|
|
1575
|
-
|
|
1576
|
-
Returns:
|
|
1577
|
-
True if arr is a numpy array and all elements are of type tp.
|
|
1578
|
-
"""
|
|
1579
|
-
return isinstance(arr, np.ndarray) and all(isinstance(x, tp) for x in arr)
|
|
1580
|
-
|
|
1581
|
-
|
|
1582
|
-
def is_list_of(lst: Sequence[object], tp: T) -> bool:
|
|
1583
|
-
"""Check if a value is a list with all elements of a specific type.
|
|
1584
|
-
|
|
1585
|
-
Args:
|
|
1586
|
-
lst: The sequence to check.
|
|
1587
|
-
tp: The expected type for all elements.
|
|
1588
|
-
|
|
1589
|
-
Returns:
|
|
1590
|
-
True if lst is a list and all elements are of type tp.
|
|
1591
|
-
"""
|
|
1592
|
-
return isinstance(lst, list) and all(isinstance(x, tp) for x in lst)
|
|
1593
|
-
|
|
1594
|
-
|
|
1595
|
-
def is_iterable_of(lst: Sequence[object], tp: T) -> bool:
|
|
1596
|
-
"""Check if a value is an iterable with all elements of a specific type.
|
|
1597
|
-
|
|
1598
|
-
Args:
|
|
1599
|
-
lst: The sequence to check.
|
|
1600
|
-
tp: The expected type for all elements.
|
|
1601
|
-
|
|
1602
|
-
Returns:
|
|
1603
|
-
True if lst is an iterable and all elements are of type tp.
|
|
1604
|
-
"""
|
|
1605
|
-
return isinstance(lst, Iterable) and all(isinstance(x, tp) for x in lst)
|
|
1606
|
-
|
|
1607
|
-
|
|
1608
|
-
def is_dict_of(
|
|
1609
|
-
d: dict[object, object],
|
|
1610
|
-
key_allowed_types: T,
|
|
1611
|
-
value_allowed_types: T = (),
|
|
1612
|
-
value_list_allowed_types: T = (),
|
|
1613
|
-
) -> bool:
|
|
1614
|
-
"""Method to check types are valid for dictionary.
|
|
1615
|
-
|
|
1616
|
-
Arguments:
|
|
1617
|
-
---------
|
|
1618
|
-
d (Dict[object, object]): dictionary itself
|
|
1619
|
-
key_allowed_types (T): all allowed types for keys of dictionary
|
|
1620
|
-
value_allowed_types (T): all allowed types for values of dictionary
|
|
1621
|
-
value_list_allowed_types (T): if value is a list, these are the allowed
|
|
1622
|
-
types for value list
|
|
1623
|
-
|
|
1624
|
-
Returns:
|
|
1625
|
-
-------
|
|
1626
|
-
True if the data types of dictionary match the types specified by the
|
|
1627
|
-
arguments, false otherwise
|
|
1628
|
-
|
|
1629
|
-
"""
|
|
1630
|
-
if value_list_allowed_types and not isinstance(
|
|
1631
|
-
value_list_allowed_types, tuple
|
|
1632
|
-
):
|
|
1633
|
-
value_list_allowed_types = (value_list_allowed_types,)
|
|
1634
|
-
|
|
1635
|
-
return (
|
|
1636
|
-
isinstance(d, dict)
|
|
1637
|
-
and all(isinstance(k, key_allowed_types) for k in d)
|
|
1638
|
-
and all(
|
|
1639
|
-
isinstance(v, value_allowed_types)
|
|
1640
|
-
or any(is_list_of(v, t) for t in value_list_allowed_types)
|
|
1641
|
-
for v in d.values()
|
|
1642
|
-
if value_allowed_types or value_list_allowed_types
|
|
1643
|
-
)
|
|
1644
|
-
)
|
|
1645
|
-
|
|
1646
|
-
|
|
1647
1512
|
def _count_characters_raw_data(data: str | list[str]) -> int:
|
|
1648
1513
|
character_count = 0
|
|
1649
1514
|
if isinstance(data, str):
|
arize/pre_releases.py
CHANGED
|
@@ -21,8 +21,9 @@ _WARNED: set[str] = set()
|
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
def _format_prerelease_message(*, key: str, stage: ReleaseStage) -> str:
|
|
24
|
+
article = "an" if stage is ReleaseStage.ALPHA else "a"
|
|
24
25
|
return (
|
|
25
|
-
f"[{stage.upper()}] {key} is
|
|
26
|
+
f"[{stage.upper()}] {key} is {article} {stage} API "
|
|
26
27
|
f"in Arize SDK v{__version__} and may change without notice."
|
|
27
28
|
)
|
|
28
29
|
|
arize/projects/client.py
CHANGED
|
@@ -15,17 +15,22 @@ logger = logging.getLogger(__name__)
|
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
class ProjectsClient:
|
|
18
|
-
"""Client for managing Arize projects and project-level operations.
|
|
18
|
+
"""Client for managing Arize projects and project-level operations.
|
|
19
19
|
|
|
20
|
-
|
|
21
|
-
|
|
20
|
+
This class is primarily intended for internal use within the SDK. Users are
|
|
21
|
+
highly encouraged to access resource-specific functionality via
|
|
22
|
+
:class:`arize.ArizeClient`.
|
|
22
23
|
|
|
23
|
-
|
|
24
|
-
|
|
24
|
+
The projects client is a thin wrapper around the generated REST API client,
|
|
25
|
+
using the shared generated API client owned by
|
|
26
|
+
:class:`arize.config.SDKConfiguration`.
|
|
27
|
+
"""
|
|
25
28
|
|
|
29
|
+
def __init__(self, *, sdk_config: SDKConfiguration) -> None:
|
|
30
|
+
"""
|
|
26
31
|
Args:
|
|
27
32
|
sdk_config: Resolved SDK configuration.
|
|
28
|
-
"""
|
|
33
|
+
""" # noqa: D205, D212
|
|
29
34
|
self._sdk_config = sdk_config
|
|
30
35
|
|
|
31
36
|
# Import at runtime so it's still lazy and extras-gated by the parent
|