arize 8.0.0a23__py3-none-any.whl → 8.0.0b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. arize/__init__.py +11 -10
  2. arize/_exporter/client.py +1 -1
  3. arize/client.py +36 -126
  4. arize/config.py +59 -100
  5. arize/datasets/client.py +11 -6
  6. arize/embeddings/nlp_generators.py +12 -6
  7. arize/embeddings/tabular_generators.py +14 -11
  8. arize/experiments/__init__.py +12 -0
  9. arize/experiments/client.py +11 -6
  10. arize/{models → ml}/batch_validation/errors.py +2 -2
  11. arize/{models → ml}/batch_validation/validator.py +5 -3
  12. arize/{models → ml}/casting.py +42 -78
  13. arize/{models → ml}/client.py +19 -17
  14. arize/{models → ml}/proto.py +2 -2
  15. arize/{models → ml}/stream_validation.py +1 -1
  16. arize/{models → ml}/surrogate_explainer/mimic.py +6 -2
  17. arize/{types.py → ml/types.py} +99 -234
  18. arize/pre_releases.py +2 -1
  19. arize/projects/client.py +11 -6
  20. arize/spans/client.py +89 -84
  21. arize/spans/conversion.py +11 -4
  22. arize/spans/validation/common/value_validation.py +1 -1
  23. arize/spans/validation/spans/dataframe_form_validation.py +1 -1
  24. arize/spans/validation/spans/value_validation.py +2 -1
  25. arize/utils/dataframe.py +1 -1
  26. arize/utils/online_tasks/dataframe_preprocessor.py +5 -6
  27. arize/utils/types.py +105 -0
  28. arize/version.py +1 -1
  29. {arize-8.0.0a23.dist-info → arize-8.0.0b0.dist-info}/METADATA +10 -4
  30. {arize-8.0.0a23.dist-info → arize-8.0.0b0.dist-info}/RECORD +37 -36
  31. /arize/{models → ml}/__init__.py +0 -0
  32. /arize/{models → ml}/batch_validation/__init__.py +0 -0
  33. /arize/{models → ml}/bounded_executor.py +0 -0
  34. /arize/{models → ml}/surrogate_explainer/__init__.py +0 -0
  35. {arize-8.0.0a23.dist-info → arize-8.0.0b0.dist-info}/WHEEL +0 -0
  36. {arize-8.0.0a23.dist-info → arize-8.0.0b0.dist-info}/licenses/LICENSE +0 -0
  37. {arize-8.0.0a23.dist-info → arize-8.0.0b0.dist-info}/licenses/NOTICE +0 -0
arize/{types.py → ml/types.py} RENAMED
@@ -1,9 +1,8 @@
- """Common type definitions and data models used across the Arize SDK."""
+ """Common type definitions and data models used across the ML Client."""

- import json
  import logging
  import math
- from collections.abc import Iterable, Iterator, Sequence
+ from collections.abc import Iterator
  from dataclasses import asdict, dataclass, replace
  from datetime import datetime
  from decimal import Decimal
@@ -12,7 +11,6 @@ from itertools import chain
  from typing import (
  NamedTuple,
  Self,
- TypeVar,
  )

  import numpy as np
@@ -40,6 +38,7 @@ from arize.exceptions.parameters import InvalidValueType
  # )
  # from arize.utils.errors import InvalidValueType
  from arize.logging import get_truncation_warning_message
+ from arize.utils.types import is_dict_of, is_iterable_of, is_list_of

  logger = logging.getLogger(__name__)

@@ -162,15 +161,12 @@ class Embedding(NamedTuple):

  Ensures validations are passed for vector, data, and link_to_data fields.

- Arguments:
- ---------
- emb_name (str, int, float): Name of the embedding feature the
- vector belongs to
+ Args:
+ emb_name: Name of the embedding feature the
+ vector belongs to.

  Raises:
- ------
- TypeError: If the embedding fields are of the wrong type
-
+ TypeError: If the embedding fields are of the wrong type.
  """
  if self.vector is not None:
  self._validate_embedding_vector(emb_name)
@@ -194,15 +190,12 @@ class Embedding(NamedTuple):
  Requirements: 1) Type must be list or convertible to list (like numpy arrays,
  pandas Series), 2) List must not be empty, 3) Elements in list must be floats.

- Arguments:
- ---------
- emb_name (str, int, float): Name of the embedding feature the vector
- belongs to
+ Args:
+ emb_name: Name of the embedding feature the vector
+ belongs to.

  Raises:
- ------
- TypeError: If the embedding does not satisfy requirements above
-
+ TypeError: If the embedding does not satisfy requirements above.
  """
  if not Embedding._is_valid_iterable(self.vector):
  raise TypeError(
@@ -228,15 +221,13 @@ class Embedding(NamedTuple):

  Requirement: Must be string or list of strings (NLP case).

- Arguments:
- ---------
- emb_name (str, int, float): Name of the embedding feature the vector belongs to
- data (str, int, float): Raw data associated with the embedding feature. Typically raw text.
+ Args:
+ emb_name: Name of the embedding feature the vector belongs to.
+ data: Raw data associated with the embedding feature.
+ Typically raw text.

  Raises:
- ------
- TypeError: If the embedding does not satisfy requirements above
-
+ TypeError: If the embedding does not satisfy requirements above.
  """
  # Validate that data is a string or iterable of strings
  is_string = isinstance(data, str)
@@ -276,16 +267,13 @@ class Embedding(NamedTuple):

  Requirement: Must be string.

- Arguments:
- ---------
- emb_name (str, int, float): Name of the embedding feature the vector belongs to
- link_to_data (str): Link to source data of embedding feature, typically an image file on
- cloud storage
+ Args:
+ emb_name: Name of the embedding feature the vector belongs to.
+ link_to_data: Link to source data of embedding feature, typically an
+ image file on cloud storage.

  Raises:
- ------
- TypeError: If the embedding does not satisfy requirements above
-
+ TypeError: If the embedding does not satisfy requirements above.
  """
  if not isinstance(link_to_data, str):
  raise TypeError(
@@ -301,14 +289,12 @@ class Embedding(NamedTuple):

  Accepted types: 1) List, 2) numpy array, or 3) pandas Series.

- Arguments:
- ---------
- data: input iterable
+ Args:
+ data: Input iterable.

  Returns:
- -------
- True if the data type is one of the accepted iterable types, false otherwise
-
+ True if the data type is one of the accepted iterable types,
+ false otherwise.
  """
  return any(isinstance(data, t) for t in (list, np.ndarray))

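To make the reworked Embedding docstrings concrete, here is a minimal construction sketch; it assumes the class stays importable from arize.ml.types after the rename and that the data and link_to_data fields keep optional defaults, with hypothetical values throughout:

    import numpy as np

    from arize.ml.types import Embedding  # assumed import path after the types.py -> ml/types.py move

    # vector: list or numpy array of floats; data: raw text (str or list of str);
    # link_to_data (not set here): string link to the source artifact, e.g. an image in cloud storage.
    text_embedding = Embedding(
        vector=np.array([0.12, 0.48, 0.91]),
        data="the quick brown fox",
    )
    # Per the docstrings above, a non-float vector or non-string data raises TypeError during validation.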
@@ -379,18 +365,16 @@ class ObjectDetectionColumnNames(NamedTuple):

  These values are assigned to the prediction or actual schema parameter.

- Arguments:
- ---------
- bounding_boxes_coordinates_column_name (str): Column name containing the coordinates of the
+ Args:
+ bounding_boxes_coordinates_column_name: Column name containing the coordinates of the
  rectangular outline that locates an object within an image or video. Pascal VOC format
  required. The contents of this column must be a List[List[float]].
- categories_column_name (str): Column name containing the predefined classes or labels used
+ categories_column_name: Column name containing the predefined classes or labels used
  by the model to classify the detected objects. The contents of this column must be List[str].
- scores_column_names (str, optional): Column name containing the confidence scores that the
+ scores_column_names: Column name containing the confidence scores that the
  model assigns to it's predictions, indicating how certain the model is that the predicted
  class is contained within the bounding box. This argument is only applicable for prediction
  values. The contents of this column must be List[float].
-
  """

  bounding_boxes_coordinates_column_name: str
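A short, hedged sketch of the usage the Args section above describes; the column names are placeholders and the import path assumes the new ml package layout:

    from arize.ml.types import ObjectDetectionColumnNames  # assumed import path

    # Each named dataframe column must hold, per row: List[List[float]] Pascal VOC boxes,
    # List[str] categories, and List[float] confidence scores (predictions only).
    object_detection_prediction = ObjectDetectionColumnNames(
        bounding_boxes_coordinates_column_name="pred_boxes",
        categories_column_name="pred_categories",
        scores_column_names="pred_scores",
    )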
@@ -403,15 +387,13 @@ class SemanticSegmentationColumnNames(NamedTuple):

  These values are assigned to the prediction or actual schema parameter.

- Arguments:
- ---------
- polygon_coordinates_column_name (str): Column name containing the coordinates of the vertices
+ Args:
+ polygon_coordinates_column_name: Column name containing the coordinates of the vertices
  of the polygon mask within an image or video. The first sublist contains the
  coordinates of the outline of the polygon. The subsequent sublists contain the coordinates
  of any cutouts within the polygon. The contents of this column must be a List[List[float]].
- categories_column_name (str): Column name containing the predefined classes or labels used
+ categories_column_name: Column name containing the predefined classes or labels used
  by the model to classify the detected objects. The contents of this column must be List[str].
-
  """

  polygon_coordinates_column_name: str
@@ -421,22 +403,20 @@
  class InstanceSegmentationPredictionColumnNames(NamedTuple):
  """Used to log instance segmentation prediction values for the prediction schema parameter.

- Arguments:
- ---------
- polygon_coordinates_column_name (str): Column name containing the coordinates of the vertices
+ Args:
+ polygon_coordinates_column_name: Column name containing the coordinates of the vertices
  of the polygon mask within an image or video. The first sublist contains the
  coordinates of the outline of the polygon. The subsequent sublists contain the coordinates
  of any cutouts within the polygon. The contents of this column must be a List[List[float]].
- categories_column_name (str): Column name containing the predefined classes or labels used
+ categories_column_name: Column name containing the predefined classes or labels used
  by the model to classify the detected objects. The contents of this column must be List[str].
- scores_column_name (str, optional): Column name containing the confidence scores that the
+ scores_column_name: Column name containing the confidence scores that the
  model assigns to it's predictions, indicating how certain the model is that the predicted
  class is contained within the bounding box. This argument is only applicable for prediction
  values. The contents of this column must be List[float].
- bounding_boxes_coordinates_column_name (str, optional): Column name containing the coordinates of the
+ bounding_boxes_coordinates_column_name: Column name containing the coordinates of the
  rectangular outline that locates an object within an image or video. Pascal VOC format
  required. The contents of this column must be a List[List[float]].
-
  """

  polygon_coordinates_column_name: str
@@ -448,17 +428,15 @@ class InstanceSegmentationPredictionColumnNames(NamedTuple):
  class InstanceSegmentationActualColumnNames(NamedTuple):
  """Used to log instance segmentation actual values that are assigned to the actual schema parameter.

- Arguments:
- ---------
- polygon_coordinates_column_name (str): Column name containing the coordinates of the
+ Args:
+ polygon_coordinates_column_name: Column name containing the coordinates of the
  polygon that locates an object within an image or video. The contents of this column
  must be a List[List[float]].
- categories_column_name (str): Column name containing the predefined classes or labels used
+ categories_column_name: Column name containing the predefined classes or labels used
  by the model to classify the detected objects. The contents of this column must be List[str].
- bounding_boxes_coordinates_column_name (str, optional): Column name containing the coordinates of the
+ bounding_boxes_coordinates_column_name: Column name containing the coordinates of the
  rectangular outline that locates an object within an image or video. Pascal VOC format
  required. The contents of this column must be a List[List[float]].
-
  """

  polygon_coordinates_column_name: str
@@ -743,13 +721,10 @@ class InstanceSegmentationActualLabel(NamedTuple):
  class MultiClassPredictionLabel(NamedTuple):
  """Used to log multi class prediction label.

- Arguments:
- ---------
- MultiClassPredictionLabel
- prediction_scores (Dict[str, Union[float, int]]): the prediction scores of the classes.
- threshold_scores (Optional[Dict[str, Union[float, int]]]): the threshold scores of the classes.
+ Args:
+ prediction_scores: The prediction scores of the classes.
+ threshold_scores: The threshold scores of the classes.
  Only Multi Label will have threshold scores.
-
  """

  prediction_scores: dict[str, float | int]
@@ -848,12 +823,9 @@ class MultiClassPredictionLabel(NamedTuple):
  class MultiClassActualLabel(NamedTuple):
  """Used to log multi class actual label.

- Arguments:
- ---------
- MultiClassActualLabel
- actual_scores (Dict[str, Union[float, int]]): the actual scores of the classes.
- Any class in actual_scores with a score of 1 will be sent to arize
-
+ Args:
+ actual_scores: The actual scores of the classes.
+ Any class in actual_scores with a score of 1 will be sent to arize.
  """

  actual_scores: dict[str, float | int]
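To ground the tightened multi-class docstrings, a minimal sketch with hypothetical class names (import path assumed as above):

    from arize.ml.types import MultiClassActualLabel, MultiClassPredictionLabel  # assumed import path

    # prediction_scores / threshold_scores / actual_scores are dict[str, float | int];
    # threshold_scores applies only to multi-label use cases.
    prediction_label = MultiClassPredictionLabel(
        prediction_scores={"cat": 0.7, "dog": 0.2, "fish": 0.1},
        threshold_scores={"cat": 0.5, "dog": 0.5, "fish": 0.5},
    )
    actual_label = MultiClassActualLabel(actual_scores={"cat": 1, "dog": 0, "fish": 0})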
@@ -972,6 +944,15 @@ class RankingActualLabel(NamedTuple):

  @staticmethod
  def _validate_relevance_labels(relevance_labels: list[str]) -> None:
+ """Validate relevance labels.
+
+ Args:
+ relevance_labels: List of relevance labels to validate.
+
+ Raises:
+ TypeError: If relevance_labels is not a list of strings.
+ ValueError: If any label is an empty string.
+ """
  if not is_list_of(relevance_labels, str):
  raise TypeError("Actual Relevance Labels must be a list of strings")
  if any(label == "" for label in relevance_labels):
@@ -981,6 +962,14 @@

  @staticmethod
  def _validate_relevance_score(relevance_score: float) -> None:
+ """Validate relevance score.
+
+ Args:
+ relevance_score: Relevance score to validate.
+
+ Raises:
+ TypeError: If relevance_score is not a float or int.
+ """
  if not isinstance(relevance_score, (float, int)):
  raise TypeError("Actual Relevance score must be a float or an int")

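The new docstrings describe checks the validators already perform; a small behavioral sketch using the static helpers exactly as shown in the hunks above (values are hypothetical):

    from arize.ml.types import RankingActualLabel  # assumed import path

    RankingActualLabel._validate_relevance_labels(["relevant", "not_relevant"])  # passes silently
    RankingActualLabel._validate_relevance_score(0.83)                           # passes silently
    # RankingActualLabel._validate_relevance_labels(["relevant", ""])   # would raise ValueError
    # RankingActualLabel._validate_relevance_score("high")              # would raise TypeError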
@@ -1133,33 +1122,20 @@ class BaseSchema:
  class TypedColumns:
  """Optional class used for explicit type enforcement of feature and tag columns in the dataframe.

- Usage:
- ------
- When initializing a Schema, use TypedColumns in place of a list of string column names.
- e.g. feature_column_names=TypedColumns(
- inferred=["feature_1", "feature_2"],
- to_str=["feature_3"],
- to_int=["feature_4"]
- )
+ When initializing a Schema, use TypedColumns in place of a list of string column names::

- Fields:
- -------
- inferred (Optional[List[str]]): List of columns that will not be altered at all.
- The values in these columns will have their type inferred as Arize validates and ingests the data.
- There's no difference between passing in all column names as inferred
- vs. not using TypedColumns at all.
- to_str (Optional[List[str]]): List of columns that should be cast to pandas StringDType.
- to_int (Optional[List[str]]): List of columns that should be cast to pandas Int64DType.
- to_float (Optional[List[str]]): List of columns that should be cast to pandas Float64DType.
+ feature_column_names = TypedColumns(
+ inferred=["feature_1", "feature_2"],
+ to_str=["feature_3"],
+ to_int=["feature_4"],
+ )

  Notes:
- -----
  - If a TypedColumns object is included in a Schema, pandas version 1.0.0 or higher is required.
  - Pandas StringDType is still considered an experimental field.
  - Columns not present in any field will not be captured in the Schema.
  - StringDType, Int64DType, and Float64DType are all nullable column types.
- Null values will be ingested and represented in Arize as empty values.
-
+ Null values will be ingested and represented in Arize as empty values.
  """

  inferred: list[str] | None = None
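Building on the docstring's new literal example, a hedged sketch of TypedColumns inside a Schema (column names are hypothetical; the Schema keywords come from the Args section further below):

    from arize.ml.types import Schema, TypedColumns  # assumed import path

    # inferred columns are ingested as-is; to_str/to_int columns are cast to the
    # nullable pandas dtypes before logging.
    schema = Schema(
        prediction_id_column_name="prediction_id",
        feature_column_names=TypedColumns(
            inferred=["feature_1", "feature_2"],
            to_str=["feature_3"],
            to_int=["feature_4"],
        ),
        tag_column_names=["environment", "customer_tier"],
    )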
@@ -1188,92 +1164,80 @@ class TypedColumns:
  class Schema(BaseSchema):
  """Used to organize and map column names containing model data within your Pandas dataframe to Arize.

- Arguments:
- ---------
- prediction_id_column_name (str, optional): Column name for the predictions unique identifier.
+ Args:
+ prediction_id_column_name: Column name for the predictions unique identifier.
  Unique IDs are used to match a prediction to delayed actuals or feature importances in Arize.
  If prediction ids are not provided, it will default to an empty string "" and, when possible,
  Arize will create a random prediction id on the server side. Prediction id must be a string column
  with each row indicating a unique prediction event.
- feature_column_names (Union[List[str], TypedColumns], optional): Column names for features.
+ feature_column_names: Column names for features.
  The content of feature columns can be int, float, string. If TypedColumns is used,
  the columns will be cast to the provided types prior to logging.
- tag_column_names (Union[List[str], TypedColumns], optional): Column names for tags. The content of tag
+ tag_column_names: Column names for tags. The content of tag
  columns can be int, float, string. If TypedColumns is used,
  the columns will be cast to the provided types prior to logging.
- timestamp_column_name (str, optional): Column name for timestamps. The content of this
+ timestamp_column_name: Column name for timestamps. The content of this
  column must be int Unix Timestamps in seconds.
- prediction_label_column_name (str, optional): Column name for categorical prediction values.
+ prediction_label_column_name: Column name for categorical prediction values.
  The content of this column must be convertible to string.
- prediction_score_column_name (str, optional): Column name for numeric prediction values. The
+ prediction_score_column_name: Column name for numeric prediction values. The
  content of this column must be int/float or list of dictionaries mapping class names to
  int/float scores in the case of MULTI_CLASS model types.
- actual_label_column_name (str, optional): Column name for categorical ground truth values.
+ actual_label_column_name: Column name for categorical ground truth values.
  The content of this column must be convertible to string.
- actual_score_column_name (str, optional): Column name for numeric ground truth values. The
+ actual_score_column_name: Column name for numeric ground truth values. The
  content of this column must be int/float or list of dictionaries mapping class names to
  int/float scores in the case of MULTI_CLASS model types.
- shap_values_column_names (Dict[str, str], optional): Dictionary mapping feature column name
+ shap_values_column_names: Dictionary mapping feature column name
  and corresponding SHAP feature importance column name. e.g.
  {{"feat_A": "feat_A_shap", "feat_B": "feat_B_shap"}}
- embedding_feature_column_names (Dict[str, EmbeddingColumnNames], optional): Dictionary
+ embedding_feature_column_names: Dictionary
  mapping embedding display names to EmbeddingColumnNames objects.
- prediction_group_id_column_name (str, optional): Column name for ranking groups or lists in
+ prediction_group_id_column_name: Column name for ranking groups or lists in
  ranking models. The content of this column must be string and is limited to 128 characters.
- rank_column_name (str, optional): Column name for rank of each element on the its group or
+ rank_column_name: Column name for rank of each element on the its group or
  list. The content of this column must be integer between 1-100.
- relevance_score_column_name (str, optional): Column name for ranking model type numeric
+ relevance_score_column_name: Column name for ranking model type numeric
  ground truth values. The content of this column must be int/float.
- relevance_labels_column_name (str, optional): Column name for ranking model type categorical
+ relevance_labels_column_name: Column name for ranking model type categorical
  ground truth values. The content of this column must be a string.
- object_detection_prediction_column_names (ObjectDetectionColumnNames, optional):
+ object_detection_prediction_column_names:
  ObjectDetectionColumnNames object containing information defining the predicted bounding
  boxes' coordinates, categories, and scores.
- object_detection_actual_column_names (ObjectDetectionColumnNames, optional):
+ object_detection_actual_column_names:
  ObjectDetectionColumnNames object containing information defining the actual bounding
  boxes' coordinates, categories, and scores.
- prompt_column_names (str or EmbeddingColumnNames, optional): column names for text that is passed
+ prompt_column_names: column names for text that is passed
  to the GENERATIVE_LLM model. It accepts a string (if sending only a text column) or
  EmbeddingColumnNames object containing the embedding vector data (required) and raw text
  (optional) for the input text your model acts on.
- response_column_names (str or EmbeddingColumnNames, optional): column names for text generated by
+ response_column_names: column names for text generated by
  the GENERATIVE_LLM model. It accepts a string (if sending only a text column) or
  EmbeddingColumnNames object containing the embedding vector data (required) and raw text
  (optional) for the text your model generates.
- prompt_template_column_names (PromptTemplateColumnNames, optional): PromptTemplateColumnNames object
+ prompt_template_column_names: PromptTemplateColumnNames object
  containing the prompt template and the prompt template version.
- llm_config_column_names (LLMConfigColumnNames, optional): LLMConfigColumnNames object containing
+ llm_config_column_names: LLMConfigColumnNames object containing
  the LLM's model name and its hyper parameters used at inference.
- llm_run_metadata_column_names (LLMRunMetadataColumnNames, optional): LLMRunMetadataColumnNames
+ llm_run_metadata_column_names: LLMRunMetadataColumnNames
  object containing token counts and latency metrics
- retrieved_document_ids_column_name (str, optional): Column name for retrieved document ids.
+ retrieved_document_ids_column_name: Column name for retrieved document ids.
  The content of this column must be lists with entries convertible to strings.
- multi_class_threshold_scores_column_name (str, optional):
+ multi_class_threshold_scores_column_name:
  Column name for dictionary that maps class names to threshold values. The
  content of this column must be dictionary of str -> int/float.
- semantic_segmentation_prediction_column_names (SemanticSegmentationColumnNames, optional):
+ semantic_segmentation_prediction_column_names:
  SemanticSegmentationColumnNames object containing information defining the predicted
  polygon coordinates and categories.
- semantic_segmentation_actual_column_names (SemanticSegmentationColumnNames, optional):
+ semantic_segmentation_actual_column_names:
  SemanticSegmentationColumnNames object containing information defining the actual
  polygon coordinates and categories.
- instance_segmentation_prediction_column_names (InstanceSegmentationPredictionColumnNames, optional):
+ instance_segmentation_prediction_column_names:
  InstanceSegmentationPredictionColumnNames object containing information defining the predicted
  polygon coordinates, categories, scores, and bounding box coordinates.
- instance_segmentation_actual_column_names (InstanceSegmentationActualColumnNames, optional):
+ instance_segmentation_actual_column_names:
  InstanceSegmentationActualColumnNames object containing information defining the actual
  polygon coordinates, categories, scores, and bounding box coordinates.
-
- Methods:
- -------
- replace(**changes):
- Replaces fields of the schema
- asdict():
- Returns the schema as a dictionary. Warning: the types are not maintained, fields are
- converted to strings.
- get_used_columns():
- Returns a set with the unique collection of columns to be used from the dataframe.
-
  """

  prediction_id_column_name: str | None = None
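A minimal sketch of a classification-style Schema using only keywords documented in the Args block above; all column names are hypothetical and the import path assumes the new ml package layout:

    from arize.ml.types import Schema  # assumed import path

    schema = Schema(
        prediction_id_column_name="prediction_id",
        timestamp_column_name="prediction_ts",        # int Unix timestamps in seconds
        prediction_label_column_name="predicted_class",
        prediction_score_column_name="predicted_score",
        actual_label_column_name="actual_class",
        shap_values_column_names={"feat_A": "feat_A_shap", "feat_B": "feat_B_shap"},
    )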
@@ -1545,105 +1509,6 @@ class TypedValue:
  value: str | bool | float | int


- def is_json_str(s: str) -> bool:
- """Check if a string is valid JSON.
-
- Args:
- s: The string to validate.
-
- Returns:
- True if the string is valid JSON, False otherwise.
- """
- try:
- json.loads(s)
- except ValueError:
- return False
- except TypeError:
- return False
- return True
-
-
- T = TypeVar("T", bound=type)
-
-
- def is_array_of(arr: Sequence[object], tp: T) -> bool:
- """Check if a value is a numpy array with all elements of a specific type.
-
- Args:
- arr: The sequence to check.
- tp: The expected type for all elements.
-
- Returns:
- True if arr is a numpy array and all elements are of type tp.
- """
- return isinstance(arr, np.ndarray) and all(isinstance(x, tp) for x in arr)
-
-
- def is_list_of(lst: Sequence[object], tp: T) -> bool:
- """Check if a value is a list with all elements of a specific type.
-
- Args:
- lst: The sequence to check.
- tp: The expected type for all elements.
-
- Returns:
- True if lst is a list and all elements are of type tp.
- """
- return isinstance(lst, list) and all(isinstance(x, tp) for x in lst)
-
-
- def is_iterable_of(lst: Sequence[object], tp: T) -> bool:
- """Check if a value is an iterable with all elements of a specific type.
-
- Args:
- lst: The sequence to check.
- tp: The expected type for all elements.
-
- Returns:
- True if lst is an iterable and all elements are of type tp.
- """
- return isinstance(lst, Iterable) and all(isinstance(x, tp) for x in lst)
-
-
- def is_dict_of(
- d: dict[object, object],
- key_allowed_types: T,
- value_allowed_types: T = (),
- value_list_allowed_types: T = (),
- ) -> bool:
- """Method to check types are valid for dictionary.
-
- Arguments:
- ---------
- d (Dict[object, object]): dictionary itself
- key_allowed_types (T): all allowed types for keys of dictionary
- value_allowed_types (T): all allowed types for values of dictionary
- value_list_allowed_types (T): if value is a list, these are the allowed
- types for value list
-
- Returns:
- -------
- True if the data types of dictionary match the types specified by the
- arguments, false otherwise
-
- """
- if value_list_allowed_types and not isinstance(
- value_list_allowed_types, tuple
- ):
- value_list_allowed_types = (value_list_allowed_types,)
-
- return (
- isinstance(d, dict)
- and all(isinstance(k, key_allowed_types) for k in d)
- and all(
- isinstance(v, value_allowed_types)
- or any(is_list_of(v, t) for t in value_list_allowed_types)
- for v in d.values()
- if value_allowed_types or value_list_allowed_types
- )
- )
-
-
  def _count_characters_raw_data(data: str | list[str]) -> int:
  character_count = 0
  if isinstance(data, str):
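The helpers removed here are relocated rather than deleted: the import added near the top of this file and the new arize/utils/types.py (+105) in the file list point to arize.utils.types. A small sketch assuming the relocated functions keep the signatures shown above:

    from arize.utils.types import is_dict_of, is_iterable_of, is_list_of

    assert is_list_of(["a", "b"], str)
    assert is_iterable_of((1, 2, 3), int)
    assert is_dict_of(
        {"class_a": 0.9, "class_b": 0.1},
        key_allowed_types=str,
        value_allowed_types=(float, int),
    )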
arize/pre_releases.py CHANGED
@@ -21,8 +21,9 @@ _WARNED: set[str] = set()


  def _format_prerelease_message(*, key: str, stage: ReleaseStage) -> str:
+ article = "an" if stage is ReleaseStage.ALPHA else "a"
  return (
- f"[{stage.upper()}] {key} is an {stage} API "
+ f"[{stage.upper()}] {key} is {article} {stage} API "
  f"in Arize SDK v{__version__} and may change without notice."
  )

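This change fixes the article for non-alpha stages ("is an beta API" becomes "is a beta API"). A standalone reproduction of the fixed logic, independent of the ReleaseStage enum (stage strings and key are hypothetical):

    for stage in ("alpha", "beta"):
        article = "an" if stage == "alpha" else "a"
        print(f"[{stage.upper()}] experiments is {article} {stage} API")
    # [ALPHA] experiments is an alpha API
    # [BETA] experiments is a beta API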
arize/projects/client.py CHANGED
@@ -15,17 +15,22 @@ logger = logging.getLogger(__name__)


  class ProjectsClient:
- """Client for managing Arize projects and project-level operations."""
+ """Client for managing Arize projects and project-level operations.

- def __init__(self, *, sdk_config: SDKConfiguration) -> None:
- """Create a projects sub-client.
+ This class is primarily intended for internal use within the SDK. Users are
+ highly encouraged to access resource-specific functionality via
+ :class:`arize.ArizeClient`.

- The projects client is a thin wrapper around the generated REST API client,
- using the shared generated API client owned by `SDKConfiguration`.
+ The projects client is a thin wrapper around the generated REST API client,
+ using the shared generated API client owned by
+ :class:`arize.config.SDKConfiguration`.
+ """

+ def __init__(self, *, sdk_config: SDKConfiguration) -> None:
+ """
  Args:
  sdk_config: Resolved SDK configuration.
- """
+ """ # noqa: D205, D212
  self._sdk_config = sdk_config

  # Import at runtime so it's still lazy and extras-gated by the parent
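The expanded docstring positions ProjectsClient as an internal wrapper; a hedged sketch of direct construction under that caveat (how an SDKConfiguration is actually resolved is not shown in this diff, so the bare constructor call below is hypothetical):

    from arize.config import SDKConfiguration
    from arize.projects.client import ProjectsClient

    # Intended for internal use; the docstring recommends going through arize.ArizeClient instead.
    sdk_config = SDKConfiguration()               # hypothetical: real resolution options not shown here
    projects_client = ProjectsClient(sdk_config=sdk_config)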