arize 8.0.0a23__py3-none-any.whl → 8.0.0b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. arize/__init__.py +11 -10
  2. arize/_exporter/client.py +1 -1
  3. arize/client.py +36 -126
  4. arize/config.py +59 -100
  5. arize/datasets/client.py +11 -6
  6. arize/embeddings/nlp_generators.py +12 -6
  7. arize/embeddings/tabular_generators.py +14 -11
  8. arize/experiments/__init__.py +12 -0
  9. arize/experiments/client.py +11 -6
  10. arize/{models → ml}/batch_validation/errors.py +2 -2
  11. arize/{models → ml}/batch_validation/validator.py +5 -3
  12. arize/{models → ml}/casting.py +42 -78
  13. arize/{models → ml}/client.py +19 -17
  14. arize/{models → ml}/proto.py +2 -2
  15. arize/{models → ml}/stream_validation.py +1 -1
  16. arize/{models → ml}/surrogate_explainer/mimic.py +6 -2
  17. arize/{types.py → ml/types.py} +99 -234
  18. arize/pre_releases.py +2 -1
  19. arize/projects/client.py +11 -6
  20. arize/spans/client.py +89 -84
  21. arize/spans/conversion.py +11 -4
  22. arize/spans/validation/common/value_validation.py +1 -1
  23. arize/spans/validation/spans/dataframe_form_validation.py +1 -1
  24. arize/spans/validation/spans/value_validation.py +2 -1
  25. arize/utils/dataframe.py +1 -1
  26. arize/utils/online_tasks/dataframe_preprocessor.py +5 -6
  27. arize/utils/types.py +105 -0
  28. arize/version.py +1 -1
  29. {arize-8.0.0a23.dist-info → arize-8.0.0b0.dist-info}/METADATA +10 -4
  30. {arize-8.0.0a23.dist-info → arize-8.0.0b0.dist-info}/RECORD +37 -36
  31. /arize/{models → ml}/__init__.py +0 -0
  32. /arize/{models → ml}/batch_validation/__init__.py +0 -0
  33. /arize/{models → ml}/bounded_executor.py +0 -0
  34. /arize/{models → ml}/surrogate_explainer/__init__.py +0 -0
  35. {arize-8.0.0a23.dist-info → arize-8.0.0b0.dist-info}/WHEEL +0 -0
  36. {arize-8.0.0a23.dist-info → arize-8.0.0b0.dist-info}/licenses/LICENSE +0 -0
  37. {arize-8.0.0a23.dist-info → arize-8.0.0b0.dist-info}/licenses/NOTICE +0 -0
@@ -9,7 +9,13 @@ from typing import TYPE_CHECKING
9
9
  import numpy as np
10
10
 
11
11
  from arize.logging import log_a_list
12
- from arize.types import ArizeTypes, Schema, TypedColumns, TypedValue, is_list_of
12
+ from arize.ml.types import (
13
+ ArizeTypes,
14
+ Schema,
15
+ TypedColumns,
16
+ TypedValue,
17
+ )
18
+ from arize.utils.types import is_list_of
13
19
 
14
20
  if TYPE_CHECKING:
15
21
  import pandas as pd
@@ -125,29 +131,20 @@ def cast_typed_columns(
125
131
  This optional feature provides a simple way for users to prevent type drift within
126
132
  a column across many SDK uploads.
127
133
 
128
- Arguments:
129
- ---------
130
- dataframe: pd.DataFrame
131
- A deepcopy of the user's dataframe.
132
- schema: Schema
133
- The schema, which may include feature and tag column names
134
+ Args:
135
+ dataframe (pd.DataFrame): A deepcopy of the user's dataframe.
136
+ schema (Schema): The schema, which may include feature and tag column names
134
137
  in a TypedColumns object or a List[string].
135
138
 
136
139
  Returns:
137
- -------
138
- dataframe: pd.DataFrame
139
- The dataframe, with columns cast to the specified types.
140
- schema: Schema
141
- A new Schema object, with feature and tag column names converted to the List[string] format
142
- expected in downstream validation.
140
+ tuple[pd.DataFrame, Schema]: A tuple containing:
141
+ - dataframe: The dataframe, with columns cast to the specified types.
142
+ - schema: A new Schema object, with feature and tag column names converted
143
+ to the List[string] format expected in downstream validation.
143
144
 
144
145
  Raises:
145
- ------
146
- ColumnCastingError
147
- If casting fails.
148
- InvalidTypedColumnsError
149
- If the TypedColumns object is invalid.
150
-
146
+ ColumnCastingError: If casting fails.
147
+ InvalidTypedColumnsError: If the TypedColumns object is invalid.
151
148
  """
152
149
  typed_column_fields = schema.typed_column_fields()
153
150
  feature_field = "feature_column_names"
@@ -204,21 +201,14 @@ def _cast_value(
204
201
  ) -> str | int | float | list[str] | None:
205
202
  """Casts a TypedValue to its provided type, preserving all null values as None or float('nan').
206
203
 
207
- Arguments:
208
- ---------
209
- typed_value: TypedValue
210
- The TypedValue to cast.
204
+ Args:
205
+ typed_value (TypedValue): The TypedValue to cast.
211
206
 
212
207
  Returns:
213
- -------
214
- Union[str, int, float, List[str], None]
215
- The cast value.
208
+ str | int | float | list[str] | None: The cast value.
216
209
 
217
210
  Raises:
218
- ------
219
- CastingError
220
- If the value cannot be cast to the provided type.
221
-
211
+ CastingError: If the value cannot be cast to the provided type.
222
212
  """
223
213
  if typed_value.value is None:
224
214
  return None
@@ -274,18 +264,13 @@ def _validate_typed_columns(
274
264
  ) -> None:
275
265
  """Validate a TypedColumns object.
276
266
 
277
- Arguments:
278
- ---------
279
- field_name: str
280
- The name of the Schema field that the TypedColumns object is associated with.
281
- typed_columns: TypedColumns
282
- The TypedColumns object to validate.
267
+ Args:
268
+ field_name (str): The name of the Schema field that the TypedColumns object
269
+ is associated with.
270
+ typed_columns (TypedColumns): The TypedColumns object to validate.
283
271
 
284
272
  Raises:
285
- ------
286
- InvalidTypedColumnsError
287
- If the TypedColumns object is invalid.
288
-
273
+ InvalidTypedColumnsError: If the TypedColumns object is invalid.
289
274
  """
290
275
  if typed_columns.is_empty():
291
276
  raise InvalidTypedColumnsError(field_name=field_name, reason="is empty")
@@ -304,24 +289,16 @@ def _cast_columns(
304
289
 
305
290
  (feature_column_names or tag_column_names)
306
291
 
307
- Arguments:
308
- ---------
309
- dataframe: pd.DataFrame
310
- A deepcopy of the user's dataframe.
311
- columns: TypedColumns
312
- The TypedColumns object, which specifies the columns to cast
313
- (and/or to not cast) and their target types.
292
+ Args:
293
+ dataframe (pd.DataFrame): A deepcopy of the user's dataframe.
294
+ columns (TypedColumns): The TypedColumns object, which specifies the columns
295
+ to cast (and/or to not cast) and their target types.
314
296
 
315
297
  Returns:
316
- -------
317
- dataframe: pd.DataFrame
318
- The dataframe with columns cast to the specified types.
298
+ pd.DataFrame: The dataframe with columns cast to the specified types.
319
299
 
320
300
  Raises:
321
- ------
322
- ColumnCastingError
323
- If casting fails.
324
-
301
+ ColumnCastingError: If casting fails.
325
302
  """
326
303
  if columns.to_str:
327
304
  try:
@@ -372,25 +349,17 @@ def _cast_df(
372
349
  ) -> pd.DataFrame:
373
350
  """Cast columns in a dataframe to the specified type.
374
351
 
375
- Arguments:
376
- ---------
377
- df: pd.DataFrame
378
- A deepcopy of the user's dataframe.
379
- cols: List[str]
380
- The list of column names to cast.
381
- target_type_str: str
382
- The target type to cast to.
352
+ Args:
353
+ df (pd.DataFrame): A deepcopy of the user's dataframe.
354
+ cols (list[str]): The list of column names to cast.
355
+ target_type_str (str): The target type to cast to.
383
356
 
384
357
  Returns:
385
- -------
386
- df: pd.DataFrame
387
- The dataframe with columns cast to the specified types.
358
+ pd.DataFrame: The dataframe with columns cast to the specified types.
388
359
 
389
360
  Raises:
390
- ------
391
- Exception
392
- If casting fails. Common exceptions raised by astype() are TypeError and ValueError.
393
-
361
+ Exception: If casting fails. Common exceptions raised by astype() are
362
+ TypeError and ValueError.
394
363
  """
395
364
  nan_mapping = {"nan": np.nan, "NaN": np.nan}
396
365
  df = df.replace(nan_mapping)
@@ -404,18 +373,13 @@ def _convert_schema_field_types(
404
373
  ) -> Schema:
405
374
  """Convert schema field types from TypedColumns to List[string] format.
406
375
 
407
- Arguments:
408
- ---------
409
- schema: Schema
410
- The schema, which may include feature and tag column names
376
+ Args:
377
+ schema (Schema): The schema, which may include feature and tag column names
411
378
  in a TypedColumns object or a List[string].
412
379
 
413
380
  Returns:
414
- -------
415
- schema: Schema
416
- A Schema, with feature and tag column names
417
- converted to the List[string] format expected in downstream validation.
418
-
381
+ Schema: A Schema, with feature and tag column names converted to the
382
+ List[string] format expected in downstream validation.
419
383
  """
420
384
  feature_column_names_list = (
421
385
  schema.feature_column_names
@@ -33,13 +33,13 @@ from arize.exceptions.parameters import (
33
33
  )
34
34
  from arize.exceptions.spaces import MissingSpaceIDError
35
35
  from arize.logging import get_truncation_warning_message
36
- from arize.models.bounded_executor import BoundedExecutor
37
- from arize.models.casting import cast_dictionary, cast_typed_columns
38
- from arize.models.stream_validation import (
36
+ from arize.ml.bounded_executor import BoundedExecutor
37
+ from arize.ml.casting import cast_dictionary, cast_typed_columns
38
+ from arize.ml.stream_validation import (
39
39
  validate_and_convert_prediction_id,
40
40
  validate_label,
41
41
  )
42
- from arize.types import (
42
+ from arize.ml.types import (
43
43
  CATEGORICAL_MODEL_TYPES,
44
44
  NUMERIC_MODEL_TYPES,
45
45
  ActualLabelTypes,
@@ -57,8 +57,8 @@ from arize.types import (
57
57
  SimilaritySearchParams,
58
58
  TypedValue,
59
59
  convert_element,
60
- is_list_of,
61
60
  )
61
+ from arize.utils.types import is_list_of
62
62
 
63
63
  if TYPE_CHECKING:
64
64
  import concurrent.futures as cf
@@ -95,14 +95,18 @@ _MIMIC_EXTRA = "mimic-explainer"
95
95
 
96
96
 
97
97
  class MLModelsClient:
98
- """Client for logging ML model predictions and actuals to Arize."""
98
+ """Client for logging ML model predictions and actuals to Arize.
99
99
 
100
- def __init__(self, *, sdk_config: SDKConfiguration) -> None:
101
- """Initialize the ML models client with SDK configuration.
100
+ This class is primarily intended for internal use within the SDK. Users are
101
+ highly encouraged to access resource-specific functionality via
102
+ :class:`arize.ArizeClient`.
103
+ """
102
104
 
103
- Args:
104
- sdk_config: SDK configuration containing API endpoints and credentials.
105
+ def __init__(self, *, sdk_config: SDKConfiguration) -> None:
105
106
  """
107
+ Args:
108
+ sdk_config: Resolved SDK configuration.
109
+ """ # noqa: D205, D212
106
110
  self._sdk_config = sdk_config
107
111
 
108
112
  # internal cache for the futures session
@@ -202,7 +206,7 @@ class MLModelsClient:
202
206
  """
203
207
  require(_STREAM_EXTRA, _STREAM_DEPS)
204
208
  from arize._generated.protocol.rec import public_pb2 as pb2
205
- from arize.models.proto import (
209
+ from arize.ml.proto import (
206
210
  get_pb_dictionary,
207
211
  get_pb_label,
208
212
  get_pb_timestamp,
@@ -374,9 +378,7 @@ class MLModelsClient:
374
378
  if embedding_features or prompt or response:
375
379
  # NOTE: Deep copy is necessary to avoid side effects on the original input dictionary
376
380
  combined_embedding_features = (
377
- dict(embedding_features.items())
378
- if embedding_features
379
- else {}
381
+ embedding_features.copy() if embedding_features else {}
380
382
  )
381
383
  # Map prompt as embedding features for generative models
382
384
  if prompt is not None:
@@ -523,7 +525,7 @@ class MLModelsClient:
523
525
  indexes=None,
524
526
  )
525
527
 
526
- def log_batch(
528
+ def log(
527
529
  self,
528
530
  *,
529
531
  space_id: str,
@@ -597,7 +599,7 @@ class MLModelsClient:
597
599
  import pandas.api.types as ptypes
598
600
  import pyarrow as pa
599
601
 
600
- from arize.models.batch_validation.validator import Validator
602
+ from arize.ml.batch_validation.validator import Validator
601
603
  from arize.utils.arrow import post_arrow_table
602
604
  from arize.utils.dataframe import remove_extraneous_columns
603
605
 
@@ -686,7 +688,7 @@ class MLModelsClient:
686
688
 
687
689
  if surrogate_explainability:
688
690
  require(_MIMIC_EXTRA, _MIMIC_DEPS)
689
- from arize.models.surrogate_explainer.mimic import Mimic
691
+ from arize.ml.surrogate_explainer.mimic import Mimic
690
692
 
691
693
  logger.debug("Running surrogate_explainability.")
692
694
  if schema.shap_values_column_names:
@@ -8,7 +8,7 @@ from google.protobuf.wrappers_pb2 import DoubleValue, StringValue
8
8
 
9
9
  from arize._generated.protocol.rec import public_pb2 as pb2
10
10
  from arize.exceptions.parameters import InvalidValueType
11
- from arize.types import (
11
+ from arize.ml.types import (
12
12
  CATEGORICAL_MODEL_TYPES,
13
13
  NUMERIC_MODEL_TYPES,
14
14
  Embedding,
@@ -22,8 +22,8 @@ from arize.types import (
22
22
  RankingPredictionLabel,
23
23
  SemanticSegmentationLabel,
24
24
  convert_element,
25
- is_list_of,
26
25
  )
26
+ from arize.utils.types import is_list_of
27
27
 
28
28
 
29
29
  def get_pb_dictionary(d: dict[object, object] | None) -> dict[str, object]:
@@ -6,7 +6,7 @@ from arize.constants.ml import MAX_PREDICTION_ID_LEN, MIN_PREDICTION_ID_LEN
6
6
  from arize.exceptions.parameters import (
7
7
  InvalidValueType,
8
8
  )
9
- from arize.types import (
9
+ from arize.ml.types import (
10
10
  CATEGORICAL_MODEL_TYPES,
11
11
  NUMERIC_MODEL_TYPES,
12
12
  ActualLabelTypes,
@@ -15,12 +15,16 @@ from interpret_community.mimic.mimic_explainer import (
15
15
  )
16
16
  from sklearn.preprocessing import LabelEncoder
17
17
 
18
- from arize.types import CATEGORICAL_MODEL_TYPES, NUMERIC_MODEL_TYPES, ModelTypes
18
+ from arize.ml.types import (
19
+ CATEGORICAL_MODEL_TYPES,
20
+ NUMERIC_MODEL_TYPES,
21
+ ModelTypes,
22
+ )
19
23
 
20
24
  if TYPE_CHECKING:
21
25
  from collections.abc import Callable
22
26
 
23
- from arize.types import Schema
27
+ from arize.ml.types import Schema
24
28
 
25
29
 
26
30
  class Mimic: