PyPI - arize - Versions diffs - 8.0.0a22__py3-none-any.whl → 8.0.0b0__py3-none-any.whl - Mend

arize: 8.0.0a22__py3-none-any.whl → 8.0.0b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (171)

arize/__init__.py +28 -19
arize/_exporter/client.py +56 -37
arize/_exporter/parsers/tracing_data_parser.py +41 -30
arize/_exporter/validation.py +3 -3
arize/_flight/client.py +207 -76
arize/_generated/api_client/__init__.py +30 -6
arize/_generated/api_client/api/__init__.py +1 -0
arize/_generated/api_client/api/datasets_api.py +864 -190
arize/_generated/api_client/api/experiments_api.py +167 -131
arize/_generated/api_client/api/projects_api.py +1197 -0
arize/_generated/api_client/api_client.py +2 -2
arize/_generated/api_client/configuration.py +42 -34
arize/_generated/api_client/exceptions.py +2 -2
arize/_generated/api_client/models/__init__.py +15 -4
arize/_generated/api_client/models/dataset.py +10 -10
arize/_generated/api_client/models/dataset_example.py +111 -0
arize/_generated/api_client/models/dataset_example_update.py +100 -0
arize/_generated/api_client/models/dataset_version.py +13 -13
arize/_generated/api_client/models/datasets_create_request.py +16 -8
arize/_generated/api_client/models/datasets_examples_insert_request.py +100 -0
arize/_generated/api_client/models/datasets_examples_list200_response.py +106 -0
arize/_generated/api_client/models/datasets_examples_update_request.py +102 -0
arize/_generated/api_client/models/datasets_list200_response.py +10 -4
arize/_generated/api_client/models/experiment.py +14 -16
arize/_generated/api_client/models/experiment_run.py +108 -0
arize/_generated/api_client/models/experiment_run_create.py +102 -0
arize/_generated/api_client/models/experiments_create_request.py +16 -10
arize/_generated/api_client/models/experiments_list200_response.py +10 -4
arize/_generated/api_client/models/experiments_runs_list200_response.py +19 -5
arize/_generated/api_client/models/{error.py → pagination_metadata.py} +13 -11
arize/_generated/api_client/models/primitive_value.py +172 -0
arize/_generated/api_client/models/problem.py +100 -0
arize/_generated/api_client/models/project.py +99 -0
arize/_generated/api_client/models/{datasets_list_examples200_response.py → projects_create_request.py} +13 -11
arize/_generated/api_client/models/projects_list200_response.py +106 -0
arize/_generated/api_client/rest.py +2 -2
arize/_generated/api_client/test/test_dataset.py +4 -2
arize/_generated/api_client/test/test_dataset_example.py +56 -0
arize/_generated/api_client/test/test_dataset_example_update.py +52 -0
arize/_generated/api_client/test/test_dataset_version.py +7 -2
arize/_generated/api_client/test/test_datasets_api.py +27 -13
arize/_generated/api_client/test/test_datasets_create_request.py +8 -4
arize/_generated/api_client/test/{test_datasets_list_examples200_response.py → test_datasets_examples_insert_request.py} +19 -15
arize/_generated/api_client/test/test_datasets_examples_list200_response.py +66 -0
arize/_generated/api_client/test/test_datasets_examples_update_request.py +61 -0
arize/_generated/api_client/test/test_datasets_list200_response.py +9 -3
arize/_generated/api_client/test/test_experiment.py +2 -4
arize/_generated/api_client/test/test_experiment_run.py +56 -0
arize/_generated/api_client/test/test_experiment_run_create.py +54 -0
arize/_generated/api_client/test/test_experiments_api.py +6 -6
arize/_generated/api_client/test/test_experiments_create_request.py +9 -6
arize/_generated/api_client/test/test_experiments_list200_response.py +9 -5
arize/_generated/api_client/test/test_experiments_runs_list200_response.py +15 -5
arize/_generated/api_client/test/test_pagination_metadata.py +53 -0
arize/_generated/api_client/test/{test_error.py → test_primitive_value.py} +13 -14
arize/_generated/api_client/test/test_problem.py +57 -0
arize/_generated/api_client/test/test_project.py +58 -0
arize/_generated/api_client/test/test_projects_api.py +59 -0
arize/_generated/api_client/test/test_projects_create_request.py +54 -0
arize/_generated/api_client/test/test_projects_list200_response.py +70 -0
arize/_generated/api_client_README.md +43 -29
arize/_generated/protocol/flight/flight_pb2.py +400 -0
arize/_lazy.py +27 -19
arize/client.py +181 -58
arize/config.py +324 -116
arize/constants/__init__.py +1 -0
arize/constants/config.py +11 -4
arize/constants/ml.py +6 -4
arize/constants/openinference.py +2 -0
arize/constants/pyarrow.py +2 -0
arize/constants/spans.py +3 -1
arize/datasets/__init__.py +1 -0
arize/datasets/client.py +304 -84
arize/datasets/errors.py +32 -2
arize/datasets/validation.py +18 -8
arize/embeddings/__init__.py +2 -0
arize/embeddings/auto_generator.py +23 -19
arize/embeddings/base_generators.py +89 -36
arize/embeddings/constants.py +2 -0
arize/embeddings/cv_generators.py +26 -4
arize/embeddings/errors.py +27 -5
arize/embeddings/nlp_generators.py +43 -18
arize/embeddings/tabular_generators.py +46 -31
arize/embeddings/usecases.py +12 -2
arize/exceptions/__init__.py +1 -0
arize/exceptions/auth.py +11 -1
arize/exceptions/base.py +29 -4
arize/exceptions/models.py +21 -2
arize/exceptions/parameters.py +31 -0
arize/exceptions/spaces.py +12 -1
arize/exceptions/types.py +86 -7
arize/exceptions/values.py +220 -20
arize/experiments/__init__.py +13 -0
arize/experiments/client.py +394 -285
arize/experiments/evaluators/__init__.py +1 -0
arize/experiments/evaluators/base.py +74 -41
arize/experiments/evaluators/exceptions.py +6 -3
arize/experiments/evaluators/executors.py +121 -73
arize/experiments/evaluators/rate_limiters.py +106 -57
arize/experiments/evaluators/types.py +34 -7
arize/experiments/evaluators/utils.py +65 -27
arize/experiments/functions.py +103 -101
arize/experiments/tracing.py +52 -44
arize/experiments/types.py +56 -31
arize/logging.py +54 -22
arize/ml/__init__.py +1 -0
arize/ml/batch_validation/__init__.py +1 -0
arize/{models → ml}/batch_validation/errors.py +545 -67
arize/{models → ml}/batch_validation/validator.py +344 -303
arize/ml/bounded_executor.py +47 -0
arize/{models → ml}/casting.py +118 -108
arize/{models → ml}/client.py +339 -118
arize/{models → ml}/proto.py +97 -42
arize/{models → ml}/stream_validation.py +43 -15
arize/ml/surrogate_explainer/__init__.py +1 -0
arize/{models → ml}/surrogate_explainer/mimic.py +25 -10
arize/{types.py → ml/types.py} +355 -354
arize/pre_releases.py +44 -0
arize/projects/__init__.py +1 -0
arize/projects/client.py +134 -0
arize/regions.py +40 -0
arize/spans/__init__.py +1 -0
arize/spans/client.py +204 -175
arize/spans/columns.py +13 -0
arize/spans/conversion.py +60 -37
arize/spans/validation/__init__.py +1 -0
arize/spans/validation/annotations/__init__.py +1 -0
arize/spans/validation/annotations/annotations_validation.py +6 -4
arize/spans/validation/annotations/dataframe_form_validation.py +13 -11
arize/spans/validation/annotations/value_validation.py +35 -11
arize/spans/validation/common/__init__.py +1 -0
arize/spans/validation/common/argument_validation.py +33 -8
arize/spans/validation/common/dataframe_form_validation.py +35 -9
arize/spans/validation/common/errors.py +211 -11
arize/spans/validation/common/value_validation.py +81 -14
arize/spans/validation/evals/__init__.py +1 -0
arize/spans/validation/evals/dataframe_form_validation.py +28 -8
arize/spans/validation/evals/evals_validation.py +34 -4
arize/spans/validation/evals/value_validation.py +26 -3
arize/spans/validation/metadata/__init__.py +1 -1
arize/spans/validation/metadata/argument_validation.py +14 -5
arize/spans/validation/metadata/dataframe_form_validation.py +26 -10
arize/spans/validation/metadata/value_validation.py +24 -10
arize/spans/validation/spans/__init__.py +1 -0
arize/spans/validation/spans/dataframe_form_validation.py +35 -14
arize/spans/validation/spans/spans_validation.py +35 -4
arize/spans/validation/spans/value_validation.py +78 -8
arize/utils/__init__.py +1 -0
arize/utils/arrow.py +31 -15
arize/utils/cache.py +34 -6
arize/utils/dataframe.py +20 -3
arize/utils/online_tasks/__init__.py +2 -0
arize/utils/online_tasks/dataframe_preprocessor.py +58 -47
arize/utils/openinference_conversion.py +44 -5
arize/utils/proto.py +10 -0
arize/utils/size.py +5 -3
arize/utils/types.py +105 -0
arize/version.py +3 -1
{arize-8.0.0a22.dist-info → arize-8.0.0b0.dist-info}/METADATA +13 -6
arize-8.0.0b0.dist-info/RECORD +175 -0
{arize-8.0.0a22.dist-info → arize-8.0.0b0.dist-info}/WHEEL +1 -1
arize-8.0.0b0.dist-info/licenses/LICENSE +176 -0
arize-8.0.0b0.dist-info/licenses/NOTICE +13 -0
arize/_generated/protocol/flight/export_pb2.py +0 -61
arize/_generated/protocol/flight/ingest_pb2.py +0 -365
arize/models/__init__.py +0 -0
arize/models/batch_validation/__init__.py +0 -0
arize/models/bounded_executor.py +0 -34
arize/models/surrogate_explainer/__init__.py +0 -0
arize-8.0.0a22.dist-info/RECORD +0 -146
arize-8.0.0a22.dist-info/licenses/LICENSE.md +0 -12

arize/{types.py → ml/types.py} RENAMED Viewed

@@ -1,20 +1,16 @@
-import json
+"""Common type definitions and data models used across the ML Client."""
 import logging
 import math
+from collections.abc import Iterator
 from dataclasses import asdict, dataclass, replace
 from datetime import datetime
 from decimal import Decimal
 from enum import Enum, unique
 from itertools import chain
 from typing import (
-    Dict,
-    Iterable,
-    List,
     NamedTuple,
-    Sequence,
-    Set,
-    Tuple,
-    TypeVar,
+    Self,
 )
 import numpy as np
@@ -42,12 +38,15 @@ from arize.exceptions.parameters import InvalidValueType
 # )
 # from arize.utils.errors import InvalidValueType
 from arize.logging import get_truncation_warning_message
+from arize.utils.types import is_dict_of, is_iterable_of, is_list_of
 logger = logging.getLogger(__name__)
 @unique
 class ModelTypes(Enum):
+    """Enum representing supported model types in Arize."""
     NUMERIC = 1
     SCORE_CATEGORICAL = 2
     RANKING = 3
@@ -58,7 +57,8 @@ class ModelTypes(Enum):
     MULTI_CLASS = 8
     @classmethod
-    def list_types(cls):
+    def list_types(cls) -> list[str]:
+        """Return a list of all type names in this enum."""
         return [t.name for t in cls]
@@ -70,7 +70,10 @@ CATEGORICAL_MODEL_TYPES = [
 class DocEnum(Enum):
-    def __new__(cls, value, doc=None):
+    """Enum subclass supporting inline documentation for enum members."""
+    def __new__(cls, value: object, doc: str | None = None) -> Self:
+        """Create a new enum instance with optional documentation."""
         self = object.__new__(
             cls
         )  # calling super().__new__(value) here would fail
@@ -80,13 +83,13 @@ class DocEnum(Enum):
         return self
     def __repr__(self) -> str:
+        """Return a string representation including documentation."""
         return f"{self.name} metrics include: {self.__doc__}"
 @unique
 class Metrics(DocEnum):
-    """
-    Metric groupings, used for validation of schema columns in log() call.
+    """Metric groupings, used for validation of schema columns in log() call.
     See docstring descriptions of the Enum with __doc__ or __repr__(), e.g.:
     Metrics.RANKING.__doc__
@@ -105,6 +108,8 @@ class Metrics(DocEnum):
 @unique
 class Environments(Enum):
+    """Enum representing deployment environments for models."""
     TRAINING = 1
     VALIDATION = 2
     PRODUCTION = 3
@@ -114,11 +119,18 @@ class Environments(Enum):
 @dataclass
 class EmbeddingColumnNames:
+    """Column names for embedding feature data."""
     vector_column_name: str = ""
     data_column_name: str | None = None
     link_to_data_column_name: str | None = None
-    def __post_init__(self):
+    def __post_init__(self) -> None:
+        """Validate that vector column name is specified.
+        Raises:
+            ValueError: If vector_column_name is empty.
+        """
         if not self.vector_column_name:
             raise ValueError(
                 "embedding_features require a vector to be specified. You can "
@@ -126,7 +138,8 @@ class EmbeddingColumnNames:
                 "(from arize.pandas.embeddings) if you do not have them"
             )
-    def __iter__(self):
+    def __iter__(self) -> Iterator[str | None]:
+        """Iterate over the embedding column names."""
         return iter(
             (
                 self.vector_column_name,
@@ -137,24 +150,23 @@ class EmbeddingColumnNames:
 class Embedding(NamedTuple):
-    vector: List[float]
-    data: str | List[str] | None = None
+    """Container for embedding vector data with optional raw data and links."""
+    vector: list[float]
+    data: str | list[str] | None = None
     link_to_data: str | None = None
     def validate(self, emb_name: str | int | float) -> None:
-        """
-        Validates that the embedding object passed is of the correct format.
-        That is, validations must be passed for vector, data & link_to_data.
+        """Validates that the embedding object passed is of the correct format.
-        Arguments:
-        ---------
-            emb_name (str, int, float): Name of the embedding feature the
-            vector belongs to
+        Ensures validations are passed for vector, data, and link_to_data fields.
-        Raises:
-        ------
-            TypeError: If the embedding fields are of the wrong type
+        Args:
+            emb_name: Name of the embedding feature the
+                vector belongs to.
+        Raises:
+            TypeError: If the embedding fields are of the wrong type.
         """
         if self.vector is not None:
             self._validate_embedding_vector(emb_name)
@@ -167,29 +179,23 @@ class Embedding(NamedTuple):
         if self.link_to_data is not None:
             self._validate_embedding_link_to_data(emb_name, self.link_to_data)
-        return None
+        return
     def _validate_embedding_vector(
         self,
         emb_name: str | int | float,
     ) -> None:
-        """
-        Validates that the embedding vector passed is of the correct format.
-        That is:
-            1. Type must be list or convertible to list (like numpy arrays,
-            pandas Series)
-            2. List must not be empty
-            3. Elements in list must be floats
-        Arguments:
-        ---------
-            emb_name (str, int, float): Name of the embedding feature the vector
-            belongs to
+        """Validates that the embedding vector passed is of the correct format.
-        Raises:
-        ------
-            TypeError: If the embedding does not satisfy requirements above
+        Requirements: 1) Type must be list or convertible to list (like numpy arrays,
+        pandas Series), 2) List must not be empty, 3) Elements in list must be floats.
+        Args:
+            emb_name: Name of the embedding feature the vector
+                belongs to.
+        Raises:
+            TypeError: If the embedding does not satisfy requirements above.
         """
         if not Embedding._is_valid_iterable(self.vector):
             raise TypeError(
@@ -209,21 +215,19 @@ class Embedding(NamedTuple):
     @staticmethod
     def _validate_embedding_data(
-        emb_name: str | int | float, data: str | List[str]
+        emb_name: str | int | float, data: str | list[str]
     ) -> None:
-        """
-        Validates that the embedding raw data field is of the correct format. That is:
-            1. Must be string or list of strings (NLP case)
+        """Validates that the embedding raw data field is of the correct format.
-        Arguments:
-        ---------
-            emb_name (str, int, float): Name of the embedding feature the vector belongs to
-            data (str, int, float): Raw data associated with the embedding feature. Typically raw text.
+        Requirement: Must be string or list of strings (NLP case).
-        Raises:
-        ------
-            TypeError: If the embedding does not satisfy requirements above
+        Args:
+            emb_name: Name of the embedding feature the vector belongs to.
+            data: Raw data associated with the embedding feature.
+                Typically raw text.
+        Raises:
+            TypeError: If the embedding does not satisfy requirements above.
         """
         # Validate that data is a string or iterable of strings
         is_string = isinstance(data, str)
@@ -247,7 +251,7 @@ class Embedding(NamedTuple):
                 f"Embedding data field must not contain more than {MAX_RAW_DATA_CHARACTERS} characters. "
                 f"Found {character_count}."
             )
-        elif character_count > MAX_RAW_DATA_CHARACTERS_TRUNCATION:
+        if character_count > MAX_RAW_DATA_CHARACTERS_TRUNCATION:
             logger.warning(
                 get_truncation_warning_message(
                     "Embedding raw data fields",
@@ -259,20 +263,17 @@ class Embedding(NamedTuple):
     def _validate_embedding_link_to_data(
         emb_name: str | int | float, link_to_data: str
     ) -> None:
-        """
-        Validates that the embedding link to data field is of the correct format. That is:
-            1. Must be string
+        """Validates that the embedding link to data field is of the correct format.
-        Arguments:
-        ---------
-            emb_name (str, int, float): Name of the embedding feature the vector belongs to
-            link_to_data (str): Link to source data of embedding feature, typically an image file on
-                cloud storage
+        Requirement: Must be string.
-        Raises:
-        ------
-            TypeError: If the embedding does not satisfy requirements above
+        Args:
+            emb_name: Name of the embedding feature the vector belongs to.
+            link_to_data: Link to source data of embedding feature, typically an
+                image file on cloud storage.
+        Raises:
+            TypeError: If the embedding does not satisfy requirements above.
         """
         if not isinstance(link_to_data, str):
             raise TypeError(
@@ -282,22 +283,18 @@ class Embedding(NamedTuple):
     @staticmethod
     def _is_valid_iterable(
-        data: str | List[str] | List[float] | np.ndarray,
+        data: str | list[str] | list[float] | np.ndarray,
     ) -> bool:
-        """
-        Validates that the input data field is of the correct iterable type. That is:
-            1. List or
-            2. numpy array or
-            3. pandas Series
+        """Validates that the input data field is of the correct iterable type.
-        Arguments:
-        ---------
-            data: input iterable
+        Accepted types: 1) List, 2) numpy array, or 3) pandas Series.
-        Returns:
-        -------
-            True if the data type is one of the accepted iterable types, false otherwise
+        Args:
+            data: Input iterable.
+        Returns:
+            True if the data type is one of the accepted iterable types,
+                false otherwise.
         """
         return any(isinstance(data, t) for t in (list, np.ndarray))
@@ -327,12 +324,15 @@ class Embedding(NamedTuple):
 class LLMRunMetadata(NamedTuple):
+    """Metadata for LLM execution including token counts and latency."""
     total_token_count: int | None = None
     prompt_token_count: int | None = None
     response_token_count: int | None = None
     response_latency_ms: int | float | None = None
     def validate(self) -> None:
+        """Validate the field values and constraints."""
         allowed_types = (int, float, np.int16, np.int32, np.float16, np.float32)
         if not isinstance(self.total_token_count, allowed_types):
             raise InvalidValueType(
@@ -361,22 +361,20 @@ class LLMRunMetadata(NamedTuple):
 class ObjectDetectionColumnNames(NamedTuple):
-    """
-    Used to log object detection prediction and actual values that are assigned to the prediction or
-    actual schema parameter.
+    """Used to log object detection prediction and actual values.
+    These values are assigned to the prediction or actual schema parameter.
-    Arguments:
-    ---------
-        bounding_boxes_coordinates_column_name (str): Column name containing the coordinates of the
+    Args:
+        bounding_boxes_coordinates_column_name: Column name containing the coordinates of the
             rectangular outline that locates an object within an image or video. Pascal VOC format
             required. The contents of this column must be a List[List[float]].
-        categories_column_name (str): Column name containing the predefined classes or labels used
+        categories_column_name: Column name containing the predefined classes or labels used
             by the model to classify the detected objects. The contents of this column must be List[str].
-        scores_column_names (str, optional): Column name containing the confidence scores that the
+        scores_column_names: Column name containing the confidence scores that the
             model assigns to it's predictions, indicating how certain the model is that the predicted
             class is contained within the bounding box. This argument is only applicable for prediction
             values. The contents of this column must be List[float].
     """
     bounding_boxes_coordinates_column_name: str
@@ -385,19 +383,17 @@ class ObjectDetectionColumnNames(NamedTuple):
 class SemanticSegmentationColumnNames(NamedTuple):
-    """
-    Used to log semantic segmentation prediction and actual values that are assigned to the prediction or
-    actual schema parameter.
+    """Used to log semantic segmentation prediction and actual values.
-    Arguments:
-    ---------
-        polygon_coordinates_column_name (str): Column name containing the coordinates of the vertices
+    These values are assigned to the prediction or actual schema parameter.
+    Args:
+        polygon_coordinates_column_name: Column name containing the coordinates of the vertices
             of the polygon mask within an image or video. The first sublist contains the
             coordinates of the outline of the polygon. The subsequent sublists contain the coordinates
             of any cutouts within the polygon. The contents of this column must be a List[List[float]].
-        categories_column_name (str): Column name containing the predefined classes or labels used
+        categories_column_name: Column name containing the predefined classes or labels used
             by the model to classify the detected objects. The contents of this column must be List[str].
     """
     polygon_coordinates_column_name: str
@@ -405,25 +401,22 @@ class SemanticSegmentationColumnNames(NamedTuple):
 class InstanceSegmentationPredictionColumnNames(NamedTuple):
-    """
-    Used to log instance segmentation prediction values that are assigned to the prediction schema parameter.
+    """Used to log instance segmentation prediction values for the prediction schema parameter.
-    Arguments:
-    ---------
-        polygon_coordinates_column_name (str): Column name containing the coordinates of the vertices
+    Args:
+        polygon_coordinates_column_name: Column name containing the coordinates of the vertices
             of the polygon mask within an image or video. The first sublist contains the
             coordinates of the outline of the polygon. The subsequent sublists contain the coordinates
             of any cutouts within the polygon. The contents of this column must be a List[List[float]].
-        categories_column_name (str): Column name containing the predefined classes or labels used
+        categories_column_name: Column name containing the predefined classes or labels used
             by the model to classify the detected objects. The contents of this column must be List[str].
-        scores_column_name (str, optional): Column name containing the confidence scores that the
+        scores_column_name: Column name containing the confidence scores that the
             model assigns to it's predictions, indicating how certain the model is that the predicted
             class is contained within the bounding box. This argument is only applicable for prediction
             values. The contents of this column must be List[float].
-        bounding_boxes_coordinates_column_name (str, optional): Column name containing the coordinates of the
+        bounding_boxes_coordinates_column_name: Column name containing the coordinates of the
             rectangular outline that locates an object within an image or video. Pascal VOC format
             required. The contents of this column must be a List[List[float]].
     """
     polygon_coordinates_column_name: str
@@ -433,20 +426,17 @@ class InstanceSegmentationPredictionColumnNames(NamedTuple):
 class InstanceSegmentationActualColumnNames(NamedTuple):
-    """
-    Used to log instance segmentation actual values that are assigned to the actual schema parameter.
+    """Used to log instance segmentation actual values that are assigned to the actual schema parameter.
-    Arguments:
-    ---------
-        polygon_coordinates_column_name (str): Column name containing the coordinates of the
+    Args:
+        polygon_coordinates_column_name: Column name containing the coordinates of the
             polygon that locates an object within an image or video. The contents of this column
             must be a List[List[float]].
-        categories_column_name (str): Column name containing the predefined classes or labels used
+        categories_column_name: Column name containing the predefined classes or labels used
             by the model to classify the detected objects. The contents of this column must be List[str].
-        bounding_boxes_coordinates_column_name (str, optional): Column name containing the coordinates of the
+        bounding_boxes_coordinates_column_name: Column name containing the coordinates of the
             rectangular outline that locates an object within an image or video. Pascal VOC format
             required. The contents of this column must be a List[List[float]].
     """
     polygon_coordinates_column_name: str
@@ -455,12 +445,15 @@ class InstanceSegmentationActualColumnNames(NamedTuple):
 class ObjectDetectionLabel(NamedTuple):
-    bounding_boxes_coordinates: List[List[float]]
-    categories: List[str]
+    """Label data for object detection tasks with bounding boxes and categories."""
+    bounding_boxes_coordinates: list[list[float]]
+    categories: list[str]
     # Actual Object Detection Labels won't have scores
-    scores: List[float] | None = None
+    scores: list[float] | None = None
-    def validate(self, prediction_or_actual: str):
+    def validate(self, prediction_or_actual: str) -> None:
+        """Validate the object detection label fields and constraints."""
         # Validate bounding boxes
         self._validate_bounding_boxes_coordinates()
         # Validate categories
@@ -470,7 +463,7 @@ class ObjectDetectionLabel(NamedTuple):
         # Validate we have the same number of bounding boxes, categories and scores
         self._validate_count_match()
-    def _validate_bounding_boxes_coordinates(self):
+    def _validate_bounding_boxes_coordinates(self) -> None:
         if not is_list_of(self.bounding_boxes_coordinates, list):
             raise TypeError(
                 "Object Detection Label bounding boxes must be a list of lists of floats"
@@ -478,14 +471,14 @@ class ObjectDetectionLabel(NamedTuple):
         for coordinates in self.bounding_boxes_coordinates:
             _validate_bounding_box_coordinates(coordinates)
-    def _validate_categories(self):
+    def _validate_categories(self) -> None:
         # Allows for categories as empty strings
         if not is_list_of(self.categories, str):
             raise TypeError(
                 "Object Detection Label categories must be a list of strings"
             )
-    def _validate_scores(self, prediction_or_actual: str):
+    def _validate_scores(self, prediction_or_actual: str) -> None:
         if self.scores is None:
             if prediction_or_actual == "prediction":
                 raise ValueError(
@@ -507,7 +500,7 @@ class ObjectDetectionLabel(NamedTuple):
                     f"{self.scores}"
                 )
-    def _validate_count_match(self):
+    def _validate_count_match(self) -> None:
         n_bounding_boxes = len(self.bounding_boxes_coordinates)
         if n_bounding_boxes == 0:
             raise ValueError(
@@ -534,10 +527,13 @@ class ObjectDetectionLabel(NamedTuple):
 class SemanticSegmentationLabel(NamedTuple):
-    polygon_coordinates: List[List[float]]
-    categories: List[str]
+    """Label data for semantic segmentation with polygon coordinates and categories."""
-    def validate(self):
+    polygon_coordinates: list[list[float]]
+    categories: list[str]
+    def validate(self) -> None:
+        """Validate the field values and constraints."""
         # Validate polygon coordinates
         self._validate_polygon_coordinates()
         # Validate categories
@@ -545,17 +541,17 @@ class SemanticSegmentationLabel(NamedTuple):
         # Validate we have the same number of polygon coordinates and categories
         self._validate_count_match()
-    def _validate_polygon_coordinates(self):
+    def _validate_polygon_coordinates(self) -> None:
         _validate_polygon_coordinates(self.polygon_coordinates)
-    def _validate_categories(self):
+    def _validate_categories(self) -> None:
         # Allows for categories as empty strings
         if not is_list_of(self.categories, str):
             raise TypeError(
                 "Semantic Segmentation Label categories must be a list of strings"
             )
-    def _validate_count_match(self):
+    def _validate_count_match(self) -> None:
         n_polygon_coordinates = len(self.polygon_coordinates)
         if n_polygon_coordinates == 0:
             raise ValueError(
@@ -573,12 +569,15 @@ class SemanticSegmentationLabel(NamedTuple):
 class InstanceSegmentationPredictionLabel(NamedTuple):
-    polygon_coordinates: List[List[float]]
-    categories: List[str]
-    scores: List[float] | None = None
-    bounding_boxes_coordinates: List[List[float]] | None = None
+    """Prediction label for instance segmentation with polygons and category information."""
-    def validate(self):
+    polygon_coordinates: list[list[float]]
+    categories: list[str]
+    scores: list[float] | None = None
+    bounding_boxes_coordinates: list[list[float]] | None = None
+    def validate(self) -> None:
+        """Validate the field values and constraints."""
         # Validate polygon coordinates
         self._validate_polygon_coordinates()
         # Validate categories
@@ -590,17 +589,17 @@ class InstanceSegmentationPredictionLabel(NamedTuple):
         # Validate we have the same number of polygon coordinates and categories
         self._validate_count_match()
-    def _validate_polygon_coordinates(self):
+    def _validate_polygon_coordinates(self) -> None:
         _validate_polygon_coordinates(self.polygon_coordinates)
-    def _validate_categories(self):
+    def _validate_categories(self) -> None:
         # Allows for categories as empty strings
         if not is_list_of(self.categories, str):
             raise TypeError(
                 "Instance Segmentation Prediction Label categories must be a list of strings"
             )
-    def _validate_scores(self):
+    def _validate_scores(self) -> None:
         if self.scores is not None:
             if not is_list_of(self.scores, float):
                 raise TypeError(
@@ -613,7 +612,7 @@ class InstanceSegmentationPredictionLabel(NamedTuple):
                     f"{self.scores}"
                 )
-    def _validate_bounding_boxes(self):
+    def _validate_bounding_boxes(self) -> None:
         if self.bounding_boxes_coordinates is not None:
             if not is_list_of(self.bounding_boxes_coordinates, list):
                 raise TypeError(
@@ -622,7 +621,7 @@ class InstanceSegmentationPredictionLabel(NamedTuple):
             for coordinates in self.bounding_boxes_coordinates:
                 _validate_bounding_box_coordinates(coordinates)
-    def _validate_count_match(self):
+    def _validate_count_match(self) -> None:
         n_polygon_coordinates = len(self.polygon_coordinates)
         if n_polygon_coordinates == 0:
             raise ValueError(
@@ -657,11 +656,14 @@ class InstanceSegmentationPredictionLabel(NamedTuple):
 class InstanceSegmentationActualLabel(NamedTuple):
-    polygon_coordinates: List[List[float]]
-    categories: List[str]
-    bounding_boxes_coordinates: List[List[float]] | None = None
+    """Actual label for instance segmentation with polygon coordinates and categories."""
-    def validate(self):
+    polygon_coordinates: list[list[float]]
+    categories: list[str]
+    bounding_boxes_coordinates: list[list[float]] | None = None
+    def validate(self) -> None:
+        """Validate the field values and constraints."""
         # Validate polygon coordinates
         self._validate_polygon_coordinates()
         # Validate categories
@@ -671,17 +673,17 @@ class InstanceSegmentationActualLabel(NamedTuple):
         # Validate we have the same number of polygon coordinates and categories
         self._validate_count_match()
-    def _validate_polygon_coordinates(self):
+    def _validate_polygon_coordinates(self) -> None:
         _validate_polygon_coordinates(self.polygon_coordinates)
-    def _validate_categories(self):
+    def _validate_categories(self) -> None:
         # Allows for categories as empty strings
         if not is_list_of(self.categories, str):
             raise TypeError(
                 "Instance Segmentation Actual Label categories must be a list of strings"
             )
-    def _validate_bounding_boxes(self):
+    def _validate_bounding_boxes(self) -> None:
         if self.bounding_boxes_coordinates is not None:
             if not is_list_of(self.bounding_boxes_coordinates, list):
                 raise TypeError(
@@ -690,7 +692,7 @@ class InstanceSegmentationActualLabel(NamedTuple):
             for coordinates in self.bounding_boxes_coordinates:
                 _validate_bounding_box_coordinates(coordinates)
-    def _validate_count_match(self):
+    def _validate_count_match(self) -> None:
         n_polygon_coordinates = len(self.polygon_coordinates)
         if n_polygon_coordinates == 0:
             raise ValueError(
@@ -717,27 +719,24 @@ class InstanceSegmentationActualLabel(NamedTuple):
 class MultiClassPredictionLabel(NamedTuple):
-    """
-    Used to log multi class prediction label
+    """Used to log multi class prediction label.
-    Arguments:
-    ---------
-    MultiClassPredictionLabel
-        prediction_scores (Dict[str, Union[float, int]]): the prediction scores of the classes.
-        threshold_scores (Optional[Dict[str, Union[float, int]]]): the threshold scores of the classes.
+    Args:
+        prediction_scores: The prediction scores of the classes.
+        threshold_scores: The threshold scores of the classes.
             Only Multi Label will have threshold scores.
     """
-    prediction_scores: Dict[str, float | int]
-    threshold_scores: Dict[str, float | int] | None = None
+    prediction_scores: dict[str, float | int]
+    threshold_scores: dict[str, float | int] | None = None
-    def validate(self):
+    def validate(self) -> None:
+        """Validate the field values and constraints."""
         # Validate scores
         self._validate_prediction_scores()
         self._validate_threshold_scores()
-    def _validate_prediction_scores(self):
+    def _validate_prediction_scores(self) -> None:
         # prediction dictionary validations
         if not is_dict_of(
             self.prediction_scores,
@@ -778,7 +777,7 @@ class MultiClassPredictionLabel(NamedTuple):
                     "invalid. All scores (values in dictionary) must be between 0 and 1, inclusive."
                 )
-    def _validate_threshold_scores(self):
+    def _validate_threshold_scores(self) -> None:
         if self.threshold_scores is None or len(self.threshold_scores) == 0:
             return
         if not is_dict_of(
@@ -822,24 +821,21 @@ class MultiClassPredictionLabel(NamedTuple):
 class MultiClassActualLabel(NamedTuple):
-    """
-    Used to log multi class actual label
-    Arguments:
-    ---------
-    MultiClassActualLabel
-        actual_scores (Dict[str, Union[float, int]]): the actual scores of the classes.
-        Any class in actual_scores with a score of 1 will be sent to arize
+    """Used to log multi class actual label.
+    Args:
+        actual_scores: The actual scores of the classes.
+            Any class in actual_scores with a score of 1 will be sent to arize.
     """
-    actual_scores: Dict[str, float | int]
+    actual_scores: dict[str, float | int]
-    def validate(self):
+    def validate(self) -> None:
+        """Validate the field values and constraints."""
         # Validate scores
         self._validate_actual_scores()
-    def _validate_actual_scores(self):
+    def _validate_actual_scores(self) -> None:
         if not is_dict_of(
             self.actual_scores,
             key_allowed_types=str,
@@ -879,12 +875,15 @@ class MultiClassActualLabel(NamedTuple):
 class RankingPredictionLabel(NamedTuple):
+    """Prediction label for ranking tasks with group and rank information."""
     group_id: str
     rank: int
     score: float | None = None
     label: str | None = None
-    def validate(self):
+    def validate(self) -> None:
+        """Validate the field values and constraints."""
         # Validate existence of required fields: prediction_group_id and rank
         if self.group_id is None or self.rank is None:
             raise ValueError(
@@ -901,7 +900,7 @@ class RankingPredictionLabel(NamedTuple):
         if self.score is not None:
             self._validate_score()
-    def _validate_group_id(self):
+    def _validate_group_id(self) -> None:
         if not isinstance(self.group_id, str):
             raise TypeError("Prediction Group ID must be a string")
         if not (1 <= len(self.group_id) <= 36):
@@ -909,7 +908,7 @@ class RankingPredictionLabel(NamedTuple):
                 f"Prediction Group ID must have length between 1 and 36. Found {len(self.group_id)}"
             )
-    def _validate_rank(self):
+    def _validate_rank(self) -> None:
         if not isinstance(self.rank, int):
             raise TypeError("Prediction Rank must be an int")
         if not (1 <= self.rank <= 100):
@@ -917,22 +916,25 @@ class RankingPredictionLabel(NamedTuple):
                 f"Prediction Rank must be between 1 and 100, inclusive. Found {self.rank}"
             )
-    def _validate_label(self):
+    def _validate_label(self) -> None:
         if not isinstance(self.label, str):
             raise TypeError("Prediction Label must be a str")
         if self.label == "":
             raise ValueError("Prediction Label must not be an empty string.")
-    def _validate_score(self):
+    def _validate_score(self) -> None:
         if not isinstance(self.score, (float, int)):
             raise TypeError("Prediction Score must be a float or an int")
 class RankingActualLabel(NamedTuple):
-    relevance_labels: List[str] | None = None
+    """Actual label for ranking tasks with relevance information."""
+    relevance_labels: list[str] | None = None
     relevance_score: float | None = None
-    def validate(self):
+    def validate(self) -> None:
+        """Validate the field values and constraints."""
         # Validate relevance_labels type
         if self.relevance_labels is not None:
             self._validate_relevance_labels(self.relevance_labels)
@@ -941,7 +943,16 @@ class RankingActualLabel(NamedTuple):
             self._validate_relevance_score(self.relevance_score)
     @staticmethod
-    def _validate_relevance_labels(relevance_labels: List[str]):
+    def _validate_relevance_labels(relevance_labels: list[str]) -> None:
+        """Validate relevance labels.
+        Args:
+            relevance_labels: List of relevance labels to validate.
+        Raises:
+            TypeError: If relevance_labels is not a list of strings.
+            ValueError: If any label is an empty string.
+        """
         if not is_list_of(relevance_labels, str):
             raise TypeError("Actual Relevance Labels must be a list of strings")
         if any(label == "" for label in relevance_labels):
@@ -950,17 +961,28 @@ class RankingActualLabel(NamedTuple):
             )
     @staticmethod
-    def _validate_relevance_score(relevance_score: float):
+    def _validate_relevance_score(relevance_score: float) -> None:
+        """Validate relevance score.
+        Args:
+            relevance_score: Relevance score to validate.
+        Raises:
+            TypeError: If relevance_score is not a float or int.
+        """
         if not isinstance(relevance_score, (float, int)):
             raise TypeError("Actual Relevance score must be a float or an int")
 @dataclass
 class PromptTemplateColumnNames:
+    """Column names for prompt template configuration in LLM schemas."""
     template_column_name: str | None = None
     template_version_column_name: str | None = None
-    def __iter__(self):
+    def __iter__(self) -> Iterator[str | None]:
+        """Iterate over the prompt template column names."""
         return iter(
             (self.template_column_name, self.template_version_column_name)
         )
@@ -968,21 +990,27 @@ class PromptTemplateColumnNames:
 @dataclass
 class LLMConfigColumnNames:
+    """Column names for LLM configuration parameters in schemas."""
     model_column_name: str | None = None
     params_column_name: str | None = None
-    def __iter__(self):
+    def __iter__(self) -> Iterator[str | None]:
+        """Iterate over the LLM config column names."""
         return iter((self.model_column_name, self.params_column_name))
 @dataclass
 class LLMRunMetadataColumnNames:
+    """Column names for LLM run metadata fields in schemas."""
     total_token_count_column_name: str | None = None
     prompt_token_count_column_name: str | None = None
     response_token_count_column_name: str | None = None
     response_latency_ms_column_name: str | None = None
-    def __iter__(self):
+    def __iter__(self) -> Iterator[str | None]:
+        """Iterate over the LLM run metadata column names."""
         return iter(
             (
                 self.total_token_count_column_name,
@@ -1011,11 +1039,19 @@ class LLMRunMetadataColumnNames:
 #
 @dataclass
 class SimilarityReference:
+    """Reference to a prediction for similarity search operations."""
     prediction_id: str
     reference_column_name: str
     prediction_timestamp: datetime | None = None
-    def __post_init__(self):
+    def __post_init__(self) -> None:
+        """Validate similarity reference fields after initialization.
+        Raises:
+            ValueError: If prediction_id or reference_column_name is empty.
+            TypeError: If prediction_timestamp is not a datetime object.
+        """
         if self.prediction_id == "":
             raise ValueError("prediction id cannot be empty")
         if self.reference_column_name == "":
@@ -1028,11 +1064,20 @@ class SimilarityReference:
 @dataclass
 class SimilaritySearchParams:
-    references: List[SimilarityReference]
+    """Parameters for configuring similarity search operations."""
+    references: list[SimilarityReference]
     search_column_name: str
     threshold: float = 0
-    def __post_init__(self):
+    def __post_init__(self) -> None:
+        """Validate similarity search parameters after initialization.
+        Raises:
+            ValueError: If references list is invalid, search_column_name is
+                empty, or threshold is out of range.
+            TypeError: If any reference is not a SimilarityReference instance.
+        """
         if (
             not self.references
             or len(self.references) <= 0
@@ -1054,176 +1099,157 @@ class SimilaritySearchParams:
 @dataclass(frozen=True)
 class BaseSchema:
-    def replace(self, **changes):
+    """Base class for all schema definitions with immutable fields."""
+    def replace(self, **changes: object) -> Self:
+        """Return a new instance with specified fields replaced."""
         return replace(self, **changes)
-    def asdict(self) -> Dict[str, str]:
+    def asdict(self) -> dict[str, str]:
+        """Convert the schema to a dictionary."""
         return asdict(self)
-    def get_used_columns(self) -> Set[str]:
+    def get_used_columns(self) -> set[str]:
+        """Return the set of column names used in this schema."""
         return set(self.get_used_columns_counts().keys())
-    def get_used_columns_counts(self) -> Dict[str, int]:
+    def get_used_columns_counts(self) -> dict[str, int]:
+        """Return a dict mapping column names to their usage count."""
         raise NotImplementedError()
 @dataclass(frozen=True)
 class TypedColumns:
-    """
-    Optional class used for explicit type enforcement of feature and tag columns in the dataframe.
-    Usage:
-    ------
-        When initializing a Schema, use TypedColumns in place of a list of string column names.
-        e.g. feature_column_names=TypedColumns(
-                inferred=["feature_1", "feature_2"],
-                to_str=["feature_3"],
-                to_int=["feature_4"]
-            )
+    """Optional class used for explicit type enforcement of feature and tag columns in the dataframe.
+    When initializing a Schema, use TypedColumns in place of a list of string column names::
-    Fields:
-    -------
-        inferred (Optional[List[str]]): List of columns that will not be altered at all.
-            The values in these columns will have their type inferred as Arize validates and ingests the data.
-            There's no difference between passing in all column names as inferred
-            vs. not using TypedColumns at all.
-        to_str (Optional[List[str]]): List of columns that should be cast to pandas StringDType.
-        to_int (Optional[List[str]]): List of columns that should be cast to pandas Int64DType.
-        to_float (Optional[List[str]]): List of columns that should be cast to pandas Float64DType.
+        feature_column_names = TypedColumns(
+            inferred=["feature_1", "feature_2"],
+            to_str=["feature_3"],
+            to_int=["feature_4"],
+        )
     Notes:
-    -----
         - If a TypedColumns object is included in a Schema, pandas version 1.0.0 or higher is required.
         - Pandas StringDType is still considered an experimental field.
         - Columns not present in any field will not be captured in the Schema.
         - StringDType, Int64DType, and Float64DType are all nullable column types.
-        Null values will be ingested and represented in Arize as empty values.
+          Null values will be ingested and represented in Arize as empty values.
     """
-    inferred: List[str] | None = None
-    to_str: List[str] | None = None
-    to_int: List[str] | None = None
-    to_float: List[str] | None = None
+    inferred: list[str] | None = None
+    to_str: list[str] | None = None
+    to_int: list[str] | None = None
+    to_float: list[str] | None = None
-    def get_all_column_names(self) -> List[str]:
+    def get_all_column_names(self) -> list[str]:
+        """Return all column names across all conversion lists."""
         return list(chain.from_iterable(filter(None, self.__dict__.values())))
-    def has_duplicate_columns(self) -> Tuple[bool, Set[str]]:
+    def has_duplicate_columns(self) -> tuple[bool, set[str]]:
+        """Check for duplicate columns and return (has_duplicates, duplicate_set)."""
         # True if there are duplicates within a field's list or across fields.
         # Return a set of the duplicate column names.
         cols = self.get_all_column_names()
-        duplicates = set([x for x in cols if cols.count(x) > 1])
+        duplicates = {x for x in cols if cols.count(x) > 1}
         return len(duplicates) > 0, duplicates
     def is_empty(self) -> bool:
+        """Return True if no columns are configured for conversion."""
         return not self.get_all_column_names()
 @dataclass(frozen=True)
 class Schema(BaseSchema):
-    """
-    Used to organize and map column names containing model data within your Pandas dataframe to
-    Arize.
+    """Used to organize and map column names containing model data within your Pandas dataframe to Arize.
-    Arguments:
-    ---------
-        prediction_id_column_name (str, optional): Column name for the predictions unique identifier.
+    Args:
+        prediction_id_column_name: Column name for the predictions unique identifier.
             Unique IDs are used to match a prediction to delayed actuals or feature importances in Arize.
             If prediction ids are not provided, it will default to an empty string "" and, when possible,
             Arize will create a random prediction id on the server side. Prediction id must be a string column
             with each row indicating a unique prediction event.
-        feature_column_names (Union[List[str], TypedColumns], optional): Column names for features.
+        feature_column_names: Column names for features.
             The content of feature columns can be int, float, string. If TypedColumns is used,
             the columns will be cast to the provided types prior to logging.
-        tag_column_names (Union[List[str], TypedColumns], optional): Column names for tags. The content of tag
+        tag_column_names: Column names for tags. The content of tag
             columns can be int, float, string. If TypedColumns is used,
             the columns will be cast to the provided types prior to logging.
-        timestamp_column_name (str, optional): Column name for timestamps. The content of this
+        timestamp_column_name: Column name for timestamps. The content of this
             column must be int Unix Timestamps in seconds.
-        prediction_label_column_name (str, optional): Column name for categorical prediction values.
+        prediction_label_column_name: Column name for categorical prediction values.
             The content of this column must be convertible to string.
-        prediction_score_column_name (str, optional): Column name for numeric prediction values. The
+        prediction_score_column_name: Column name for numeric prediction values. The
             content of this column must be int/float or list of dictionaries mapping class names to
             int/float scores in the case of MULTI_CLASS model types.
-        actual_label_column_name (str, optional): Column name for categorical ground truth values.
+        actual_label_column_name: Column name for categorical ground truth values.
             The content of this column must be convertible to string.
-        actual_score_column_name (str, optional): Column name for numeric ground truth values. The
+        actual_score_column_name: Column name for numeric ground truth values. The
             content of this column must be int/float or list of dictionaries mapping class names to
             int/float scores in the case of MULTI_CLASS model types.
-        shap_values_column_names (Dict[str, str], optional): Dictionary mapping feature column name
+        shap_values_column_names: Dictionary mapping feature column name
             and corresponding SHAP feature importance column name. e.g.
             {{"feat_A": "feat_A_shap", "feat_B": "feat_B_shap"}}
-        embedding_feature_column_names (Dict[str, EmbeddingColumnNames], optional): Dictionary
+        embedding_feature_column_names: Dictionary
             mapping embedding display names to EmbeddingColumnNames objects.
-        prediction_group_id_column_name (str, optional): Column name for ranking groups or lists in
+        prediction_group_id_column_name: Column name for ranking groups or lists in
             ranking models. The content of this column must be string and is limited to 128 characters.
-        rank_column_name (str, optional): Column name for rank of each element on the its group or
+        rank_column_name: Column name for rank of each element on the its group or
             list. The content of this column must be integer between 1-100.
-        relevance_score_column_name (str, optional): Column name for ranking model type numeric
+        relevance_score_column_name: Column name for ranking model type numeric
             ground truth values. The content of this column must be int/float.
-        relevance_labels_column_name (str, optional): Column name for ranking model type categorical
+        relevance_labels_column_name: Column name for ranking model type categorical
             ground truth values. The content of this column must be a string.
-        object_detection_prediction_column_names (ObjectDetectionColumnNames, optional):
+        object_detection_prediction_column_names:
             ObjectDetectionColumnNames object containing information defining the predicted bounding
             boxes' coordinates, categories, and scores.
-        object_detection_actual_column_names (ObjectDetectionColumnNames, optional):
+        object_detection_actual_column_names:
             ObjectDetectionColumnNames object containing information defining the actual bounding
             boxes' coordinates, categories, and scores.
-        prompt_column_names (str or EmbeddingColumnNames, optional): column names for text that is passed
+        prompt_column_names: column names for text that is passed
             to the GENERATIVE_LLM model. It accepts a string (if sending only a text column) or
             EmbeddingColumnNames object containing the embedding vector data (required) and raw text
             (optional) for the input text your model acts on.
-        response_column_names (str or EmbeddingColumnNames, optional): column names for text generated by
+        response_column_names: column names for text generated by
             the GENERATIVE_LLM model. It accepts a string (if sending only a text column) or
             EmbeddingColumnNames object containing the embedding vector data (required) and raw text
             (optional) for the text your model generates.
-        prompt_template_column_names (PromptTemplateColumnNames, optional): PromptTemplateColumnNames object
+        prompt_template_column_names: PromptTemplateColumnNames object
             containing the prompt template and the prompt template version.
-        llm_config_column_names (LLMConfigColumnNames, optional): LLMConfigColumnNames object containing
+        llm_config_column_names: LLMConfigColumnNames object containing
             the LLM's model name and its hyper parameters used at inference.
-        llm_run_metadata_column_names (LLMRunMetadataColumnNames, optional): LLMRunMetadataColumnNames
+        llm_run_metadata_column_names: LLMRunMetadataColumnNames
             object containing token counts and latency metrics
-        retrieved_document_ids_column_name (str, optional): Column name for retrieved document ids.
+        retrieved_document_ids_column_name: Column name for retrieved document ids.
             The content of this column must be lists with entries convertible to strings.
-        multi_class_threshold_scores_column_name (str, optional):
+        multi_class_threshold_scores_column_name:
             Column name for dictionary that maps class names to threshold values. The
             content of this column must be dictionary of str -> int/float.
-        semantic_segmentation_prediction_column_names (SemanticSegmentationColumnNames, optional):
+        semantic_segmentation_prediction_column_names:
             SemanticSegmentationColumnNames object containing information defining the predicted
             polygon coordinates and categories.
-        semantic_segmentation_actual_column_names (SemanticSegmentationColumnNames, optional):
+        semantic_segmentation_actual_column_names:
             SemanticSegmentationColumnNames object containing information defining the actual
             polygon coordinates and categories.
-        instance_segmentation_prediction_column_names (InstanceSegmentationPredictionColumnNames, optional):
+        instance_segmentation_prediction_column_names:
             InstanceSegmentationPredictionColumnNames object containing information defining the predicted
             polygon coordinates, categories, scores, and bounding box coordinates.
-        instance_segmentation_actual_column_names (InstanceSegmentationActualColumnNames, optional):
+        instance_segmentation_actual_column_names:
             InstanceSegmentationActualColumnNames object containing information defining the actual
             polygon coordinates, categories, scores, and bounding box coordinates.
-    Methods:
-    -------
-        replace(**changes):
-            Replaces fields of the schema
-        asdict():
-            Returns the schema as a dictionary. Warning: the types are not maintained, fields are
-            converted to strings.
-        get_used_columns():
-            Returns a set with the unique collection of columns to be used from the dataframe.
     """
     prediction_id_column_name: str | None = None
-    feature_column_names: List[str] | TypedColumns | None = None
-    tag_column_names: List[str] | TypedColumns | None = None
+    feature_column_names: list[str] | TypedColumns | None = None
+    tag_column_names: list[str] | TypedColumns | None = None
     timestamp_column_name: str | None = None
     prediction_label_column_name: str | None = None
     prediction_score_column_name: str | None = None
     actual_label_column_name: str | None = None
     actual_score_column_name: str | None = None
-    shap_values_column_names: Dict[str, str] | None = None
-    embedding_feature_column_names: Dict[str, EmbeddingColumnNames] | None = (
+    shap_values_column_names: dict[str, str] | None = None
+    embedding_feature_column_names: dict[str, EmbeddingColumnNames] | None = (
         None  # type:ignore
     )
     prediction_group_id_column_name: str | None = None
@@ -1242,7 +1268,7 @@ class Schema(BaseSchema):
     prompt_template_column_names: PromptTemplateColumnNames | None = None
     llm_config_column_names: LLMConfigColumnNames | None = None
     llm_run_metadata_column_names: LLMRunMetadataColumnNames | None = None
-    retrieved_document_ids_column_name: List[str] | None = None
+    retrieved_document_ids_column_name: list[str] | None = None
     multi_class_threshold_scores_column_name: str | None = None
     semantic_segmentation_prediction_column_names: (
         SemanticSegmentationColumnNames | None
@@ -1257,7 +1283,8 @@ class Schema(BaseSchema):
         InstanceSegmentationActualColumnNames | None
     ) = None
-    def get_used_columns_counts(self) -> Dict[str, int]:
+    def get_used_columns_counts(self) -> dict[str, int]:
+        """Return a dict mapping column names to their usage count."""
         columns_used_counts = {}
         for field in self.__dataclass_fields__:
@@ -1364,6 +1391,7 @@ class Schema(BaseSchema):
         return columns_used_counts
     def has_prediction_columns(self) -> bool:
+        """Return True if prediction columns are configured."""
         prediction_cols = (
             self.prediction_label_column_name,
             self.prediction_score_column_name,
@@ -1377,6 +1405,7 @@ class Schema(BaseSchema):
         return any(col is not None for col in prediction_cols)
     def has_actual_columns(self) -> bool:
+        """Return True if actual label columns are configured."""
         actual_cols = (
             self.actual_label_column_name,
             self.actual_score_column_name,
@@ -1389,13 +1418,16 @@ class Schema(BaseSchema):
         return any(col is not None for col in actual_cols)
     def has_feature_importance_columns(self) -> bool:
+        """Return True if feature importance columns are configured."""
         feature_importance_cols = (self.shap_values_column_names,)
         return any(col is not None for col in feature_importance_cols)
     def has_typed_columns(self) -> bool:
+        """Return True if typed columns are configured."""
         return any(self.typed_column_fields())
-    def typed_column_fields(self) -> Set[str]:
+    def typed_column_fields(self) -> set[str]:
+        """Return the set of field names with typed columns."""
         return {
             field
             for field in self.__dataclass_fields__
@@ -1403,9 +1435,9 @@ class Schema(BaseSchema):
         }
     def is_delayed(self) -> bool:
-        """
-        This function checks if the given schema, according to the columns provided
-        by the user, has inherently latent information
+        """Check if the schema has inherently latent information.
+        Determines this based on the columns provided by the user.
         Returns:
             bool: True if the schema is "delayed", i.e., does not possess prediction
@@ -1418,11 +1450,14 @@ class Schema(BaseSchema):
 @dataclass(frozen=True)
 class CorpusSchema(BaseSchema):
+    """Schema for corpus data with document identification and content columns."""
     document_id_column_name: str | None = None
     document_version_column_name: str | None = None
     document_text_embedding_column_names: EmbeddingColumnNames | None = None
-    def get_used_columns_counts(self) -> Dict[str, int]:
+    def get_used_columns_counts(self) -> dict[str, int]:
+        """Return a dict mapping column names to their usage count."""
         columns_used_counts = {}
         if self.document_id_column_name is not None:
@@ -1459,6 +1494,8 @@ class CorpusSchema(BaseSchema):
 @unique
 class ArizeTypes(Enum):
+    """Enum representing supported data types in Arize platform."""
     STR = 0
     FLOAT = 1
     INT = 2
@@ -1466,76 +1503,13 @@ class ArizeTypes(Enum):
 @dataclass(frozen=True)
 class TypedValue:
+    """Container for a value with its associated Arize type."""
     type: ArizeTypes
     value: str | bool | float | int
-def is_json_str(s: str) -> bool:
-    try:
-        json.loads(s)
-    except ValueError:
-        return False
-    except TypeError:
-        return False
-    return True
-T = TypeVar("T", bound=type)
-def is_array_of(arr: Sequence[object], tp: T) -> bool:
-    return isinstance(arr, np.ndarray) and all(isinstance(x, tp) for x in arr)
-def is_list_of(lst: Sequence[object], tp: T) -> bool:
-    return isinstance(lst, list) and all(isinstance(x, tp) for x in lst)
-def is_iterable_of(lst: Sequence[object], tp: T) -> bool:
-    return isinstance(lst, Iterable) and all(isinstance(x, tp) for x in lst)
-def is_dict_of(
-    d: Dict[object, object],
-    key_allowed_types: T,
-    value_allowed_types: T = (),
-    value_list_allowed_types: T = (),
-) -> bool:
-    """
-    Method to check types are valid for dictionary.
-    Arguments:
-    ---------
-        d (Dict[object, object]): dictionary itself
-        key_allowed_types (T): all allowed types for keys of dictionary
-        value_allowed_types (T): all allowed types for values of dictionary
-        value_list_allowed_types (T): if value is a list, these are the allowed
-        types for value list
-    Returns:
-    -------
-        True if the data types of dictionary match the types specified by the
-        arguments, false otherwise
-    """
-    if value_list_allowed_types and not isinstance(
-        value_list_allowed_types, tuple
-    ):
-        value_list_allowed_types = (value_list_allowed_types,)
-    return (
-        isinstance(d, dict)
-        and all(isinstance(k, key_allowed_types) for k in d)
-        and all(
-            isinstance(v, value_allowed_types)
-            or any(is_list_of(v, t) for t in value_list_allowed_types)
-            for v in d.values()
-            if value_allowed_types or value_list_allowed_types
-        )
-    )
-def _count_characters_raw_data(data: str | List[str]) -> int:
+def _count_characters_raw_data(data: str | list[str]) -> int:
     character_count = 0
     if isinstance(data, str):
         character_count = len(data)
@@ -1551,8 +1525,14 @@ def _count_characters_raw_data(data: str | List[str]) -> int:
 def add_to_column_count_dictionary(
-    column_dictionary: Dict[str, int], col: str | None
-):
+    column_dictionary: dict[str, int], col: str | None
+) -> None:
+    """Increment the count for a column name in a dictionary.
+    Args:
+        column_dictionary: Dictionary mapping column names to counts.
+        col: The column name to increment, or None to skip.
+    """
     if col:
         if col in column_dictionary:
             column_dictionary[col] += 1
@@ -1560,7 +1540,9 @@ def add_to_column_count_dictionary(
             column_dictionary[col] = 1
-def _validate_bounding_box_coordinates(bounding_box_coordinates: List[float]):
+def _validate_bounding_box_coordinates(
+    bounding_box_coordinates: list[float],
+) -> None:
     if not is_list_of(bounding_box_coordinates, float):
         raise TypeError(
             "Each bounding box's coordinates must be a lists of floats"
@@ -1586,10 +1568,12 @@ def _validate_bounding_box_coordinates(bounding_box_coordinates: List[float]):
             f"top-left. Found {bounding_box_coordinates}"
         )
-    return None
+    return
-def _validate_polygon_coordinates(polygon_coordinates: List[List[float]]):
+def _validate_polygon_coordinates(
+    polygon_coordinates: list[list[float]],
+) -> None:
     if not is_list_of(polygon_coordinates, list):
         raise TypeError("Polygon coordinates must be a list of lists of floats")
     for coordinates in polygon_coordinates:
@@ -1651,27 +1635,41 @@ def _validate_polygon_coordinates(polygon_coordinates: List[List[float]]):
                         f"{coordinates}"
                     )
-    return None
+    return
-def segments_intersect(p1, p2, p3, p4):
-    """
-    Check if two line segments intersect.
+def segments_intersect(
+    p1: tuple[float, float],
+    p2: tuple[float, float],
+    p3: tuple[float, float],
+    p4: tuple[float, float],
+) -> bool:
+    """Check if two line segments intersect.
     Args:
-        p1, p2: First line segment endpoints (x,y)
-        p3, p4: Second line segment endpoints (x,y)
+        p1: First endpoint of the first line segment (x,y)
+        p2: Second endpoint of the first line segment (x,y)
+        p3: First endpoint of the second line segment (x,y)
+        p4: Second endpoint of the second line segment (x,y)
     Returns:
         True if the line segments intersect, False otherwise
     """
     # Function to calculate direction
-    def orientation(p, q, r):
+    def orientation(
+        p: tuple[float, float],
+        q: tuple[float, float],
+        r: tuple[float, float],
+    ) -> float:
         return (q[1] - p[1]) * (r[0] - q[0]) - (q[0] - p[0]) * (r[1] - q[1])
     # Function to check if point q is on segment pr
-    def on_segment(p, q, r):
+    def on_segment(
+        p: tuple[float, float],
+        q: tuple[float, float],
+        r: tuple[float, float],
+    ) -> bool:
         return (
             q[0] <= max(p[0], r[0])
             and q[0] >= min(p[0], r[0])
@@ -1703,17 +1701,20 @@ def segments_intersect(p1, p2, p3, p4):
 @unique
 class StatusCodes(Enum):
+    """Enum representing status codes for operations and responses."""
     UNSET = 0
     OK = 1
     ERROR = 2
     @classmethod
-    def list_codes(cls):
+    def list_codes(cls) -> list[str]:
+        """Return a list of all status code names."""
         return [t.name for t in cls]
-def convert_element(value):
-    """Converts scalar or array to python native"""
+def convert_element(value: object) -> object:
+    """Converts scalar or array to python native."""
     val = getattr(value, "tolist", lambda: value)()
     # Check if it's a list since elements from pd indices are converted to a
     # scalar whereas pd series/dataframe elements are converted to list of 1
@@ -1734,7 +1735,7 @@ PredictionLabelTypes = (
     | bool
     | int
     | float
-    | Tuple[str, float]
+    | tuple[str, float]
     | ObjectDetectionLabel
     | RankingPredictionLabel
     | MultiClassPredictionLabel
@@ -1745,7 +1746,7 @@ ActualLabelTypes = (
     | bool
     | int
     | float
-    | Tuple[str, float]
+    | tuple[str, float]
     | ObjectDetectionLabel
     | RankingActualLabel
     | MultiClassActualLabel

arize 8.0.0a22__py3-none-any.whl → 8.0.0b0__py3-none-any.whl

arize 8.0.0a22py3-none-any.whl → 8.0.0b0py3-none-any.whl