PyPI - arize - Versions diffs - 8.0.0a21__py3-none-any.whl → 8.0.0a23__py3-none-any.whl - Mend

arize 8.0.0a21-py3-none-any.whl → 8.0.0a23-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (166)

arize/__init__.py +17 -9
arize/_exporter/client.py +55 -36
arize/_exporter/parsers/tracing_data_parser.py +41 -30
arize/_exporter/validation.py +3 -3
arize/_flight/client.py +208 -77
arize/_generated/api_client/__init__.py +30 -6
arize/_generated/api_client/api/__init__.py +1 -0
arize/_generated/api_client/api/datasets_api.py +864 -190
arize/_generated/api_client/api/experiments_api.py +167 -131
arize/_generated/api_client/api/projects_api.py +1197 -0
arize/_generated/api_client/api_client.py +2 -2
arize/_generated/api_client/configuration.py +42 -34
arize/_generated/api_client/exceptions.py +2 -2
arize/_generated/api_client/models/__init__.py +15 -4
arize/_generated/api_client/models/dataset.py +10 -10
arize/_generated/api_client/models/dataset_example.py +111 -0
arize/_generated/api_client/models/dataset_example_update.py +100 -0
arize/_generated/api_client/models/dataset_version.py +13 -13
arize/_generated/api_client/models/datasets_create_request.py +16 -8
arize/_generated/api_client/models/datasets_examples_insert_request.py +100 -0
arize/_generated/api_client/models/datasets_examples_list200_response.py +106 -0
arize/_generated/api_client/models/datasets_examples_update_request.py +102 -0
arize/_generated/api_client/models/datasets_list200_response.py +10 -4
arize/_generated/api_client/models/experiment.py +14 -16
arize/_generated/api_client/models/experiment_run.py +108 -0
arize/_generated/api_client/models/experiment_run_create.py +102 -0
arize/_generated/api_client/models/experiments_create_request.py +16 -10
arize/_generated/api_client/models/experiments_list200_response.py +10 -4
arize/_generated/api_client/models/experiments_runs_list200_response.py +19 -5
arize/_generated/api_client/models/{error.py → pagination_metadata.py} +13 -11
arize/_generated/api_client/models/primitive_value.py +172 -0
arize/_generated/api_client/models/problem.py +100 -0
arize/_generated/api_client/models/project.py +99 -0
arize/_generated/api_client/models/{datasets_list_examples200_response.py → projects_create_request.py} +13 -11
arize/_generated/api_client/models/projects_list200_response.py +106 -0
arize/_generated/api_client/rest.py +2 -2
arize/_generated/api_client/test/test_dataset.py +4 -2
arize/_generated/api_client/test/test_dataset_example.py +56 -0
arize/_generated/api_client/test/test_dataset_example_update.py +52 -0
arize/_generated/api_client/test/test_dataset_version.py +7 -2
arize/_generated/api_client/test/test_datasets_api.py +27 -13
arize/_generated/api_client/test/test_datasets_create_request.py +8 -4
arize/_generated/api_client/test/{test_datasets_list_examples200_response.py → test_datasets_examples_insert_request.py} +19 -15
arize/_generated/api_client/test/test_datasets_examples_list200_response.py +66 -0
arize/_generated/api_client/test/test_datasets_examples_update_request.py +61 -0
arize/_generated/api_client/test/test_datasets_list200_response.py +9 -3
arize/_generated/api_client/test/test_experiment.py +2 -4
arize/_generated/api_client/test/test_experiment_run.py +56 -0
arize/_generated/api_client/test/test_experiment_run_create.py +54 -0
arize/_generated/api_client/test/test_experiments_api.py +6 -6
arize/_generated/api_client/test/test_experiments_create_request.py +9 -6
arize/_generated/api_client/test/test_experiments_list200_response.py +9 -5
arize/_generated/api_client/test/test_experiments_runs_list200_response.py +15 -5
arize/_generated/api_client/test/test_pagination_metadata.py +53 -0
arize/_generated/api_client/test/{test_error.py → test_primitive_value.py} +13 -14
arize/_generated/api_client/test/test_problem.py +57 -0
arize/_generated/api_client/test/test_project.py +58 -0
arize/_generated/api_client/test/test_projects_api.py +59 -0
arize/_generated/api_client/test/test_projects_create_request.py +54 -0
arize/_generated/api_client/test/test_projects_list200_response.py +70 -0
arize/_generated/api_client_README.md +43 -29
arize/_generated/protocol/flight/flight_pb2.py +400 -0
arize/_lazy.py +27 -19
arize/client.py +269 -55
arize/config.py +365 -116
arize/constants/__init__.py +1 -0
arize/constants/config.py +11 -4
arize/constants/ml.py +6 -4
arize/constants/openinference.py +2 -0
arize/constants/pyarrow.py +2 -0
arize/constants/spans.py +3 -1
arize/datasets/__init__.py +1 -0
arize/datasets/client.py +299 -84
arize/datasets/errors.py +32 -2
arize/datasets/validation.py +18 -8
arize/embeddings/__init__.py +2 -0
arize/embeddings/auto_generator.py +23 -19
arize/embeddings/base_generators.py +89 -36
arize/embeddings/constants.py +2 -0
arize/embeddings/cv_generators.py +26 -4
arize/embeddings/errors.py +27 -5
arize/embeddings/nlp_generators.py +31 -12
arize/embeddings/tabular_generators.py +32 -20
arize/embeddings/usecases.py +12 -2
arize/exceptions/__init__.py +1 -0
arize/exceptions/auth.py +11 -1
arize/exceptions/base.py +29 -4
arize/exceptions/models.py +21 -2
arize/exceptions/parameters.py +31 -0
arize/exceptions/spaces.py +12 -1
arize/exceptions/types.py +86 -7
arize/exceptions/values.py +220 -20
arize/experiments/__init__.py +1 -0
arize/experiments/client.py +390 -286
arize/experiments/evaluators/__init__.py +1 -0
arize/experiments/evaluators/base.py +74 -41
arize/experiments/evaluators/exceptions.py +6 -3
arize/experiments/evaluators/executors.py +121 -73
arize/experiments/evaluators/rate_limiters.py +106 -57
arize/experiments/evaluators/types.py +34 -7
arize/experiments/evaluators/utils.py +65 -27
arize/experiments/functions.py +103 -101
arize/experiments/tracing.py +52 -44
arize/experiments/types.py +56 -31
arize/logging.py +54 -22
arize/models/__init__.py +1 -0
arize/models/batch_validation/__init__.py +1 -0
arize/models/batch_validation/errors.py +543 -65
arize/models/batch_validation/validator.py +339 -300
arize/models/bounded_executor.py +20 -7
arize/models/casting.py +75 -29
arize/models/client.py +326 -107
arize/models/proto.py +95 -40
arize/models/stream_validation.py +42 -14
arize/models/surrogate_explainer/__init__.py +1 -0
arize/models/surrogate_explainer/mimic.py +24 -13
arize/pre_releases.py +43 -0
arize/projects/__init__.py +1 -0
arize/projects/client.py +129 -0
arize/regions.py +40 -0
arize/spans/__init__.py +1 -0
arize/spans/client.py +130 -106
arize/spans/columns.py +13 -0
arize/spans/conversion.py +54 -38
arize/spans/validation/__init__.py +1 -0
arize/spans/validation/annotations/__init__.py +1 -0
arize/spans/validation/annotations/annotations_validation.py +6 -4
arize/spans/validation/annotations/dataframe_form_validation.py +13 -11
arize/spans/validation/annotations/value_validation.py +35 -11
arize/spans/validation/common/__init__.py +1 -0
arize/spans/validation/common/argument_validation.py +33 -8
arize/spans/validation/common/dataframe_form_validation.py +35 -9
arize/spans/validation/common/errors.py +211 -11
arize/spans/validation/common/value_validation.py +80 -13
arize/spans/validation/evals/__init__.py +1 -0
arize/spans/validation/evals/dataframe_form_validation.py +28 -8
arize/spans/validation/evals/evals_validation.py +34 -4
arize/spans/validation/evals/value_validation.py +26 -3
arize/spans/validation/metadata/__init__.py +1 -1
arize/spans/validation/metadata/argument_validation.py +14 -5
arize/spans/validation/metadata/dataframe_form_validation.py +26 -10
arize/spans/validation/metadata/value_validation.py +24 -10
arize/spans/validation/spans/__init__.py +1 -0
arize/spans/validation/spans/dataframe_form_validation.py +34 -13
arize/spans/validation/spans/spans_validation.py +35 -4
arize/spans/validation/spans/value_validation.py +76 -7
arize/types.py +293 -157
arize/utils/__init__.py +1 -0
arize/utils/arrow.py +31 -15
arize/utils/cache.py +34 -6
arize/utils/dataframe.py +19 -2
arize/utils/online_tasks/__init__.py +2 -0
arize/utils/online_tasks/dataframe_preprocessor.py +53 -41
arize/utils/openinference_conversion.py +44 -5
arize/utils/proto.py +10 -0
arize/utils/size.py +5 -3
arize/version.py +3 -1
{arize-8.0.0a21.dist-info → arize-8.0.0a23.dist-info}/METADATA +4 -3
arize-8.0.0a23.dist-info/RECORD +174 -0
{arize-8.0.0a21.dist-info → arize-8.0.0a23.dist-info}/WHEEL +1 -1
arize-8.0.0a23.dist-info/licenses/LICENSE +176 -0
arize-8.0.0a23.dist-info/licenses/NOTICE +13 -0
arize/_generated/protocol/flight/export_pb2.py +0 -61
arize/_generated/protocol/flight/ingest_pb2.py +0 -365
arize-8.0.0a21.dist-info/RECORD +0 -146
arize-8.0.0a21.dist-info/licenses/LICENSE.md +0 -12

arize/types.py CHANGED

@@ -1,19 +1,17 @@
+"""Common type definitions and data models used across the Arize SDK."""
 import json
 import logging
 import math
+from collections.abc import Iterable, Iterator, Sequence
 from dataclasses import asdict, dataclass, replace
 from datetime import datetime
 from decimal import Decimal
 from enum import Enum, unique
 from itertools import chain
 from typing import (
-    Dict,
-    Iterable,
-    List,
     NamedTuple,
-    Sequence,
-    Set,
-    Tuple,
+    Self,
     TypeVar,
 )
@@ -48,6 +46,8 @@ logger = logging.getLogger(__name__)
 @unique
 class ModelTypes(Enum):
+    """Enum representing supported model types in Arize."""
     NUMERIC = 1
     SCORE_CATEGORICAL = 2
     RANKING = 3
@@ -58,7 +58,8 @@ class ModelTypes(Enum):
     MULTI_CLASS = 8
     @classmethod
-    def list_types(cls):
+    def list_types(cls) -> list[str]:
+        """Return a list of all type names in this enum."""
         return [t.name for t in cls]
@@ -70,7 +71,10 @@ CATEGORICAL_MODEL_TYPES = [
 class DocEnum(Enum):
-    def __new__(cls, value, doc=None):
+    """Enum subclass supporting inline documentation for enum members."""
+    def __new__(cls, value: object, doc: str | None = None) -> Self:
+        """Create a new enum instance with optional documentation."""
         self = object.__new__(
             cls
         )  # calling super().__new__(value) here would fail
@@ -80,13 +84,13 @@ class DocEnum(Enum):
         return self
     def __repr__(self) -> str:
+        """Return a string representation including documentation."""
         return f"{self.name} metrics include: {self.__doc__}"
 @unique
 class Metrics(DocEnum):
-    """
-    Metric groupings, used for validation of schema columns in log() call.
+    """Metric groupings, used for validation of schema columns in log() call.
     See docstring descriptions of the Enum with __doc__ or __repr__(), e.g.:
     Metrics.RANKING.__doc__
@@ -105,6 +109,8 @@ class Metrics(DocEnum):
 @unique
 class Environments(Enum):
+    """Enum representing deployment environments for models."""
     TRAINING = 1
     VALIDATION = 2
     PRODUCTION = 3
@@ -114,11 +120,18 @@ class Environments(Enum):
 @dataclass
 class EmbeddingColumnNames:
+    """Column names for embedding feature data."""
     vector_column_name: str = ""
     data_column_name: str | None = None
     link_to_data_column_name: str | None = None
-    def __post_init__(self):
+    def __post_init__(self) -> None:
+        """Validate that vector column name is specified.
+        Raises:
+            ValueError: If vector_column_name is empty.
+        """
         if not self.vector_column_name:
             raise ValueError(
                 "embedding_features require a vector to be specified. You can "
@@ -126,7 +139,8 @@ class EmbeddingColumnNames:
                 "(from arize.pandas.embeddings) if you do not have them"
             )
-    def __iter__(self):
+    def __iter__(self) -> Iterator[str | None]:
+        """Iterate over the embedding column names."""
         return iter(
             (
                 self.vector_column_name,
@@ -137,14 +151,16 @@ class EmbeddingColumnNames:
 class Embedding(NamedTuple):
-    vector: List[float]
-    data: str | List[str] | None = None
+    """Container for embedding vector data with optional raw data and links."""
+    vector: list[float]
+    data: str | list[str] | None = None
     link_to_data: str | None = None
     def validate(self, emb_name: str | int | float) -> None:
-        """
-        Validates that the embedding object passed is of the correct format.
-        That is, validations must be passed for vector, data & link_to_data.
+        """Validates that the embedding object passed is of the correct format.
+        Ensures validations are passed for vector, data, and link_to_data fields.
         Arguments:
         ---------
@@ -167,19 +183,16 @@ class Embedding(NamedTuple):
         if self.link_to_data is not None:
             self._validate_embedding_link_to_data(emb_name, self.link_to_data)
-        return None
+        return
     def _validate_embedding_vector(
         self,
         emb_name: str | int | float,
     ) -> None:
-        """
-        Validates that the embedding vector passed is of the correct format.
-        That is:
-            1. Type must be list or convertible to list (like numpy arrays,
-            pandas Series)
-            2. List must not be empty
-            3. Elements in list must be floats
+        """Validates that the embedding vector passed is of the correct format.
+        Requirements: 1) Type must be list or convertible to list (like numpy arrays,
+        pandas Series), 2) List must not be empty, 3) Elements in list must be floats.
         Arguments:
         ---------
@@ -209,11 +222,11 @@ class Embedding(NamedTuple):
     @staticmethod
     def _validate_embedding_data(
-        emb_name: str | int | float, data: str | List[str]
+        emb_name: str | int | float, data: str | list[str]
     ) -> None:
-        """
-        Validates that the embedding raw data field is of the correct format. That is:
-            1. Must be string or list of strings (NLP case)
+        """Validates that the embedding raw data field is of the correct format.
+        Requirement: Must be string or list of strings (NLP case).
         Arguments:
         ---------
@@ -247,7 +260,7 @@ class Embedding(NamedTuple):
                 f"Embedding data field must not contain more than {MAX_RAW_DATA_CHARACTERS} characters. "
                 f"Found {character_count}."
             )
-        elif character_count > MAX_RAW_DATA_CHARACTERS_TRUNCATION:
+        if character_count > MAX_RAW_DATA_CHARACTERS_TRUNCATION:
             logger.warning(
                 get_truncation_warning_message(
                     "Embedding raw data fields",
@@ -259,9 +272,9 @@ class Embedding(NamedTuple):
     def _validate_embedding_link_to_data(
         emb_name: str | int | float, link_to_data: str
     ) -> None:
-        """
-        Validates that the embedding link to data field is of the correct format. That is:
-            1. Must be string
+        """Validates that the embedding link to data field is of the correct format.
+        Requirement: Must be string.
         Arguments:
         ---------
@@ -282,13 +295,11 @@ class Embedding(NamedTuple):
     @staticmethod
     def _is_valid_iterable(
-        data: str | List[str] | List[float] | np.ndarray,
+        data: str | list[str] | list[float] | np.ndarray,
     ) -> bool:
-        """
-        Validates that the input data field is of the correct iterable type. That is:
-            1. List or
-            2. numpy array or
-            3. pandas Series
+        """Validates that the input data field is of the correct iterable type.
+        Accepted types: 1) List, 2) numpy array, or 3) pandas Series.
         Arguments:
         ---------
@@ -327,12 +338,15 @@ class Embedding(NamedTuple):
 class LLMRunMetadata(NamedTuple):
+    """Metadata for LLM execution including token counts and latency."""
     total_token_count: int | None = None
     prompt_token_count: int | None = None
     response_token_count: int | None = None
     response_latency_ms: int | float | None = None
     def validate(self) -> None:
+        """Validate the field values and constraints."""
         allowed_types = (int, float, np.int16, np.int32, np.float16, np.float32)
         if not isinstance(self.total_token_count, allowed_types):
             raise InvalidValueType(
@@ -361,9 +375,9 @@ class LLMRunMetadata(NamedTuple):
 class ObjectDetectionColumnNames(NamedTuple):
-    """
-    Used to log object detection prediction and actual values that are assigned to the prediction or
-    actual schema parameter.
+    """Used to log object detection prediction and actual values.
+    These values are assigned to the prediction or actual schema parameter.
     Arguments:
     ---------
@@ -385,9 +399,9 @@ class ObjectDetectionColumnNames(NamedTuple):
 class SemanticSegmentationColumnNames(NamedTuple):
-    """
-    Used to log semantic segmentation prediction and actual values that are assigned to the prediction or
-    actual schema parameter.
+    """Used to log semantic segmentation prediction and actual values.
+    These values are assigned to the prediction or actual schema parameter.
     Arguments:
     ---------
@@ -405,8 +419,7 @@ class SemanticSegmentationColumnNames(NamedTuple):
 class InstanceSegmentationPredictionColumnNames(NamedTuple):
-    """
-    Used to log instance segmentation prediction values that are assigned to the prediction schema parameter.
+    """Used to log instance segmentation prediction values for the prediction schema parameter.
     Arguments:
     ---------
@@ -433,8 +446,7 @@ class InstanceSegmentationPredictionColumnNames(NamedTuple):
 class InstanceSegmentationActualColumnNames(NamedTuple):
-    """
-    Used to log instance segmentation actual values that are assigned to the actual schema parameter.
+    """Used to log instance segmentation actual values that are assigned to the actual schema parameter.
     Arguments:
     ---------
@@ -455,12 +467,15 @@ class InstanceSegmentationActualColumnNames(NamedTuple):
 class ObjectDetectionLabel(NamedTuple):
-    bounding_boxes_coordinates: List[List[float]]
-    categories: List[str]
+    """Label data for object detection tasks with bounding boxes and categories."""
+    bounding_boxes_coordinates: list[list[float]]
+    categories: list[str]
     # Actual Object Detection Labels won't have scores
-    scores: List[float] | None = None
+    scores: list[float] | None = None
-    def validate(self, prediction_or_actual: str):
+    def validate(self, prediction_or_actual: str) -> None:
+        """Validate the object detection label fields and constraints."""
         # Validate bounding boxes
         self._validate_bounding_boxes_coordinates()
         # Validate categories
@@ -470,7 +485,7 @@ class ObjectDetectionLabel(NamedTuple):
         # Validate we have the same number of bounding boxes, categories and scores
         self._validate_count_match()
-    def _validate_bounding_boxes_coordinates(self):
+    def _validate_bounding_boxes_coordinates(self) -> None:
         if not is_list_of(self.bounding_boxes_coordinates, list):
             raise TypeError(
                 "Object Detection Label bounding boxes must be a list of lists of floats"
@@ -478,14 +493,14 @@ class ObjectDetectionLabel(NamedTuple):
         for coordinates in self.bounding_boxes_coordinates:
             _validate_bounding_box_coordinates(coordinates)
-    def _validate_categories(self):
+    def _validate_categories(self) -> None:
         # Allows for categories as empty strings
         if not is_list_of(self.categories, str):
             raise TypeError(
                 "Object Detection Label categories must be a list of strings"
             )
-    def _validate_scores(self, prediction_or_actual: str):
+    def _validate_scores(self, prediction_or_actual: str) -> None:
         if self.scores is None:
             if prediction_or_actual == "prediction":
                 raise ValueError(
@@ -507,7 +522,7 @@ class ObjectDetectionLabel(NamedTuple):
                     f"{self.scores}"
                 )
-    def _validate_count_match(self):
+    def _validate_count_match(self) -> None:
         n_bounding_boxes = len(self.bounding_boxes_coordinates)
         if n_bounding_boxes == 0:
             raise ValueError(
@@ -534,10 +549,13 @@ class ObjectDetectionLabel(NamedTuple):
 class SemanticSegmentationLabel(NamedTuple):
-    polygon_coordinates: List[List[float]]
-    categories: List[str]
+    """Label data for semantic segmentation with polygon coordinates and categories."""
-    def validate(self):
+    polygon_coordinates: list[list[float]]
+    categories: list[str]
+    def validate(self) -> None:
+        """Validate the field values and constraints."""
         # Validate polygon coordinates
         self._validate_polygon_coordinates()
         # Validate categories
@@ -545,17 +563,17 @@ class SemanticSegmentationLabel(NamedTuple):
         # Validate we have the same number of polygon coordinates and categories
         self._validate_count_match()
-    def _validate_polygon_coordinates(self):
+    def _validate_polygon_coordinates(self) -> None:
         _validate_polygon_coordinates(self.polygon_coordinates)
-    def _validate_categories(self):
+    def _validate_categories(self) -> None:
         # Allows for categories as empty strings
         if not is_list_of(self.categories, str):
             raise TypeError(
                 "Semantic Segmentation Label categories must be a list of strings"
             )
-    def _validate_count_match(self):
+    def _validate_count_match(self) -> None:
         n_polygon_coordinates = len(self.polygon_coordinates)
         if n_polygon_coordinates == 0:
             raise ValueError(
@@ -573,12 +591,15 @@ class SemanticSegmentationLabel(NamedTuple):
 class InstanceSegmentationPredictionLabel(NamedTuple):
-    polygon_coordinates: List[List[float]]
-    categories: List[str]
-    scores: List[float] | None = None
-    bounding_boxes_coordinates: List[List[float]] | None = None
+    """Prediction label for instance segmentation with polygons and category information."""
+    polygon_coordinates: list[list[float]]
+    categories: list[str]
+    scores: list[float] | None = None
+    bounding_boxes_coordinates: list[list[float]] | None = None
-    def validate(self):
+    def validate(self) -> None:
+        """Validate the field values and constraints."""
         # Validate polygon coordinates
         self._validate_polygon_coordinates()
         # Validate categories
@@ -590,17 +611,17 @@ class InstanceSegmentationPredictionLabel(NamedTuple):
         # Validate we have the same number of polygon coordinates and categories
         self._validate_count_match()
-    def _validate_polygon_coordinates(self):
+    def _validate_polygon_coordinates(self) -> None:
         _validate_polygon_coordinates(self.polygon_coordinates)
-    def _validate_categories(self):
+    def _validate_categories(self) -> None:
         # Allows for categories as empty strings
         if not is_list_of(self.categories, str):
             raise TypeError(
                 "Instance Segmentation Prediction Label categories must be a list of strings"
             )
-    def _validate_scores(self):
+    def _validate_scores(self) -> None:
         if self.scores is not None:
             if not is_list_of(self.scores, float):
                 raise TypeError(
@@ -613,7 +634,7 @@ class InstanceSegmentationPredictionLabel(NamedTuple):
                     f"{self.scores}"
                 )
-    def _validate_bounding_boxes(self):
+    def _validate_bounding_boxes(self) -> None:
         if self.bounding_boxes_coordinates is not None:
             if not is_list_of(self.bounding_boxes_coordinates, list):
                 raise TypeError(
@@ -622,7 +643,7 @@ class InstanceSegmentationPredictionLabel(NamedTuple):
             for coordinates in self.bounding_boxes_coordinates:
                 _validate_bounding_box_coordinates(coordinates)
-    def _validate_count_match(self):
+    def _validate_count_match(self) -> None:
         n_polygon_coordinates = len(self.polygon_coordinates)
         if n_polygon_coordinates == 0:
             raise ValueError(
@@ -657,11 +678,14 @@ class InstanceSegmentationPredictionLabel(NamedTuple):
 class InstanceSegmentationActualLabel(NamedTuple):
-    polygon_coordinates: List[List[float]]
-    categories: List[str]
-    bounding_boxes_coordinates: List[List[float]] | None = None
+    """Actual label for instance segmentation with polygon coordinates and categories."""
+    polygon_coordinates: list[list[float]]
+    categories: list[str]
+    bounding_boxes_coordinates: list[list[float]] | None = None
-    def validate(self):
+    def validate(self) -> None:
+        """Validate the field values and constraints."""
         # Validate polygon coordinates
         self._validate_polygon_coordinates()
         # Validate categories
@@ -671,17 +695,17 @@ class InstanceSegmentationActualLabel(NamedTuple):
         # Validate we have the same number of polygon coordinates and categories
         self._validate_count_match()
-    def _validate_polygon_coordinates(self):
+    def _validate_polygon_coordinates(self) -> None:
         _validate_polygon_coordinates(self.polygon_coordinates)
-    def _validate_categories(self):
+    def _validate_categories(self) -> None:
         # Allows for categories as empty strings
         if not is_list_of(self.categories, str):
             raise TypeError(
                 "Instance Segmentation Actual Label categories must be a list of strings"
             )
-    def _validate_bounding_boxes(self):
+    def _validate_bounding_boxes(self) -> None:
         if self.bounding_boxes_coordinates is not None:
             if not is_list_of(self.bounding_boxes_coordinates, list):
                 raise TypeError(
@@ -690,7 +714,7 @@ class InstanceSegmentationActualLabel(NamedTuple):
             for coordinates in self.bounding_boxes_coordinates:
                 _validate_bounding_box_coordinates(coordinates)
-    def _validate_count_match(self):
+    def _validate_count_match(self) -> None:
         n_polygon_coordinates = len(self.polygon_coordinates)
         if n_polygon_coordinates == 0:
             raise ValueError(
@@ -717,8 +741,7 @@ class InstanceSegmentationActualLabel(NamedTuple):
 class MultiClassPredictionLabel(NamedTuple):
-    """
-    Used to log multi class prediction label
+    """Used to log multi class prediction label.
     Arguments:
     ---------
@@ -729,15 +752,16 @@ class MultiClassPredictionLabel(NamedTuple):
     """
-    prediction_scores: Dict[str, float | int]
-    threshold_scores: Dict[str, float | int] | None = None
+    prediction_scores: dict[str, float | int]
+    threshold_scores: dict[str, float | int] | None = None
-    def validate(self):
+    def validate(self) -> None:
+        """Validate the field values and constraints."""
         # Validate scores
         self._validate_prediction_scores()
         self._validate_threshold_scores()
-    def _validate_prediction_scores(self):
+    def _validate_prediction_scores(self) -> None:
         # prediction dictionary validations
         if not is_dict_of(
             self.prediction_scores,
@@ -778,7 +802,7 @@ class MultiClassPredictionLabel(NamedTuple):
                     "invalid. All scores (values in dictionary) must be between 0 and 1, inclusive."
                 )
-    def _validate_threshold_scores(self):
+    def _validate_threshold_scores(self) -> None:
         if self.threshold_scores is None or len(self.threshold_scores) == 0:
             return
         if not is_dict_of(
@@ -822,8 +846,7 @@ class MultiClassPredictionLabel(NamedTuple):
 class MultiClassActualLabel(NamedTuple):
-    """
-    Used to log multi class actual label
+    """Used to log multi class actual label.
     Arguments:
     ---------
@@ -833,13 +856,14 @@ class MultiClassActualLabel(NamedTuple):
     """
-    actual_scores: Dict[str, float | int]
+    actual_scores: dict[str, float | int]
-    def validate(self):
+    def validate(self) -> None:
+        """Validate the field values and constraints."""
         # Validate scores
         self._validate_actual_scores()
-    def _validate_actual_scores(self):
+    def _validate_actual_scores(self) -> None:
         if not is_dict_of(
             self.actual_scores,
             key_allowed_types=str,
@@ -879,12 +903,15 @@ class MultiClassActualLabel(NamedTuple):
 class RankingPredictionLabel(NamedTuple):
+    """Prediction label for ranking tasks with group and rank information."""
     group_id: str
     rank: int
     score: float | None = None
     label: str | None = None
-    def validate(self):
+    def validate(self) -> None:
+        """Validate the field values and constraints."""
         # Validate existence of required fields: prediction_group_id and rank
         if self.group_id is None or self.rank is None:
             raise ValueError(
@@ -901,7 +928,7 @@ class RankingPredictionLabel(NamedTuple):
         if self.score is not None:
             self._validate_score()
-    def _validate_group_id(self):
+    def _validate_group_id(self) -> None:
         if not isinstance(self.group_id, str):
             raise TypeError("Prediction Group ID must be a string")
         if not (1 <= len(self.group_id) <= 36):
@@ -909,7 +936,7 @@ class RankingPredictionLabel(NamedTuple):
                 f"Prediction Group ID must have length between 1 and 36. Found {len(self.group_id)}"
             )
-    def _validate_rank(self):
+    def _validate_rank(self) -> None:
         if not isinstance(self.rank, int):
             raise TypeError("Prediction Rank must be an int")
         if not (1 <= self.rank <= 100):
@@ -917,22 +944,25 @@ class RankingPredictionLabel(NamedTuple):
                 f"Prediction Rank must be between 1 and 100, inclusive. Found {self.rank}"
             )
-    def _validate_label(self):
+    def _validate_label(self) -> None:
         if not isinstance(self.label, str):
             raise TypeError("Prediction Label must be a str")
         if self.label == "":
             raise ValueError("Prediction Label must not be an empty string.")
-    def _validate_score(self):
+    def _validate_score(self) -> None:
         if not isinstance(self.score, (float, int)):
             raise TypeError("Prediction Score must be a float or an int")
 class RankingActualLabel(NamedTuple):
-    relevance_labels: List[str] | None = None
+    """Actual label for ranking tasks with relevance information."""
+    relevance_labels: list[str] | None = None
     relevance_score: float | None = None
-    def validate(self):
+    def validate(self) -> None:
+        """Validate the field values and constraints."""
         # Validate relevance_labels type
         if self.relevance_labels is not None:
             self._validate_relevance_labels(self.relevance_labels)
@@ -941,7 +971,7 @@ class RankingActualLabel(NamedTuple):
             self._validate_relevance_score(self.relevance_score)
     @staticmethod
-    def _validate_relevance_labels(relevance_labels: List[str]):
+    def _validate_relevance_labels(relevance_labels: list[str]) -> None:
         if not is_list_of(relevance_labels, str):
             raise TypeError("Actual Relevance Labels must be a list of strings")
         if any(label == "" for label in relevance_labels):
@@ -950,17 +980,20 @@ class RankingActualLabel(NamedTuple):
             )
     @staticmethod
-    def _validate_relevance_score(relevance_score: float):
+    def _validate_relevance_score(relevance_score: float) -> None:
         if not isinstance(relevance_score, (float, int)):
             raise TypeError("Actual Relevance score must be a float or an int")
 @dataclass
 class PromptTemplateColumnNames:
+    """Column names for prompt template configuration in LLM schemas."""
     template_column_name: str | None = None
     template_version_column_name: str | None = None
-    def __iter__(self):
+    def __iter__(self) -> Iterator[str | None]:
+        """Iterate over the prompt template column names."""
         return iter(
             (self.template_column_name, self.template_version_column_name)
         )
@@ -968,21 +1001,27 @@ class PromptTemplateColumnNames:
 @dataclass
 class LLMConfigColumnNames:
+    """Column names for LLM configuration parameters in schemas."""
     model_column_name: str | None = None
     params_column_name: str | None = None
-    def __iter__(self):
+    def __iter__(self) -> Iterator[str | None]:
+        """Iterate over the LLM config column names."""
         return iter((self.model_column_name, self.params_column_name))
 @dataclass
 class LLMRunMetadataColumnNames:
+    """Column names for LLM run metadata fields in schemas."""
     total_token_count_column_name: str | None = None
     prompt_token_count_column_name: str | None = None
     response_token_count_column_name: str | None = None
     response_latency_ms_column_name: str | None = None
-    def __iter__(self):
+    def __iter__(self) -> Iterator[str | None]:
+        """Iterate over the LLM run metadata column names."""
         return iter(
             (
                 self.total_token_count_column_name,
@@ -1011,11 +1050,19 @@ class LLMRunMetadataColumnNames:
 #
 @dataclass
 class SimilarityReference:
+    """Reference to a prediction for similarity search operations."""
     prediction_id: str
     reference_column_name: str
     prediction_timestamp: datetime | None = None
-    def __post_init__(self):
+    def __post_init__(self) -> None:
+        """Validate similarity reference fields after initialization.
+        Raises:
+            ValueError: If prediction_id or reference_column_name is empty.
+            TypeError: If prediction_timestamp is not a datetime object.
+        """
         if self.prediction_id == "":
             raise ValueError("prediction id cannot be empty")
         if self.reference_column_name == "":
@@ -1028,11 +1075,20 @@ class SimilarityReference:
 @dataclass
 class SimilaritySearchParams:
-    references: List[SimilarityReference]
+    """Parameters for configuring similarity search operations."""
+    references: list[SimilarityReference]
     search_column_name: str
     threshold: float = 0
-    def __post_init__(self):
+    def __post_init__(self) -> None:
+        """Validate similarity search parameters after initialization.
+        Raises:
+            ValueError: If references list is invalid, search_column_name is
+                empty, or threshold is out of range.
+            TypeError: If any reference is not a SimilarityReference instance.
+        """
         if (
             not self.references
             or len(self.references) <= 0
@@ -1054,23 +1110,28 @@ class SimilaritySearchParams:
 @dataclass(frozen=True)
 class BaseSchema:
-    def replace(self, **changes):
+    """Base class for all schema definitions with immutable fields."""
+    def replace(self, **changes: object) -> Self:
+        """Return a new instance with specified fields replaced."""
         return replace(self, **changes)
-    def asdict(self) -> Dict[str, str]:
+    def asdict(self) -> dict[str, str]:
+        """Convert the schema to a dictionary."""
         return asdict(self)
-    def get_used_columns(self) -> Set[str]:
+    def get_used_columns(self) -> set[str]:
+        """Return the set of column names used in this schema."""
         return set(self.get_used_columns_counts().keys())
-    def get_used_columns_counts(self) -> Dict[str, int]:
+    def get_used_columns_counts(self) -> dict[str, int]:
+        """Return a dict mapping column names to their usage count."""
         raise NotImplementedError()
 @dataclass(frozen=True)
 class TypedColumns:
-    """
-    Optional class used for explicit type enforcement of feature and tag columns in the dataframe.
+    """Optional class used for explicit type enforcement of feature and tag columns in the dataframe.
     Usage:
     ------
@@ -1101,30 +1162,31 @@ class TypedColumns:
     """
-    inferred: List[str] | None = None
-    to_str: List[str] | None = None
-    to_int: List[str] | None = None
-    to_float: List[str] | None = None
+    inferred: list[str] | None = None
+    to_str: list[str] | None = None
+    to_int: list[str] | None = None
+    to_float: list[str] | None = None
-    def get_all_column_names(self) -> List[str]:
+    def get_all_column_names(self) -> list[str]:
+        """Return all column names across all conversion lists."""
         return list(chain.from_iterable(filter(None, self.__dict__.values())))
-    def has_duplicate_columns(self) -> Tuple[bool, Set[str]]:
+    def has_duplicate_columns(self) -> tuple[bool, set[str]]:
+        """Check for duplicate columns and return (has_duplicates, duplicate_set)."""
         # True if there are duplicates within a field's list or across fields.
         # Return a set of the duplicate column names.
         cols = self.get_all_column_names()
-        duplicates = set([x for x in cols if cols.count(x) > 1])
+        duplicates = {x for x in cols if cols.count(x) > 1}
         return len(duplicates) > 0, duplicates
     def is_empty(self) -> bool:
+        """Return True if no columns are configured for conversion."""
         return not self.get_all_column_names()
 @dataclass(frozen=True)
 class Schema(BaseSchema):
-    """
-    Used to organize and map column names containing model data within your Pandas dataframe to
-    Arize.
+    """Used to organize and map column names containing model data within your Pandas dataframe to Arize.
     Arguments:
     ---------
@@ -1215,15 +1277,15 @@ class Schema(BaseSchema):
     """
     prediction_id_column_name: str | None = None
-    feature_column_names: List[str] | TypedColumns | None = None
-    tag_column_names: List[str] | TypedColumns | None = None
+    feature_column_names: list[str] | TypedColumns | None = None
+    tag_column_names: list[str] | TypedColumns | None = None
     timestamp_column_name: str | None = None
     prediction_label_column_name: str | None = None
     prediction_score_column_name: str | None = None
     actual_label_column_name: str | None = None
     actual_score_column_name: str | None = None
-    shap_values_column_names: Dict[str, str] | None = None
-    embedding_feature_column_names: Dict[str, EmbeddingColumnNames] | None = (
+    shap_values_column_names: dict[str, str] | None = None
+    embedding_feature_column_names: dict[str, EmbeddingColumnNames] | None = (
         None  # type:ignore
     )
     prediction_group_id_column_name: str | None = None
@@ -1242,7 +1304,7 @@ class Schema(BaseSchema):
     prompt_template_column_names: PromptTemplateColumnNames | None = None
     llm_config_column_names: LLMConfigColumnNames | None = None
     llm_run_metadata_column_names: LLMRunMetadataColumnNames | None = None
-    retrieved_document_ids_column_name: List[str] | None = None
+    retrieved_document_ids_column_name: list[str] | None = None
     multi_class_threshold_scores_column_name: str | None = None
     semantic_segmentation_prediction_column_names: (
         SemanticSegmentationColumnNames | None
@@ -1257,7 +1319,8 @@ class Schema(BaseSchema):
         InstanceSegmentationActualColumnNames | None
     ) = None
-    def get_used_columns_counts(self) -> Dict[str, int]:
+    def get_used_columns_counts(self) -> dict[str, int]:
+        """Return a dict mapping column names to their usage count."""
         columns_used_counts = {}
         for field in self.__dataclass_fields__:
@@ -1364,6 +1427,7 @@ class Schema(BaseSchema):
         return columns_used_counts
     def has_prediction_columns(self) -> bool:
+        """Return True if prediction columns are configured."""
         prediction_cols = (
             self.prediction_label_column_name,
             self.prediction_score_column_name,
@@ -1377,6 +1441,7 @@ class Schema(BaseSchema):
         return any(col is not None for col in prediction_cols)
     def has_actual_columns(self) -> bool:
+        """Return True if actual label columns are configured."""
         actual_cols = (
             self.actual_label_column_name,
             self.actual_score_column_name,
@@ -1389,13 +1454,16 @@ class Schema(BaseSchema):
         return any(col is not None for col in actual_cols)
     def has_feature_importance_columns(self) -> bool:
+        """Return True if feature importance columns are configured."""
         feature_importance_cols = (self.shap_values_column_names,)
         return any(col is not None for col in feature_importance_cols)
     def has_typed_columns(self) -> bool:
+        """Return True if typed columns are configured."""
         return any(self.typed_column_fields())
-    def typed_column_fields(self) -> Set[str]:
+    def typed_column_fields(self) -> set[str]:
+        """Return the set of field names with typed columns."""
         return {
             field
             for field in self.__dataclass_fields__
@@ -1403,9 +1471,9 @@ class Schema(BaseSchema):
         }
     def is_delayed(self) -> bool:
-        """
-        This function checks if the given schema, according to the columns provided
-        by the user, has inherently latent information
+        """Check if the schema has inherently latent information.
+        Determines this based on the columns provided by the user.
         Returns:
             bool: True if the schema is "delayed", i.e., does not possess prediction
@@ -1418,11 +1486,14 @@ class Schema(BaseSchema):
 @dataclass(frozen=True)
 class CorpusSchema(BaseSchema):
+    """Schema for corpus data with document identification and content columns."""
     document_id_column_name: str | None = None
     document_version_column_name: str | None = None
     document_text_embedding_column_names: EmbeddingColumnNames | None = None
-    def get_used_columns_counts(self) -> Dict[str, int]:
+    def get_used_columns_counts(self) -> dict[str, int]:
+        """Return a dict mapping column names to their usage count."""
         columns_used_counts = {}
         if self.document_id_column_name is not None:
@@ -1459,6 +1530,8 @@ class CorpusSchema(BaseSchema):
 @unique
 class ArizeTypes(Enum):
+    """Enum representing supported data types in Arize platform."""
     STR = 0
     FLOAT = 1
     INT = 2
@@ -1466,11 +1539,21 @@ class ArizeTypes(Enum):
 @dataclass(frozen=True)
 class TypedValue:
+    """Container for a value with its associated Arize type."""
     type: ArizeTypes
     value: str | bool | float | int
 def is_json_str(s: str) -> bool:
+    """Check if a string is valid JSON.
+    Args:
+        s: The string to validate.
+    Returns:
+        True if the string is valid JSON, False otherwise.
+    """
     try:
         json.loads(s)
     except ValueError:
@@ -1484,25 +1567,51 @@ T = TypeVar("T", bound=type)
 def is_array_of(arr: Sequence[object], tp: T) -> bool:
+    """Check if a value is a numpy array with all elements of a specific type.
+    Args:
+        arr: The sequence to check.
+        tp: The expected type for all elements.
+    Returns:
+        True if arr is a numpy array and all elements are of type tp.
+    """
     return isinstance(arr, np.ndarray) and all(isinstance(x, tp) for x in arr)
 def is_list_of(lst: Sequence[object], tp: T) -> bool:
+    """Check if a value is a list with all elements of a specific type.
+    Args:
+        lst: The sequence to check.
+        tp: The expected type for all elements.
+    Returns:
+        True if lst is a list and all elements are of type tp.
+    """
     return isinstance(lst, list) and all(isinstance(x, tp) for x in lst)
 def is_iterable_of(lst: Sequence[object], tp: T) -> bool:
+    """Check if a value is an iterable with all elements of a specific type.
+    Args:
+        lst: The sequence to check.
+        tp: The expected type for all elements.
+    Returns:
+        True if lst is an iterable and all elements are of type tp.
+    """
     return isinstance(lst, Iterable) and all(isinstance(x, tp) for x in lst)
 def is_dict_of(
-    d: Dict[object, object],
+    d: dict[object, object],
     key_allowed_types: T,
     value_allowed_types: T = (),
     value_list_allowed_types: T = (),
 ) -> bool:
-    """
-    Method to check types are valid for dictionary.
+    """Method to check types are valid for dictionary.
     Arguments:
     ---------
@@ -1535,7 +1644,7 @@ def is_dict_of(
     )
-def _count_characters_raw_data(data: str | List[str]) -> int:
+def _count_characters_raw_data(data: str | list[str]) -> int:
     character_count = 0
     if isinstance(data, str):
         character_count = len(data)
@@ -1551,8 +1660,14 @@ def _count_characters_raw_data(data: str | List[str]) -> int:
 def add_to_column_count_dictionary(
-    column_dictionary: Dict[str, int], col: str | None
-):
+    column_dictionary: dict[str, int], col: str | None
+) -> None:
+    """Increment the count for a column name in a dictionary.
+    Args:
+        column_dictionary: Dictionary mapping column names to counts.
+        col: The column name to increment, or None to skip.
+    """
     if col:
         if col in column_dictionary:
             column_dictionary[col] += 1
@@ -1560,7 +1675,9 @@ def add_to_column_count_dictionary(
             column_dictionary[col] = 1
-def _validate_bounding_box_coordinates(bounding_box_coordinates: List[float]):
+def _validate_bounding_box_coordinates(
+    bounding_box_coordinates: list[float],
+) -> None:
     if not is_list_of(bounding_box_coordinates, float):
         raise TypeError(
             "Each bounding box's coordinates must be a lists of floats"
@@ -1586,10 +1703,12 @@ def _validate_bounding_box_coordinates(bounding_box_coordinates: List[float]):
             f"top-left. Found {bounding_box_coordinates}"
         )
-    return None
+    return
-def _validate_polygon_coordinates(polygon_coordinates: List[List[float]]):
+def _validate_polygon_coordinates(
+    polygon_coordinates: list[list[float]],
+) -> None:
     if not is_list_of(polygon_coordinates, list):
         raise TypeError("Polygon coordinates must be a list of lists of floats")
     for coordinates in polygon_coordinates:
@@ -1651,27 +1770,41 @@ def _validate_polygon_coordinates(polygon_coordinates: List[List[float]]):
                         f"{coordinates}"
                     )
-    return None
+    return
-def segments_intersect(p1, p2, p3, p4):
-    """
-    Check if two line segments intersect.
+def segments_intersect(
+    p1: tuple[float, float],
+    p2: tuple[float, float],
+    p3: tuple[float, float],
+    p4: tuple[float, float],
+) -> bool:
+    """Check if two line segments intersect.
     Args:
-        p1, p2: First line segment endpoints (x,y)
-        p3, p4: Second line segment endpoints (x,y)
+        p1: First endpoint of the first line segment (x,y)
+        p2: Second endpoint of the first line segment (x,y)
+        p3: First endpoint of the second line segment (x,y)
+        p4: Second endpoint of the second line segment (x,y)
     Returns:
         True if the line segments intersect, False otherwise
     """
     # Function to calculate direction
-    def orientation(p, q, r):
+    def orientation(
+        p: tuple[float, float],
+        q: tuple[float, float],
+        r: tuple[float, float],
+    ) -> float:
         return (q[1] - p[1]) * (r[0] - q[0]) - (q[0] - p[0]) * (r[1] - q[1])
     # Function to check if point q is on segment pr
-    def on_segment(p, q, r):
+    def on_segment(
+        p: tuple[float, float],
+        q: tuple[float, float],
+        r: tuple[float, float],
+    ) -> bool:
         return (
             q[0] <= max(p[0], r[0])
             and q[0] >= min(p[0], r[0])
@@ -1703,17 +1836,20 @@ def segments_intersect(p1, p2, p3, p4):
 @unique
 class StatusCodes(Enum):
+    """Enum representing status codes for operations and responses."""
     UNSET = 0
     OK = 1
     ERROR = 2
     @classmethod
-    def list_codes(cls):
+    def list_codes(cls) -> list[str]:
+        """Return a list of all status code names."""
         return [t.name for t in cls]
-def convert_element(value):
-    """Converts scalar or array to python native"""
+def convert_element(value: object) -> object:
+    """Converts scalar or array to python native."""
     val = getattr(value, "tolist", lambda: value)()
     # Check if it's a list since elements from pd indices are converted to a
     # scalar whereas pd series/dataframe elements are converted to list of 1
@@ -1734,7 +1870,7 @@ PredictionLabelTypes = (
     | bool
     | int
     | float
-    | Tuple[str, float]
+    | tuple[str, float]
     | ObjectDetectionLabel
     | RankingPredictionLabel
     | MultiClassPredictionLabel
@@ -1745,7 +1881,7 @@ ActualLabelTypes = (
     | bool
     | int
     | float
-    | Tuple[str, float]
+    | tuple[str, float]
     | ObjectDetectionLabel
     | RankingActualLabel
     | MultiClassActualLabel

arize 8.0.0a21__py3-none-any.whl → 8.0.0a23__py3-none-any.whl

arize 8.0.0a21__py3-none-any.whl → 8.0.0a23__py3-none-any.whl