PyPI - arize - Versions diffs - 8.0.0b2__py3-none-any.whl → 8.0.0b4__py3-none-any.whl - Mend

arize 8.0.0b2__py3-none-any.whl → 8.0.0b4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (60)

arize/__init__.py +8 -1
arize/_exporter/client.py +18 -17
arize/_exporter/parsers/tracing_data_parser.py +9 -4
arize/_exporter/validation.py +1 -1
arize/_flight/client.py +33 -13
arize/_lazy.py +37 -2
arize/client.py +61 -35
arize/config.py +168 -14
arize/constants/config.py +1 -0
arize/datasets/client.py +32 -19
arize/embeddings/auto_generator.py +14 -7
arize/embeddings/base_generators.py +15 -9
arize/embeddings/cv_generators.py +2 -2
arize/embeddings/nlp_generators.py +8 -8
arize/embeddings/tabular_generators.py +5 -5
arize/exceptions/config.py +22 -0
arize/exceptions/parameters.py +1 -1
arize/exceptions/values.py +8 -5
arize/experiments/__init__.py +4 -0
arize/experiments/client.py +17 -11
arize/experiments/evaluators/base.py +6 -3
arize/experiments/evaluators/executors.py +6 -4
arize/experiments/evaluators/rate_limiters.py +3 -1
arize/experiments/evaluators/types.py +7 -5
arize/experiments/evaluators/utils.py +7 -5
arize/experiments/functions.py +111 -48
arize/experiments/tracing.py +4 -1
arize/experiments/types.py +31 -26
arize/logging.py +53 -32
arize/ml/batch_validation/validator.py +82 -70
arize/ml/bounded_executor.py +25 -6
arize/ml/casting.py +45 -27
arize/ml/client.py +35 -28
arize/ml/proto.py +16 -17
arize/ml/stream_validation.py +63 -25
arize/ml/surrogate_explainer/mimic.py +15 -7
arize/ml/types.py +26 -12
arize/pre_releases.py +7 -6
arize/py.typed +0 -0
arize/regions.py +10 -10
arize/spans/client.py +113 -21
arize/spans/conversion.py +7 -5
arize/spans/validation/annotations/dataframe_form_validation.py +1 -1
arize/spans/validation/annotations/value_validation.py +11 -14
arize/spans/validation/common/dataframe_form_validation.py +1 -1
arize/spans/validation/common/value_validation.py +10 -13
arize/spans/validation/evals/value_validation.py +1 -1
arize/spans/validation/metadata/argument_validation.py +1 -1
arize/spans/validation/metadata/dataframe_form_validation.py +1 -1
arize/spans/validation/metadata/value_validation.py +23 -1
arize/utils/arrow.py +37 -1
arize/utils/online_tasks/dataframe_preprocessor.py +8 -4
arize/utils/proto.py +0 -1
arize/utils/types.py +6 -6
arize/version.py +1 -1
{arize-8.0.0b2.dist-info → arize-8.0.0b4.dist-info}/METADATA +10 -2
{arize-8.0.0b2.dist-info → arize-8.0.0b4.dist-info}/RECORD +60 -58
{arize-8.0.0b2.dist-info → arize-8.0.0b4.dist-info}/WHEEL +0 -0
{arize-8.0.0b2.dist-info → arize-8.0.0b4.dist-info}/licenses/LICENSE +0 -0
{arize-8.0.0b2.dist-info → arize-8.0.0b4.dist-info}/licenses/NOTICE +0 -0

arize/ml/batch_validation/validator.py CHANGED Viewed

@@ -6,7 +6,10 @@ import logging
 import math
 from datetime import datetime, timedelta, timezone
 from itertools import chain
-from typing import Any
+from typing import TYPE_CHECKING, Any, cast
+if TYPE_CHECKING:
+    from collections.abc import Sequence
 import numpy as np
 import pandas as pd
@@ -115,6 +118,7 @@ from arize.ml.types import (
     ModelTypes,
     PromptTemplateColumnNames,
     Schema,
+    _normalize_column_names,
     segments_intersect,
 )
 from arize.utils.types import (
@@ -412,25 +416,25 @@ class Validator:
         if isinstance(schema, Schema):
             general_checks = chain(
                 general_checks,
-                Validator._check_value_timestamp(dataframe, schema),
-                Validator._check_id_field_str_length(
+                Validator._check_value_timestamp(dataframe, schema),  # type: ignore[arg-type]
+                Validator._check_id_field_str_length(  # type: ignore[arg-type]
                     dataframe,
                     "prediction_id_column_name",
                     schema.prediction_id_column_name,
                 ),
-                Validator._check_embedding_vectors_dimensionality(
+                Validator._check_embedding_vectors_dimensionality(  # type: ignore[arg-type]
                     dataframe, schema
                 ),
-                Validator._check_embedding_raw_data_characters(
+                Validator._check_embedding_raw_data_characters(  # type: ignore[arg-type]
                     dataframe, schema
                 ),
-                Validator._check_invalid_record_prod(
+                Validator._check_invalid_record_prod(  # type: ignore[arg-type]
                     dataframe, environment, schema, model_type
                 ),
-                Validator._check_invalid_record_preprod(
+                Validator._check_invalid_record_preprod(  # type: ignore[arg-type]
                     dataframe, environment, schema, model_type
                 ),
-                Validator._check_value_tag(dataframe, schema),
+                Validator._check_value_tag(dataframe, schema),  # type: ignore[arg-type]
             )
             if model_type == ModelTypes.RANKING:
                 r_checks = chain(
@@ -555,7 +559,7 @@ class Validator:
     def _check_field_type_prompt_response(
         schema: Schema,
     ) -> list[InvalidFieldTypePromptResponse]:
-        errors = []
+        errors: list[InvalidFieldTypePromptResponse] = []
         if schema.prompt_column_names is not None and not isinstance(
             schema.prompt_column_names, (str, EmbeddingColumnNames)
         ):
@@ -679,7 +683,7 @@ class Validator:
         schema: Schema,
         required_columns_map: list[dict[str, Any]],
     ) -> tuple[bool, list[str], list[list[str]]]:
-        missing_columns = []
+        missing_columns: list[str] = []
         for item in required_columns_map:
             if model_type.name.lower() == item.get("external_model_type"):
                 is_valid_combination = False
@@ -793,7 +797,9 @@ class Validator:
             missing_columns.extend(
                 [
                     col
-                    for col in schema.feature_column_names
+                    for col in _normalize_column_names(
+                        schema.feature_column_names
+                    )
                     if col not in existing_columns
                 ]
             )
@@ -828,7 +834,7 @@ class Validator:
             missing_columns.extend(
                 [
                     col
-                    for col in schema.tag_column_names
+                    for col in _normalize_column_names(schema.tag_column_names)
                     if col not in existing_columns
                 ]
             )
@@ -1051,22 +1057,19 @@ class Validator:
         invalid_column_names = set()
         if schema.feature_column_names is not None:
-            for col in schema.feature_column_names:
+            for col in _normalize_column_names(schema.feature_column_names):
                 if isinstance(col, str) and col.endswith("_shap"):
                     invalid_column_names.add(col)
         if schema.embedding_feature_column_names is not None:
             for emb_col_names in schema.embedding_feature_column_names.values():
-                for col in emb_col_names:
-                    if (
-                        col is not None
-                        and isinstance(col, str)
-                        and col.endswith("_shap")
-                    ):
+                cols_list = [c for c in emb_col_names if c is not None]
+                for col in cols_list:
+                    if col.endswith("_shap"):
                         invalid_column_names.add(col)
         if schema.tag_column_names is not None:
-            for col in schema.tag_column_names:
+            for col in _normalize_column_names(schema.tag_column_names):
                 if isinstance(col, str) and col.endswith("_shap"):
                     invalid_column_names.add(col)
@@ -1396,7 +1399,7 @@ class Validator:
             return [
                 InvalidPredActColumnNamesForModelType(
                     model_type,
-                    None,
+                    None,  # type: ignore[arg-type]
                     [schema.multi_class_threshold_scores_column_name],
                 )
             ]
@@ -1448,7 +1451,9 @@ class Validator:
             ]
             return [
                 InvalidPredActColumnNamesForModelType(
-                    model_type, allowed_cols, wrong_cols
+                    model_type,
+                    allowed_cols,
+                    wrong_cols,  # type: ignore[arg-type]
                 )
             ]
         return []
@@ -1589,7 +1594,7 @@ class Validator:
             )
             wrong_type_cols = [
                 col
-                for col in schema.feature_column_names
+                for col in _normalize_column_names(schema.feature_column_names)
                 if col in column_types
                 and column_types[col] not in allowed_datatypes
             ]
@@ -1703,7 +1708,7 @@ class Validator:
             )
             wrong_type_cols = [
                 col
-                for col in schema.tag_column_names
+                for col in _normalize_column_names(schema.tag_column_names)
                 if col in column_types
                 and column_types[col] not in allowed_datatypes
             ]
@@ -1750,6 +1755,7 @@ class Validator:
             ("Prediction labels", schema.prediction_label_column_name),
             ("Actual labels", schema.actual_label_column_name),
         )
+        allowed_datatypes: tuple[Any, ...]
         if (
             model_type in CATEGORICAL_MODEL_TYPES
             or model_type == ModelTypes.GENERATIVE_LLM
@@ -2071,10 +2077,8 @@ class Validator:
         )
         wrong_type_cols = []
         if schema.tag_column_names:
-            if (
-                LLM_RUN_METADATA_TOTAL_TOKEN_COUNT_TAG_NAME
-                in schema.tag_column_names
-            ) and (
+            tag_cols = _normalize_column_names(schema.tag_column_names)
+            if (LLM_RUN_METADATA_TOTAL_TOKEN_COUNT_TAG_NAME in tag_cols) and (
                 LLM_RUN_METADATA_TOTAL_TOKEN_COUNT_TAG_NAME in column_types
                 and column_types[LLM_RUN_METADATA_TOTAL_TOKEN_COUNT_TAG_NAME]
                 not in allowed_datatypes
@@ -2082,10 +2086,7 @@ class Validator:
                 wrong_type_cols.append(
                     schema.llm_run_metadata_column_names.total_token_count_column_name
                 )
-            if (
-                LLM_RUN_METADATA_PROMPT_TOKEN_COUNT_TAG_NAME
-                in schema.tag_column_names
-            ) and (
+            if (LLM_RUN_METADATA_PROMPT_TOKEN_COUNT_TAG_NAME in tag_cols) and (
                 LLM_RUN_METADATA_PROMPT_TOKEN_COUNT_TAG_NAME in column_types
                 and column_types[LLM_RUN_METADATA_PROMPT_TOKEN_COUNT_TAG_NAME]
                 not in allowed_datatypes
@@ -2094,8 +2095,7 @@ class Validator:
                     schema.llm_run_metadata_column_names.prompt_token_count_column_name
                 )
             if (
-                LLM_RUN_METADATA_RESPONSE_TOKEN_COUNT_TAG_NAME
-                in schema.tag_column_names
+                LLM_RUN_METADATA_RESPONSE_TOKEN_COUNT_TAG_NAME in tag_cols
             ) and (
                 LLM_RUN_METADATA_RESPONSE_TOKEN_COUNT_TAG_NAME in column_types
                 and column_types[LLM_RUN_METADATA_RESPONSE_TOKEN_COUNT_TAG_NAME]
@@ -2104,10 +2104,7 @@ class Validator:
                 wrong_type_cols.append(
                     schema.llm_run_metadata_column_names.response_token_count_column_name
                 )
-            if (
-                LLM_RUN_METADATA_RESPONSE_LATENCY_MS_TAG_NAME
-                in schema.tag_column_names
-            ) and (
+            if (LLM_RUN_METADATA_RESPONSE_LATENCY_MS_TAG_NAME in tag_cols) and (
                 LLM_RUN_METADATA_RESPONSE_LATENCY_MS_TAG_NAME in column_types
                 and column_types[LLM_RUN_METADATA_RESPONSE_LATENCY_MS_TAG_NAME]
                 not in allowed_datatypes
@@ -2120,7 +2117,7 @@ class Validator:
             if wrong_type_cols:
                 return [
                     InvalidTypeColumns(
-                        wrong_type_columns=wrong_type_cols,
+                        wrong_type_columns=wrong_type_cols,  # type: ignore[arg-type]
                         expected_types=["int", "float"],
                     )
                 ]
@@ -2525,7 +2522,7 @@ class Validator:
             and len(dataframe)
         ):
             return True
-        return (
+        return bool(
             dataframe[col_name]
             .astype(str)
             .str.len()
@@ -2542,15 +2539,15 @@ class Validator:
         wrong_tag_cols = []
         truncated_tag_cols = []
-        for col in schema.tag_column_names:
+        for col in _normalize_column_names(schema.tag_column_names):
             # This is to be defensive, validate_params should guarantee that this column is in
             # the dataframe, via _check_missing_columns, and return an error before reaching this
             # block if not
             # Checks max tag length when any values in a column are strings
             if (
                 col in dataframe.columns
-                and dataframe[col].map(type).eq(str).any()
-            ):  # type:ignore
+                and dataframe[col].map(type).eq(str).any()  # type: ignore[arg-type]
+            ):
                 max_tag_len = (
                     dataframe[col]
                     .apply(_check_value_string_length_helper)
@@ -2574,6 +2571,7 @@ class Validator:
     def _check_value_ranking_category(
         dataframe: pd.DataFrame, schema: Schema
     ) -> list[InvalidValueMissingValue | InvalidRankingCategoryValue]:
+        col: str | None
         if schema.relevance_labels_column_name is not None:
             col = schema.relevance_labels_column_name
         elif schema.attributions_column_name is not None:
@@ -2581,7 +2579,7 @@ class Validator:
         else:
             col = schema.actual_label_column_name
         if col is not None and col in dataframe.columns:
-            if dataframe[col].isnull().values.any():  # type: ignore
+            if dataframe[col].isnull().any():
                 # do not attach duplicated missing value error
                 # which would be caught by _check_value_missing
                 return []
@@ -2661,7 +2659,11 @@ class Validator:
         invalid_pred_scores = {}
         lbound, ubound = (0, 1)
         invalid_actual_scores = False
-        errors = []
+        errors: list[
+            InvalidMultiClassClassNameLength
+            | InvalidMultiClassActScoreValue
+            | InvalidMultiClassPredScoreValue
+        ] = []
         for col in cols:
             if (
                 col is None
@@ -2711,9 +2713,9 @@ class Validator:
         if invalid_class_names:
             errors.append(InvalidMultiClassClassNameLength(invalid_class_names))
         if invalid_pred_scores:
-            errors.append(InvalidMultiClassPredScoreValue(invalid_pred_scores))
+            errors.append(InvalidMultiClassPredScoreValue(invalid_pred_scores))  # type: ignore[arg-type]
         if invalid_actual_scores:
-            errors.append(InvalidMultiClassActScoreValue(col))
+            errors.append(InvalidMultiClassActScoreValue(col))  # type: ignore[arg-type, arg-type]
         return errors
     @staticmethod
@@ -2762,7 +2764,7 @@ class Validator:
             # When a timestamp column has Date and NaN, pyarrow will be fine, but
             # pandas min/max will fail due to type incompatibility. So we check for
             # missing value first.
-            if dataframe[col].isnull().values.any():  # type: ignore
+            if dataframe[col].isnull().any():
                 return [
                     InvalidValueMissingValue("Prediction timestamp", "missing")
                 ]
@@ -2859,7 +2861,7 @@ class Validator:
         dataframe: pd.DataFrame, schema: BaseSchema, model_type: ModelTypes
     ) -> list[InvalidValueMissingValue]:
         errors = []
-        columns = ()
+        columns: tuple[tuple[str, str | None], ...] = ()
         if isinstance(schema, CorpusSchema):
             columns = (("Document ID", schema.document_id_column_name),)
         elif isinstance(schema, Schema):
@@ -3018,7 +3020,7 @@ class Validator:
         null_index = null_filter[null_filter].index.values
         if len(null_index) == 0:
             return []
-        return [InvalidRecord(columns_subset, null_index)]  # type: ignore
+        return [InvalidRecord(columns_subset, null_index)]  # type: ignore[arg-type]
     @staticmethod
     def _check_type_prediction_group_id(
@@ -3070,6 +3072,7 @@ class Validator:
     def _check_type_ranking_category(
         schema: Schema, column_types: dict[str, Any]
     ) -> list[InvalidType]:
+        col: str | None
         if schema.relevance_labels_column_name is not None:
             col = schema.relevance_labels_column_name
         elif schema.attributions_column_name is not None:
@@ -3316,7 +3319,7 @@ class Validator:
             dataframe, vector_cols_to_check
         )
-        errors = []
+        errors: list[ValidationError] = []
         if invalid_long_string_data_cols:
             errors.append(
                 InvalidValueEmbeddingRawDataTooLong(
@@ -3325,7 +3328,7 @@ class Validator:
             )
         if invalid_low_dim_vector_cols or invalid_high_dim_vector_cols:
             errors.append(
-                InvalidValueEmbeddingVectorDimensionality(
+                InvalidValueEmbeddingVectorDimensionality(  # type: ignore[arg-type]
                     invalid_low_dim_vector_cols,
                     invalid_high_dim_vector_cols,
                 )
@@ -3433,6 +3436,7 @@ class Validator:
         schema: CorpusSchema, column_types: dict[str, Any]
     ) -> list[InvalidTypeColumns]:
         invalid_types = []
+        allowed_datatypes: tuple[Any, ...]
         # Check document id
         col = schema.document_id_column_name
         if col in column_types:
@@ -3577,7 +3581,8 @@ def _check_value_bounding_boxes_coordinates_helper(
         # 'NoneType is not iterable')
         if boxes is None:
             raise InvalidBoundingBoxesCoordinates(reason="none_boxes")
-        for box in boxes:
+        # Type ignore: boxes comes from pandas Series, validated at runtime to be iterable
+        for box in boxes:  # type: ignore[attr-defined]
             if box is None or len(box) == 0:
                 raise InvalidBoundingBoxesCoordinates(
                     reason="none_or_empty_box"
@@ -3598,13 +3603,14 @@ def _box_coordinates_wrong_format(
 ) -> InvalidBoundingBoxesCoordinates | None:
     if (
         # Coordinates should be a collection of 4 floats
-        len(box_coords) != 4
+        len(box_coords) != 4  # type: ignore[arg-type]
         # Coordinates should be positive
-        or any(k < 0 for k in box_coords)
+        # Type ignore: box_coords validated at runtime to be iterable/indexable
+        or any(k < 0 for k in box_coords)  # type: ignore[attr-defined]
         # Coordinates represent the top-left & bottom-right corners of a box: x1 < x2
-        or box_coords[0] >= box_coords[2]
+        or box_coords[0] >= box_coords[2]  # type: ignore[index]
         # Coordinates represent the top-left & bottom-right corners of a box: y1 < y2
-        or box_coords[1] >= box_coords[3]
+        or box_coords[1] >= box_coords[3]  # type: ignore[index]
     ):
         return InvalidBoundingBoxesCoordinates(
             reason="boxes_coordinates_wrong_format"
@@ -3620,7 +3626,8 @@ def _check_value_bounding_boxes_categories_helper(
         # 'NoneType is not iterable')
         if categories is None:
             raise InvalidBoundingBoxesCategories(reason="none_category_list")
-        for category in categories:
+        # Type ignore: categories validated at runtime to be iterable
+        for category in categories:  # type: ignore[attr-defined]
             # Allow for empty string category, no None values
             if category is None:
                 raise InvalidBoundingBoxesCategories(reason="none_category")
@@ -3640,7 +3647,8 @@ def _check_value_bounding_boxes_scores_helper(
         # 'NoneType is not iterable')
         if scores is None:
             raise InvalidBoundingBoxesScores(reason="none_score_list")
-        for score in scores:
+        # Type ignore: scores validated at runtime to be iterable
+        for score in scores:  # type: ignore[attr-defined]
             # Confidence scores are between 0 and 1
             if score < 0 or score > 1:
                 raise InvalidBoundingBoxesScores(reason="scores_out_of_bounds")
@@ -3673,21 +3681,22 @@ def _polygon_coordinates_wrong_format(
     # Basic validations
     if (
         # Coordinates should be a collection of more than 6 floats (3 pairs of x,y coordinates)
-        len(polygon_coords) < 6
+        len(polygon_coords) < 6  # type: ignore[arg-type]
         # Coordinates should be positive
-        or any(k < 0 for k in polygon_coords)
+        # Type ignore: polygon_coords validated at runtime to be iterable
+        or any(k < 0 for k in polygon_coords)  # type: ignore[arg-type, attr-defined]
         # Coordinates should be a collection of pairs of floats
-        or len(polygon_coords) % 2 != 0
+        or len(polygon_coords) % 2 != 0  # type: ignore[arg-type]
     ):
         return InvalidPolygonCoordinates(
             reason="polygon_coordinates_wrong_format",
-            coordinates=polygon_coords,
+            coordinates=polygon_coords,  # type: ignore[arg-type]
         )
     # Convert flat list to list of points [(x1,y1), (x2,y2), ...]
+    coords_seq = cast("Sequence[float]", polygon_coords)
     points = [
-        (polygon_coords[i], polygon_coords[i + 1])
-        for i in range(0, len(polygon_coords), 2)
+        (coords_seq[i], coords_seq[i + 1]) for i in range(0, len(coords_seq), 2)
     ]
     # Check for repeated vertices
@@ -3696,7 +3705,7 @@ def _polygon_coordinates_wrong_format(
             if points[i] == points[j]:
                 return InvalidPolygonCoordinates(
                     reason="polygon_coordinates_repeated_vertices",
-                    coordinates=polygon_coords,
+                    coordinates=polygon_coords,  # type: ignore[arg-type]
                 )
     # Check for self-intersections
@@ -3717,7 +3726,7 @@ def _polygon_coordinates_wrong_format(
             ):
                 return InvalidPolygonCoordinates(
                     reason="polygon_coordinates_self_intersecting_vertices",
-                    coordinates=polygon_coords,
+                    coordinates=polygon_coords,  # type: ignore[arg-type]
                 )
     return None
@@ -3731,7 +3740,8 @@ def _check_value_polygon_coordinates_helper(
         # 'NoneType is not iterable')
         if polygons is None:
             raise InvalidPolygonCoordinates(reason="none_polygons")
-        for polygon in polygons:
+        # Type ignore: polygons validated at runtime to be iterable
+        for polygon in polygons:  # type: ignore[attr-defined]
             if polygon is None or len(polygon) == 0:
                 raise InvalidPolygonCoordinates(reason="none_or_empty_polygon")
             error = _polygon_coordinates_wrong_format(polygon)
@@ -3753,7 +3763,8 @@ def _check_value_polygon_categories_helper(
         # 'NoneType is not iterable')
         if categories is None:
             raise InvalidPolygonCategories(reason="none_category_list")
-        for category in categories:
+        # Type ignore: categories validated at runtime to be iterable
+        for category in categories:  # type: ignore[attr-defined]
             # Allow for empty string category, no None values
             if category is None:
                 raise InvalidPolygonCategories(reason="none_category")
@@ -3773,7 +3784,8 @@ def _check_value_polygon_scores_helper(
         # 'NoneType is not iterable')
         if scores is None:
             raise InvalidPolygonScores(reason="none_score_list")
-        for score in scores:
+        # Type ignore: scores validated at runtime to be iterable
+        for score in scores:  # type: ignore[attr-defined]
             # Confidence scores are between 0 and 1
             if score < 0 or score > 1:
                 raise InvalidPolygonScores(reason="scores_out_of_bounds")

arize/ml/bounded_executor.py CHANGED Viewed

@@ -24,12 +24,26 @@ class BoundedExecutor:
         self.executor = ThreadPoolExecutor(max_workers=max_workers)
         self.semaphore = BoundedSemaphore(bound + max_workers)
-    """See concurrent.futures.Executor#submit"""
     def submit(
         self, fn: Callable[..., object], *args: object, **kwargs: object
     ) -> object:
-        """Submit a callable to be executed with bounded concurrency."""
+        """Submit a callable to be executed with bounded concurrency.
+        This method blocks if the work queue is full (at the bound limit) until
+        space becomes available. Compatible with concurrent.futures.Executor.submit().
+        Args:
+            fn: The callable to execute.
+            *args: Positional arguments to pass to the callable.
+            **kwargs: Keyword arguments to pass to the callable.
+        Returns:
+            concurrent.futures.Future: A Future representing the pending execution.
+        Raises:
+            Exception: Any exception raised during submission is re-raised after
+                releasing the semaphore.
+        """
         self.semaphore.acquire()
         try:
             future = self.executor.submit(fn, *args, **kwargs)
@@ -40,8 +54,13 @@ class BoundedExecutor:
             future.add_done_callback(lambda _: self.semaphore.release())
             return future
-    """See concurrent.futures.Executor#shutdown"""
     def shutdown(self, wait: bool = True) -> None:
-        """Shutdown the executor, optionally waiting for pending tasks to complete."""
+        """Shutdown the executor, optionally waiting for pending tasks to complete.
+        Compatible with concurrent.futures.Executor.shutdown().
+        Args:
+            wait: If True, blocks until all pending tasks complete. If False,
+                returns immediately without waiting. Defaults to True.
+        """
         self.executor.shutdown(wait)

arize/ml/casting.py CHANGED Viewed

@@ -1,4 +1,3 @@
-# type: ignore[pb2]
 """Type casting utilities for ML model data conversion."""
 from __future__ import annotations
@@ -14,8 +13,8 @@ from arize.ml.types import (
     Schema,
     TypedColumns,
     TypedValue,
+    _normalize_column_names,
 )
-from arize.utils.types import is_list_of
 if TYPE_CHECKING:
     import pandas as pd
@@ -25,7 +24,11 @@ class CastingError(Exception):
     """Raised when type casting fails for a value."""
     def __str__(self) -> str:
-        """Return a human-readable error message."""
+        """Return a human-readable error message.
+        Returns:
+            str: The formatted error message describing the casting failure.
+        """
         return self.error_message()
     def __init__(self, error_msg: str, typed_value: TypedValue) -> None:
@@ -39,7 +42,11 @@ class CastingError(Exception):
         self.typed_value = typed_value
     def error_message(self) -> str:
-        """Return the error message for this exception."""
+        """Return the error message for this exception.
+        Returns:
+            str: Detailed error message including the value, its type, target type, and failure reason.
+        """
         return (
             f"Failed to cast value {self.typed_value.value} of type {type(self.typed_value.value)} "
             f"to type {self.typed_value.type}. "
@@ -51,14 +58,18 @@ class ColumnCastingError(Exception):
     """Raised when type casting fails for a column."""
     def __str__(self) -> str:
-        """Return a human-readable error message."""
+        """Return a human-readable error message.
+        Returns:
+            str: The formatted error message describing the column casting failure.
+        """
         return self.error_message()
     def __init__(
         self,
         error_msg: str,
-        attempted_columns: str,
-        attempted_type: TypedColumns,
+        attempted_columns: list[str],
+        attempted_type: str,
     ) -> None:
         """Initialize the exception with column casting context.
@@ -72,7 +83,11 @@ class ColumnCastingError(Exception):
         self.attempted_casting_type = attempted_type
     def error_message(self) -> str:
-        """Return the error message for this exception."""
+        """Return the error message for this exception.
+        Returns:
+            str: Detailed error message including the target type, affected columns, and failure reason.
+        """
         return (
             f"Failed to cast to type {self.attempted_casting_type} "
             f"for columns: {log_a_list(self.attempted_casting_columns, 'and')}. "
@@ -84,7 +99,11 @@ class InvalidTypedColumnsError(Exception):
     """Raised when typed columns are invalid or incorrectly specified."""
     def __str__(self) -> str:
-        """Return a human-readable error message."""
+        """Return a human-readable error message.
+        Returns:
+            str: The formatted error message describing the invalid typed columns.
+        """
         return self.error_message()
     def __init__(self, field_name: str, reason: str) -> None:
@@ -98,7 +117,11 @@ class InvalidTypedColumnsError(Exception):
         self.reason = reason
     def error_message(self) -> str:
-        """Return the error message for this exception."""
+        """Return the error message for this exception.
+        Returns:
+            str: Error message describing which field has invalid typed columns and why.
+        """
         return f"The {self.field_name} TypedColumns object {self.reason}."
@@ -106,7 +129,11 @@ class InvalidSchemaFieldTypeError(Exception):
     """Raised when schema field has invalid or unexpected type."""
     def __str__(self) -> str:
-        """Return a human-readable error message."""
+        """Return a human-readable error message.
+        Returns:
+            str: The formatted error message describing the invalid schema field type.
+        """
         return self.error_message()
     def __init__(self, msg: str) -> None:
@@ -118,7 +145,11 @@ class InvalidSchemaFieldTypeError(Exception):
         self.msg = msg
     def error_message(self) -> str:
-        """Return the error message for this exception."""
+        """Return the error message for this exception.
+        Returns:
+            str: The error message describing the schema field type issue.
+        """
         return self.msg
@@ -381,23 +412,10 @@ def _convert_schema_field_types(
         Schema: A Schema, with feature and tag column names converted to the
             List[string] format expected in downstream validation.
     """
-    feature_column_names_list = (
+    feature_column_names_list = _normalize_column_names(
         schema.feature_column_names
-        if is_list_of(schema.feature_column_names, str)
-        else (
-            schema.feature_column_names.get_all_column_names()
-            if schema.feature_column_names
-            else []
-        )
-    )
-    tag_column_names_list = (
-        schema.tag_column_names
-        if is_list_of(schema.tag_column_names, str)
-        else schema.tag_column_names.get_all_column_names()
-        if schema.tag_column_names
-        else []
     )
+    tag_column_names_list = _normalize_column_names(schema.tag_column_names)
     schema_dict = {
         "feature_column_names": feature_column_names_list,

arize 8.0.0b2__py3-none-any.whl → 8.0.0b4__py3-none-any.whl

arize 8.0.0b2__py3-none-any.whl → 8.0.0b4__py3-none-any.whl