PyPI - arize - Versions diffs - 8.0.0b2__py3-none-any.whl → 8.0.0b4__py3-none-any.whl - Mend

arize 8.0.0b2__py3-none-any.whl → 8.0.0b4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (60) hide show

arize/__init__.py +8 -1
arize/_exporter/client.py +18 -17
arize/_exporter/parsers/tracing_data_parser.py +9 -4
arize/_exporter/validation.py +1 -1
arize/_flight/client.py +33 -13
arize/_lazy.py +37 -2
arize/client.py +61 -35
arize/config.py +168 -14
arize/constants/config.py +1 -0
arize/datasets/client.py +32 -19
arize/embeddings/auto_generator.py +14 -7
arize/embeddings/base_generators.py +15 -9
arize/embeddings/cv_generators.py +2 -2
arize/embeddings/nlp_generators.py +8 -8
arize/embeddings/tabular_generators.py +5 -5
arize/exceptions/config.py +22 -0
arize/exceptions/parameters.py +1 -1
arize/exceptions/values.py +8 -5
arize/experiments/__init__.py +4 -0
arize/experiments/client.py +17 -11
arize/experiments/evaluators/base.py +6 -3
arize/experiments/evaluators/executors.py +6 -4
arize/experiments/evaluators/rate_limiters.py +3 -1
arize/experiments/evaluators/types.py +7 -5
arize/experiments/evaluators/utils.py +7 -5
arize/experiments/functions.py +111 -48
arize/experiments/tracing.py +4 -1
arize/experiments/types.py +31 -26
arize/logging.py +53 -32
arize/ml/batch_validation/validator.py +82 -70
arize/ml/bounded_executor.py +25 -6
arize/ml/casting.py +45 -27
arize/ml/client.py +35 -28
arize/ml/proto.py +16 -17
arize/ml/stream_validation.py +63 -25
arize/ml/surrogate_explainer/mimic.py +15 -7
arize/ml/types.py +26 -12
arize/pre_releases.py +7 -6
arize/py.typed +0 -0
arize/regions.py +10 -10
arize/spans/client.py +113 -21
arize/spans/conversion.py +7 -5
arize/spans/validation/annotations/dataframe_form_validation.py +1 -1
arize/spans/validation/annotations/value_validation.py +11 -14
arize/spans/validation/common/dataframe_form_validation.py +1 -1
arize/spans/validation/common/value_validation.py +10 -13
arize/spans/validation/evals/value_validation.py +1 -1
arize/spans/validation/metadata/argument_validation.py +1 -1
arize/spans/validation/metadata/dataframe_form_validation.py +1 -1
arize/spans/validation/metadata/value_validation.py +23 -1
arize/utils/arrow.py +37 -1
arize/utils/online_tasks/dataframe_preprocessor.py +8 -4
arize/utils/proto.py +0 -1
arize/utils/types.py +6 -6
arize/version.py +1 -1
{arize-8.0.0b2.dist-info → arize-8.0.0b4.dist-info}/METADATA +10 -2
{arize-8.0.0b2.dist-info → arize-8.0.0b4.dist-info}/RECORD +60 -58
{arize-8.0.0b2.dist-info → arize-8.0.0b4.dist-info}/WHEEL +0 -0
{arize-8.0.0b2.dist-info → arize-8.0.0b4.dist-info}/licenses/LICENSE +0 -0
{arize-8.0.0b2.dist-info → arize-8.0.0b4.dist-info}/licenses/NOTICE +0 -0

arize/ml/client.py CHANGED Viewed

@@ -1,4 +1,3 @@
-# type: ignore[pb2]
 """Client implementation for managing ML models in the Arize platform."""
 from __future__ import annotations
@@ -6,7 +5,7 @@ from __future__ import annotations
 import copy
 import logging
 import time
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Any, cast
 from arize._generated.protocol.rec import public_pb2 as pb2
 from arize._lazy import require
@@ -377,8 +376,12 @@ class MLModelsClient:
             if embedding_features or prompt or response:
                 # NOTE: Deep copy is necessary to avoid side effects on the original input dictionary
-                combined_embedding_features = (
-                    embedding_features.copy() if embedding_features else {}
+                combined_embedding_features: dict[str, str | Embedding] = (
+                    cast(
+                        "dict[str, str | Embedding]", embedding_features.copy()
+                    )
+                    if embedding_features
+                    else {}
                 )
                 # Map prompt as embedding features for generative models
                 if prompt is not None:
@@ -395,7 +398,7 @@ class MLModelsClient:
                 p.MergeFrom(embedding_feats)
             if tags or llm_run_metadata:
-                joined_tags = copy.deepcopy(tags)
+                joined_tags = copy.deepcopy(tags) if tags is not None else {}
                 if llm_run_metadata:
                     if llm_run_metadata.total_token_count is not None:
                         joined_tags[
@@ -522,7 +525,7 @@ class MLModelsClient:
             record=rec,
             headers=headers,
             timeout=timeout,
-            indexes=None,
+            indexes=None,  # type: ignore[arg-type]
         )
     def log(
@@ -668,8 +671,10 @@ class MLModelsClient:
         dataframe = remove_extraneous_columns(df=dataframe, schema=schema)
         # always validate pd.Category is not present, if yes, convert to string
+        # Type ignore: pandas.api.types.is_categorical_dtype exists but stubs may be incomplete
         has_cat_col = any(
-            ptypes.is_categorical_dtype(x) for x in dataframe.dtypes
+            ptypes.is_categorical_dtype(x)  # type: ignore[attr-defined]
+            for x in dataframe.dtypes
         )
         if has_cat_col:
             cat_cols = [
@@ -691,14 +696,15 @@ class MLModelsClient:
             from arize.ml.surrogate_explainer.mimic import Mimic
             logger.debug("Running surrogate_explainability.")
-            if schema.shap_values_column_names:
+            # Type ignore: schema typed as BaseSchema but runtime is Schema with these attrs
+            if schema.shap_values_column_names:  # type: ignore[attr-defined]
                 logger.info(
                     "surrogate_explainability=True has no effect "
                     "because shap_values_column_names is already specified in schema."
                 )
-            elif schema.feature_column_names is None or (
-                hasattr(schema.feature_column_names, "__len__")
-                and len(schema.feature_column_names) == 0
+            elif schema.feature_column_names is None or (  # type: ignore[attr-defined]
+                hasattr(schema.feature_column_names, "__len__")  # type: ignore[attr-defined]
+                and len(schema.feature_column_names) == 0  # type: ignore[attr-defined]
             ):
                 logger.info(
                     "surrogate_explainability=True has no effect "
@@ -706,7 +712,9 @@ class MLModelsClient:
                 )
             else:
                 dataframe, schema = Mimic.augment(
-                    df=dataframe, schema=schema, model_type=model_type
+                    df=dataframe,
+                    schema=schema,  # type: ignore[arg-type]
+                    model_type=model_type,
                 )
         # Convert to Arrow table
@@ -733,8 +741,8 @@ class MLModelsClient:
                 pyarrow_schema=pa_table.schema,
             )
             if errors:
-                for e in errors:
-                    logger.error(e)
+                for error in errors:
+                    logger.error(error)
                 raise ValidationFailure(errors)
         if validate:
             logger.debug("Performing values validation.")
@@ -745,8 +753,8 @@ class MLModelsClient:
                 model_type=model_type,
             )
             if errors:
-                for e in errors:
-                    logger.error(e)
+                for error in errors:
+                    logger.error(error)
                 raise ValidationFailure(errors)
         if isinstance(schema, Schema) and not schema.has_prediction_columns():
@@ -759,12 +767,12 @@ class MLModelsClient:
         if environment == Environments.CORPUS:
             proto_schema = _get_pb_schema_corpus(
-                schema=schema,
+                schema=schema,  # type: ignore[arg-type]
                 model_id=model_name,
             )
         else:
             proto_schema = _get_pb_schema(
-                schema=schema,
+                schema=schema,  # type: ignore[arg-type]
                 model_id=model_name,
                 model_version=model_version,
                 model_type=model_type,
@@ -880,6 +888,7 @@ class MLModelsClient:
     def export_to_parquet(
         self,
         *,
+        path: str,
         space_id: str,
         model_name: str,
         environment: Environments,
@@ -892,13 +901,14 @@ class MLModelsClient:
         columns: list | None = None,
         similarity_search_params: SimilaritySearchParams | None = None,
         stream_chunk_size: int | None = None,
-    ) -> pd.DataFrame:
-        """Export model data from Arize to a Parquet file and return as DataFrame.
+    ) -> None:
+        """Export model data from Arize to a Parquet file.
         Retrieves prediction and optional actual data for a model within a specified time
-        range, saves it as a Parquet file, and returns it as a :class:`pandas.DataFrame`.
+        range and writes it directly to a Parquet file at the specified path.
         Args:
+            path: The file path where the Parquet file will be written.
             space_id: The space ID where the model resides.
             model_name: The name of the model to export data from.
             environment: The environment to export from (PRODUCTION, TRAINING, or VALIDATION).
@@ -916,16 +926,12 @@ class MLModelsClient:
                 filtering.
             stream_chunk_size: Optional chunk size for streaming large result sets.
-        Returns:
-            :class:`pandas.DataFrame`: A pandas DataFrame containing the exported data.
-                The data is also saved to a Parquet file by the underlying export client.
         Raises:
             RuntimeError: If the Flight client request fails or returns no response.
         Notes:
             - Uses Apache Arrow Flight for efficient data transfer
-            - The Parquet file location is managed by the ArizeExportClient
+            - Data is written directly to the specified path as a Parquet file
             - Large exports may benefit from specifying stream_chunk_size
         """
         require(_BATCH_EXTRA, _BATCH_DEPS)
@@ -943,7 +949,8 @@ class MLModelsClient:
             exporter = ArizeExportClient(
                 flight_client=flight_client,
             )
-            return exporter.export_to_parquet(
+            exporter.export_to_parquet(
+                path=path,
                 space_id=space_id,
                 model_id=model_name,
                 environment=environment,
@@ -982,7 +989,7 @@ class MLModelsClient:
         headers: dict[str, str],
         timeout: float | None,
         indexes: tuple,
-    ) -> object:
+    ) -> cf.Future[Any]:
         """Post a record to Arize via async HTTP request with protobuf JSON serialization."""
         from google.protobuf.json_format import MessageToDict

arize/ml/proto.py CHANGED Viewed

@@ -1,4 +1,3 @@
-# type: ignore[pb2]
 """Protocol buffer utilities for ML model data serialization."""
 from __future__ import annotations
@@ -26,7 +25,7 @@ from arize.ml.types import (
 from arize.utils.types import is_list_of
-def get_pb_dictionary(d: dict[object, object] | None) -> dict[str, object]:
+def get_pb_dictionary(d: object | None) -> dict[str, object]:
     """Convert a dictionary to protobuf format with string keys and pb2.Value values.
     Args:
@@ -37,6 +36,8 @@ def get_pb_dictionary(d: dict[object, object] | None) -> dict[str, object]:
     """
     if d is None:
         return {}
+    if not isinstance(d, dict):
+        return {}
     # Takes a dictionary and
     # - casts the keys as strings
     # - turns the values of the dictionary to our proto values pb2.Value()
@@ -48,7 +49,7 @@ def get_pb_dictionary(d: dict[object, object] | None) -> dict[str, object]:
     return converted_dict
-def get_pb_value(name: str | int | float, value: pb2.Value) -> pb2.Value:
+def get_pb_value(name: object, value: pb2.Value) -> pb2.Value:
     """Convert a Python value to a protobuf Value object.
     Args:
@@ -114,7 +115,8 @@ def get_pb_label(
     Raises:
         ValueError: If model_type is not supported.
     """
-    value = convert_element(value)
+    # convert_element preserves value type but returns object for type safety
+    value = convert_element(value)  # type: ignore[assignment]
     if model_type in NUMERIC_MODEL_TYPES:
         return _get_numeric_pb_label(prediction_or_actual, value)
     if (
@@ -129,7 +131,7 @@ def get_pb_label(
     if model_type == ModelTypes.MULTI_CLASS:
         return _get_multi_class_pb_label(value)
     raise ValueError(
-        f"model_type must be one of: {[mt.prediction_or_actual for mt in ModelTypes]} "
+        f"model_type must be one of: {[mt.name for mt in ModelTypes]} "
         f"Got "
         f"{model_type} instead."
     )
@@ -197,12 +199,12 @@ def get_pb_embedding(val: Embedding) -> pb2.Embedding:
 def _get_numeric_pb_label(
     prediction_or_actual: str,
-    value: int | float,
+    value: object,
 ) -> pb2.PredictionLabel | pb2.ActualLabel:
     if not isinstance(value, (int, float)):
         raise TypeError(
             f"Received {prediction_or_actual}_label = {value}, of type {type(value)}. "
-            + f"{[mt.prediction_or_actual for mt in NUMERIC_MODEL_TYPES]} models accept labels of "
+            + f"{[mt.name for mt in NUMERIC_MODEL_TYPES]} models accept labels of "
             f"type int or float"
         )
     if prediction_or_actual == "prediction":
@@ -214,7 +216,7 @@ def _get_numeric_pb_label(
 def _get_score_categorical_pb_label(
     prediction_or_actual: str,
-    value: bool | str | tuple[str, float],
+    value: object,
 ) -> pb2.PredictionLabel | pb2.ActualLabel:
     sc = pb2.ScoreCategorical()
     if isinstance(value, bool):
@@ -229,7 +231,7 @@ def _get_score_categorical_pb_label(
             raise TypeError(
                 f"Received {prediction_or_actual}_label = {value}, of type "
                 f"{type(value)}[{type(value[0])}, None]. "
-                f"{[mt.prediction_or_actual for mt in CATEGORICAL_MODEL_TYPES]} models accept "
+                f"{[mt.name for mt in CATEGORICAL_MODEL_TYPES]} models accept "
                 "values of type str, bool, or Tuple[str, float]"
             )
         if not isinstance(value[0], (bool, str)) or not isinstance(
@@ -238,7 +240,7 @@ def _get_score_categorical_pb_label(
             raise TypeError(
                 f"Received {prediction_or_actual}_label = {value}, of type "
                 f"{type(value)}[{type(value[0])}, {type(value[1])}]. "
-                f"{[mt.prediction_or_actual for mt in CATEGORICAL_MODEL_TYPES]} models accept "
+                f"{[mt.name for mt in CATEGORICAL_MODEL_TYPES]} models accept "
                 "values of type str, bool, or Tuple[str or bool, float]"
             )
         if isinstance(value[0], bool):
@@ -249,7 +251,7 @@ def _get_score_categorical_pb_label(
     else:
         raise TypeError(
             f"Received {prediction_or_actual}_label = {value}, of type {type(value)}. "
-            + f"{[mt.prediction_or_actual for mt in CATEGORICAL_MODEL_TYPES]} models accept values "
+            + f"{[mt.name for mt in CATEGORICAL_MODEL_TYPES]} models accept values "
             f"of type str, bool, int, float or Tuple[str, float]"
         )
     if prediction_or_actual == "prediction":
@@ -261,10 +263,7 @@ def _get_score_categorical_pb_label(
 def _get_cv_pb_label(
     prediction_or_actual: str,
-    value: ObjectDetectionLabel
-    | SemanticSegmentationLabel
-    | InstanceSegmentationPredictionLabel
-    | InstanceSegmentationActualLabel,
+    value: object,
 ) -> pb2.PredictionLabel | pb2.ActualLabel:
     if isinstance(value, ObjectDetectionLabel):
         return _get_object_detection_pb_label(prediction_or_actual, value)
@@ -429,7 +428,7 @@ def _get_instance_segmentation_actual_pb_label(
 def _get_ranking_pb_label(
-    value: RankingPredictionLabel | RankingActualLabel,
+    value: object,
 ) -> pb2.PredictionLabel | pb2.ActualLabel:
     if not isinstance(value, (RankingPredictionLabel, RankingActualLabel)):
         raise InvalidValueType(
@@ -460,7 +459,7 @@ def _get_ranking_pb_label(
 def _get_multi_class_pb_label(
-    value: MultiClassPredictionLabel | MultiClassActualLabel,
+    value: object,
 ) -> pb2.PredictionLabel | pb2.ActualLabel:
     if not isinstance(
         value, (MultiClassPredictionLabel, MultiClassActualLabel)

arize/ml/stream_validation.py CHANGED Viewed

@@ -1,4 +1,3 @@
-# type: ignore[pb2]
 """Stream validation logic for ML model predictions."""
 from arize.constants.ml import MAX_PREDICTION_ID_LEN, MIN_PREDICTION_ID_LEN
@@ -28,20 +27,8 @@ from arize.ml.types import (
 def validate_label(
     prediction_or_actual: str,
     model_type: ModelTypes,
-    label: str
-    | bool
-    | int
-    | float
-    | tuple[str | bool, float]
-    | ObjectDetectionLabel
-    | RankingPredictionLabel
-    | RankingActualLabel
-    | SemanticSegmentationLabel
-    | InstanceSegmentationPredictionLabel
-    | InstanceSegmentationActualLabel
-    | MultiClassPredictionLabel
-    | MultiClassActualLabel,
-    embedding_features: dict[str, Embedding],
+    label: object,
+    embedding_features: dict[str, Embedding] | None,
 ) -> None:
     """Validate a label value against the specified model type.
@@ -75,8 +62,17 @@ def validate_label(
 def _validate_numeric_label(
     model_type: ModelTypes,
-    label: str | bool | int | float | tuple[str | bool, float],
+    label: object,
 ) -> None:
+    """Validate that a label is numeric (int or float) for numeric model types.
+    Args:
+        model_type: The model type being validated.
+        label: The label value to validate.
+    Raises:
+        InvalidValueType: If the label is not an int or float.
+    """
     if not isinstance(label, (float, int)):
         raise InvalidValueType(
             f"label {label}",
@@ -87,8 +83,18 @@ def _validate_numeric_label(
 def _validate_categorical_label(
     model_type: ModelTypes,
-    label: str | bool | int | float | tuple[str | bool, float],
+    label: object,
 ) -> None:
+    """Validate that a label is categorical (scalar or tuple with confidence) for categorical model types.
+    Args:
+        model_type: The model type being validated.
+        label: The label value to validate.
+    Raises:
+        InvalidValueType: If the label is not a valid categorical type (bool, int, float, str,
+            or tuple of [str/bool, float]).
+    """
     is_valid = isinstance(label, (str, bool, int, float)) or (
         isinstance(label, tuple)
         and isinstance(label[0], (str, bool))
@@ -104,12 +110,20 @@ def _validate_categorical_label(
 def _validate_cv_label(
     prediction_or_actual: str,
-    label: ObjectDetectionLabel
-    | SemanticSegmentationLabel
-    | InstanceSegmentationPredictionLabel
-    | InstanceSegmentationActualLabel,
-    embedding_features: dict[str, Embedding],
+    label: object,
+    embedding_features: dict[str, Embedding] | None,
 ) -> None:
+    """Validate a computer vision label for object detection or segmentation tasks.
+    Args:
+        prediction_or_actual: Either 'prediction' or 'actual' to indicate label context.
+        label: The CV label to validate.
+        embedding_features: Dictionary of embedding features that must contain exactly one entry.
+    Raises:
+        InvalidValueType: If the label is not a valid CV label type.
+        ValueError: If embedding_features is None or doesn't contain exactly one feature.
+    """
     if (
         not isinstance(label, ObjectDetectionLabel)
         and not isinstance(label, SemanticSegmentationLabel)
@@ -137,8 +151,16 @@ def _validate_cv_label(
 def _validate_ranking_label(
-    label: RankingPredictionLabel | RankingActualLabel,
+    label: object,
 ) -> None:
+    """Validate a ranking label for ranking model types.
+    Args:
+        label: The ranking label to validate.
+    Raises:
+        InvalidValueType: If the label is not a RankingPredictionLabel or RankingActualLabel.
+    """
     if not isinstance(label, (RankingPredictionLabel, RankingActualLabel)):
         raise InvalidValueType(
             f"label {label}",
@@ -149,8 +171,16 @@ def _validate_ranking_label(
 def _validate_generative_llm_label(
-    label: str | bool | int | float,
+    label: object,
 ) -> None:
+    """Validate a label for generative LLM model types.
+    Args:
+        label: The label value to validate.
+    Raises:
+        InvalidValueType: If the label is not a bool, int, float, or str.
+    """
     is_valid = isinstance(label, (str, bool, int, float))
     if not is_valid:
         raise InvalidValueType(
@@ -161,8 +191,16 @@ def _validate_generative_llm_label(
 def _validate_multi_class_label(
-    label: MultiClassPredictionLabel | MultiClassActualLabel,
+    label: object,
 ) -> None:
+    """Validate a multi-class label for multi-class model types.
+    Args:
+        label: The multi-class label to validate.
+    Raises:
+        InvalidValueType: If the label is not a MultiClassPredictionLabel or MultiClassActualLabel.
+    """
     if not isinstance(
         label, (MultiClassPredictionLabel, MultiClassActualLabel)
     ):

arize/ml/surrogate_explainer/mimic.py CHANGED Viewed

@@ -19,6 +19,7 @@ from arize.ml.types import (
     CATEGORICAL_MODEL_TYPES,
     NUMERIC_MODEL_TYPES,
     ModelTypes,
+    _normalize_column_names,
 )
 if TYPE_CHECKING:
@@ -60,7 +61,7 @@ class Mimic:
         df: pd.DataFrame, schema: Schema, model_type: ModelTypes
     ) -> tuple[pd.DataFrame, Schema]:
         """Augment the :class:`pandas.DataFrame` and schema with SHAP values for explainability."""
-        features = schema.feature_column_names
+        features = _normalize_column_names(schema.feature_column_names)
         X = df[features]
         if X.shape[1] == 0:
@@ -85,25 +86,32 @@ class Mimic:
                 )
             # model func requires 1 positional argument
-            def model_func(_: object) -> object:  # type: ignore
+            def model_func(_: object) -> object:
                 return np.column_stack((1 - y, y))
         elif model_type in NUMERIC_MODEL_TYPES:
-            y_col_name = schema.prediction_label_column_name
+            y_col_name_nullable: str | None = (
+                schema.prediction_label_column_name
+            )
             if schema.prediction_score_column_name is not None:
-                y_col_name = schema.prediction_score_column_name
-            y = df[y_col_name].to_numpy()
+                y_col_name_nullable = schema.prediction_score_column_name
+            if y_col_name_nullable is None:
+                raise ValueError(
+                    f"For {model_type} models, either prediction_label_column_name "
+                    "or prediction_score_column_name must be specified"
+                )
+            y = df[y_col_name_nullable].to_numpy()
             _finite_count = np.isfinite(y).sum()
             if len(y) - _finite_count:
                 raise ValueError(
                     f"To calculate surrogate explainability for {model_type}, "
                     f"predictions must not contain NaN or infinite values, but "
-                    f"{len(y) - _finite_count} NaN or infinite value(s) are found in {y_col_name}."
+                    f"{len(y) - _finite_count} NaN or infinite value(s) are found in {y_col_name_nullable}."
                 )
             # model func requires 1 positional argument
-            def model_func(_: object) -> object:  # type: ignore
+            def model_func(_: object) -> object:
                 return y
         else:

arize/ml/types.py CHANGED Viewed

@@ -2,16 +2,19 @@
 import logging
 import math
+import sys
 from collections.abc import Iterator
 from dataclasses import asdict, dataclass, replace
 from datetime import datetime
 from decimal import Decimal
 from enum import Enum, unique
 from itertools import chain
-from typing import (
-    NamedTuple,
-    Self,
-)
+from typing import NamedTuple
+if sys.version_info >= (3, 11):
+    from typing import Self
+else:
+    from typing_extensions import Self
 import numpy as np
@@ -29,6 +32,17 @@ from arize.utils.types import is_dict_of, is_iterable_of, is_list_of
 logger = logging.getLogger(__name__)
+def _normalize_column_names(
+    col_names: "list[str] | TypedColumns | None",
+) -> list[str]:
+    """Convert TypedColumns or list to a flat list of column names."""
+    if col_names is None:
+        return []
+    if isinstance(col_names, list):
+        return col_names
+    return col_names.get_all_column_names()
 @unique
 class ModelTypes(Enum):
     """Enum representing supported model types in Arize."""
@@ -190,7 +204,7 @@ class Embedding(NamedTuple):
             )
         # Fail if not all elements in list are floats
         allowed_types = (int, float, np.int16, np.int32, np.float16, np.float32)
-        if not all(isinstance(val, allowed_types) for val in self.vector):  # type: ignore
+        if not all(isinstance(val, allowed_types) for val in self.vector):
             raise TypeError(
                 f"Embedding vector must be a vector of integers and/or floats. Got "
                 f"{emb_name}.vector = {self.vector}"
@@ -269,7 +283,7 @@ class Embedding(NamedTuple):
     @staticmethod
     def _is_valid_iterable(
-        data: str | list[str] | list[float] | np.ndarray,
+        data: object,
     ) -> bool:
         """Validates that the input data field is of the correct iterable type.
@@ -1196,7 +1210,7 @@ class Schema(BaseSchema):
     actual_score_column_name: str | None = None
     shap_values_column_names: dict[str, str] | None = None
     embedding_feature_column_names: dict[str, EmbeddingColumnNames] | None = (
-        None  # type:ignore
+        None
     )
     prediction_group_id_column_name: str | None = None
     rank_column_name: str | None = None
@@ -1214,7 +1228,7 @@ class Schema(BaseSchema):
     prompt_template_column_names: PromptTemplateColumnNames | None = None
     llm_config_column_names: LLMConfigColumnNames | None = None
     llm_run_metadata_column_names: LLMRunMetadataColumnNames | None = None
-    retrieved_document_ids_column_name: list[str] | None = None
+    retrieved_document_ids_column_name: str | None = None
     multi_class_threshold_scores_column_name: str | None = None
     semantic_segmentation_prediction_column_names: (
         SemanticSegmentationColumnNames | None
@@ -1231,7 +1245,7 @@ class Schema(BaseSchema):
     def get_used_columns_counts(self) -> dict[str, int]:
         """Return a dict mapping column names to their usage count."""
-        columns_used_counts = {}
+        columns_used_counts: dict[str, int] = {}
         for field in self.__dataclass_fields__:
             if field.endswith("column_name"):
@@ -1240,7 +1254,7 @@ class Schema(BaseSchema):
                     add_to_column_count_dictionary(columns_used_counts, col)
         if self.feature_column_names is not None:
-            for col in self.feature_column_names:
+            for col in _normalize_column_names(self.feature_column_names):
                 add_to_column_count_dictionary(columns_used_counts, col)
         if self.embedding_feature_column_names is not None:
@@ -1259,7 +1273,7 @@ class Schema(BaseSchema):
                     )
         if self.tag_column_names is not None:
-            for col in self.tag_column_names:
+            for col in _normalize_column_names(self.tag_column_names):
                 add_to_column_count_dictionary(columns_used_counts, col)
         if self.shap_values_column_names is not None:
@@ -1404,7 +1418,7 @@ class CorpusSchema(BaseSchema):
     def get_used_columns_counts(self) -> dict[str, int]:
         """Return a dict mapping column names to their usage count."""
-        columns_used_counts = {}
+        columns_used_counts: dict[str, int] = {}
         if self.document_id_column_name is not None:
             add_to_column_count_dictionary(

arize/pre_releases.py CHANGED Viewed

@@ -3,15 +3,15 @@
 import functools
 import logging
 from collections.abc import Callable
-from enum import StrEnum
-from typing import TypeVar
+from enum import Enum
+from typing import TypeVar, cast
 from arize.version import __version__
 logger = logging.getLogger(__name__)
-class ReleaseStage(StrEnum):
+class ReleaseStage(Enum):
     """Enum representing the release stage of API features."""
     ALPHA = "alpha"
@@ -26,12 +26,12 @@ _F = TypeVar("_F", bound=Callable)
 def _format_prerelease_message(*, key: str, stage: ReleaseStage) -> str:
     article = "an" if stage is ReleaseStage.ALPHA else "a"
     return (
-        f"[{stage.upper()}] {key} is {article} {stage} API "
+        f"[{stage.value.upper()}] {key} is {article} {stage.value} API "
         f"in Arize SDK v{__version__} and may change without notice."
     )
-def prerelease_endpoint(*, stage: ReleaseStage, key: str) -> Callable[[_F], _F]:
+def prerelease_endpoint(*, key: str, stage: ReleaseStage) -> Callable[[_F], _F]:
     """Decorate a method to emit a prerelease warning via logging once per process."""
     def deco(fn: _F) -> _F:
@@ -42,6 +42,7 @@ def prerelease_endpoint(*, stage: ReleaseStage, key: str) -> Callable[[_F], _F]:
                 logger.warning(_format_prerelease_message(key=key, stage=stage))
             return fn(*args, **kwargs)
-        return wrapper  # type: ignore[return-value]
+        # Cast: functools.wraps preserves function signature at runtime but mypy can't verify this
+        return cast("_F", wrapper)
     return deco

arize/py.typed ADDED Viewed

File without changes

arize 8.0.0b2__py3-none-any.whl → 8.0.0b4__py3-none-any.whl

arize 8.0.0b2__py3-none-any.whl → 8.0.0b4__py3-none-any.whl