arize 8.0.0b1__py3-none-any.whl → 8.0.0b4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arize/__init__.py +9 -2
- arize/_client_factory.py +50 -0
- arize/_exporter/client.py +18 -17
- arize/_exporter/parsers/tracing_data_parser.py +9 -4
- arize/_exporter/validation.py +1 -1
- arize/_flight/client.py +37 -17
- arize/_generated/api_client/api/datasets_api.py +6 -6
- arize/_generated/api_client/api/experiments_api.py +6 -6
- arize/_generated/api_client/api/projects_api.py +3 -3
- arize/_lazy.py +61 -10
- arize/client.py +66 -50
- arize/config.py +175 -48
- arize/constants/config.py +1 -0
- arize/constants/ml.py +9 -16
- arize/constants/spans.py +5 -10
- arize/datasets/client.py +45 -28
- arize/datasets/errors.py +1 -1
- arize/datasets/validation.py +2 -2
- arize/embeddings/auto_generator.py +16 -9
- arize/embeddings/base_generators.py +15 -9
- arize/embeddings/cv_generators.py +2 -2
- arize/embeddings/errors.py +2 -2
- arize/embeddings/nlp_generators.py +8 -8
- arize/embeddings/tabular_generators.py +6 -6
- arize/exceptions/base.py +0 -52
- arize/exceptions/config.py +22 -0
- arize/exceptions/parameters.py +1 -330
- arize/exceptions/values.py +8 -5
- arize/experiments/__init__.py +4 -0
- arize/experiments/client.py +31 -18
- arize/experiments/evaluators/base.py +12 -9
- arize/experiments/evaluators/executors.py +16 -7
- arize/experiments/evaluators/rate_limiters.py +3 -1
- arize/experiments/evaluators/types.py +9 -7
- arize/experiments/evaluators/utils.py +7 -5
- arize/experiments/functions.py +128 -58
- arize/experiments/tracing.py +4 -1
- arize/experiments/types.py +34 -31
- arize/logging.py +54 -33
- arize/ml/batch_validation/errors.py +10 -1004
- arize/ml/batch_validation/validator.py +351 -291
- arize/ml/bounded_executor.py +25 -6
- arize/ml/casting.py +51 -33
- arize/ml/client.py +43 -35
- arize/ml/proto.py +21 -22
- arize/ml/stream_validation.py +64 -27
- arize/ml/surrogate_explainer/mimic.py +18 -10
- arize/ml/types.py +27 -67
- arize/pre_releases.py +10 -6
- arize/projects/client.py +9 -4
- arize/py.typed +0 -0
- arize/regions.py +11 -11
- arize/spans/client.py +125 -31
- arize/spans/columns.py +32 -36
- arize/spans/conversion.py +12 -11
- arize/spans/validation/annotations/dataframe_form_validation.py +1 -1
- arize/spans/validation/annotations/value_validation.py +11 -14
- arize/spans/validation/common/argument_validation.py +3 -3
- arize/spans/validation/common/dataframe_form_validation.py +7 -7
- arize/spans/validation/common/value_validation.py +11 -14
- arize/spans/validation/evals/dataframe_form_validation.py +4 -4
- arize/spans/validation/evals/evals_validation.py +6 -6
- arize/spans/validation/evals/value_validation.py +1 -1
- arize/spans/validation/metadata/argument_validation.py +1 -1
- arize/spans/validation/metadata/dataframe_form_validation.py +2 -2
- arize/spans/validation/metadata/value_validation.py +23 -1
- arize/spans/validation/spans/dataframe_form_validation.py +2 -2
- arize/spans/validation/spans/spans_validation.py +6 -6
- arize/utils/arrow.py +38 -2
- arize/utils/cache.py +2 -2
- arize/utils/dataframe.py +4 -4
- arize/utils/online_tasks/dataframe_preprocessor.py +15 -11
- arize/utils/openinference_conversion.py +10 -10
- arize/utils/proto.py +0 -1
- arize/utils/types.py +6 -6
- arize/version.py +1 -1
- {arize-8.0.0b1.dist-info → arize-8.0.0b4.dist-info}/METADATA +32 -7
- {arize-8.0.0b1.dist-info → arize-8.0.0b4.dist-info}/RECORD +81 -78
- {arize-8.0.0b1.dist-info → arize-8.0.0b4.dist-info}/WHEEL +0 -0
- {arize-8.0.0b1.dist-info → arize-8.0.0b4.dist-info}/licenses/LICENSE +0 -0
- {arize-8.0.0b1.dist-info → arize-8.0.0b4.dist-info}/licenses/NOTICE +0 -0
arize/datasets/client.py
CHANGED
@@ -5,7 +5,7 @@ from __future__ import annotations
 import logging
 import time
 import uuid
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Any, cast
 
 import pandas as pd
 import pyarrow as pa
@@ -24,6 +24,11 @@ from arize.utils.openinference_conversion import (
 from arize.utils.size import get_payload_size_mb
 
 if TYPE_CHECKING:
+    # builtins is needed to use builtins.list in type annotations because
+    # the class has a list() method that shadows the built-in list type
+    import builtins
+
+    from arize._generated.api_client.api_client import ApiClient
     from arize.config import SDKConfiguration
 
 logger = logging.getLogger(__name__)
@@ -41,18 +46,21 @@ class DatasetsClient:
         :class:`arize.config.SDKConfiguration`.
     """
 
-    def __init__(
+    def __init__(
+        self, *, sdk_config: SDKConfiguration, generated_client: ApiClient
+    ) -> None:
         """
         Args:
             sdk_config: Resolved SDK configuration.
+            generated_client: Shared generated API client instance.
        """  # noqa: D205, D212
         self._sdk_config = sdk_config
 
         # Import at runtime so it's still lazy and extras-gated by the parent
         from arize._generated import api_client as gen
 
-        # Use the
-        self._api = gen.DatasetsApi(
+        # Use the provided client directly
+        self._api = gen.DatasetsApi(generated_client)
 
     @prerelease_endpoint(key="datasets.list", stage=ReleaseStage.BETA)
     def list(
@@ -93,7 +101,7 @@
         *,
         name: str,
         space_id: str,
-        examples: list[dict[str, object]] | pd.DataFrame,
+        examples: builtins.list[dict[str, object]] | pd.DataFrame,
         force_http: bool = False,
     ) -> models.Dataset:
         """Create a dataset with JSON examples.
@@ -117,7 +125,7 @@
             space_id: Space ID to create the dataset in.
             examples: Dataset examples either as:
                 - a list of JSON-like dicts, or
-                - a pandas
+                - a :class:`pandas.DataFrame` (will be converted to records for REST).
             force_http: If True, force REST upload even if the payload exceeds the
                 configured REST payload threshold.
 
@@ -125,7 +133,7 @@
             The created dataset object as returned by the API.
 
         Raises:
-            TypeError: If `examples` is not a list of dicts or a pandas
+            TypeError: If `examples` is not a list of dicts or a :class:`pandas.DataFrame`.
             RuntimeError: If the Flight upload path is selected and the Flight request
                 fails.
             arize._generated.api_client.exceptions.ApiException: If the REST API
@@ -146,7 +154,7 @@
         from arize._generated import api_client as gen
 
         data = (
-            examples.to_dict(orient="records")
+            examples.to_dict(orient="records")
             if isinstance(examples, pd.DataFrame)
             else examples
         )
@@ -154,7 +162,8 @@
         body = gen.DatasetsCreateRequest(
             name=name,
             space_id=space_id,
-
+            # Cast: pandas to_dict returns dict[Hashable, Any] but API requires dict[str, Any]
+            examples=cast("list[dict[str, Any]]", data),
         )
         return self._api.datasets_create(datasets_create_request=body)
 
@@ -165,15 +174,12 @@
             "Trying to convert to DataFrame for more efficient upload via "
             "gRPC + Flight."
         )
-
-            examples
-            if isinstance(examples, pd.DataFrame)
-            else pd.DataFrame(examples)
-        )
+        if not isinstance(examples, pd.DataFrame):
+            examples = pd.DataFrame(examples)
         return self._create_dataset_via_flight(
             name=name,
             space_id=space_id,
-            examples=
+            examples=examples,
         )
 
     @prerelease_endpoint(key="datasets.get", stage=ReleaseStage.BETA)
@@ -205,7 +211,8 @@
         Args:
             dataset_id: Dataset ID to delete.
 
-        Returns:
+        Returns:
+            This method returns None on success (common empty 204 response).
 
         Raises:
             arize._generated.api_client.exceptions.ApiException: If the REST API
@@ -275,7 +282,11 @@
         )
         if dataset_df is not None:
             return models.DatasetsExamplesList200Response(
-
+                # Cast: Pydantic validates and converts dicts to DatasetExample at runtime
+                examples=cast(
+                    "list[models.DatasetExample]",
+                    dataset_df.to_dict(orient="records"),
+                ),
                 pagination=models.PaginationMetadata(
                     has_more=False,  # Note that all=True
                 ),
@@ -316,7 +327,11 @@
         )
 
         return models.DatasetsExamplesList200Response(
-
+            # Cast: Pydantic validates and converts dicts to DatasetExample at runtime
+            examples=cast(
+                "list[models.DatasetExample]",
+                dataset_df.to_dict(orient="records"),
+            ),
             pagination=models.PaginationMetadata(
                 has_more=False,  # Note that all=True
             ),
@@ -331,7 +346,7 @@
         *,
         dataset_id: str,
         dataset_version_id: str = "",
-        examples: list[dict[str, object]] | pd.DataFrame,
+        examples: builtins.list[dict[str, object]] | pd.DataFrame,
     ) -> models.Dataset:
         """Append new examples to an existing dataset.
 
@@ -354,14 +369,13 @@
             the latest dataset version is selected.
             examples: Examples to append, provided as either:
                 - a list of JSON-like dicts, or
-                - a pandas
+                - a :class:`pandas.DataFrame` (converted to records before upload).
 
         Returns:
             The updated dataset object. To see the examples, use `list_examples()`.
 
         Raises:
-            AssertionError: If `examples` is not a list of dicts or a pandas
-                DataFrame.
+            AssertionError: If `examples` is not a list of dicts or a :class:`pandas.DataFrame`.
             arize._generated.api_client.exceptions.ApiException: If the REST API
                 returns an error response (e.g. 400/401/403/404/429).
         """
@@ -373,11 +387,14 @@
         )
 
         data = (
-            examples.to_dict(orient="records")
+            examples.to_dict(orient="records")
             if isinstance(examples, pd.DataFrame)
             else examples
         )
-
+        # Cast: pandas to_dict returns dict[Hashable, Any] but API requires dict[str, Any]
+        body = gen.DatasetsExamplesInsertRequest(
+            examples=cast("list[dict[str, Any]]", data)
+        )
 
         return self._api.datasets_examples_insert(
             dataset_id=dataset_id,
@@ -390,7 +407,7 @@
         name: str,
         space_id: str,
         examples: pd.DataFrame,
-    ) ->
+    ) -> models.Dataset:
         """Internal method to create a dataset using Flight protocol for large example sets."""
         data = examples.copy()
         # Convert datetime columns to int64 (ms since epoch)
@@ -450,19 +467,19 @@ def _set_default_columns_for_dataset(df: pd.DataFrame) -> pd.DataFrame:
     """Set default values for created_at and updated_at columns if missing or null."""
     current_time = int(time.time() * 1000)
     if "created_at" in df.columns:
-        if df["created_at"].isnull().
+        if df["created_at"].isnull().any():
             df["created_at"].fillna(current_time, inplace=True)
     else:
         df["created_at"] = current_time
 
     if "updated_at" in df.columns:
-        if df["updated_at"].isnull().
+        if df["updated_at"].isnull().any():
             df["updated_at"].fillna(current_time, inplace=True)
     else:
         df["updated_at"] = current_time
 
     if "id" in df.columns:
-        if df["id"].isnull().
+        if df["id"].isnull().any():
             df["id"] = df["id"].apply(
                 lambda x: str(uuid.uuid4()) if pd.isnull(x) else x
             )
arize/datasets/errors.py
CHANGED
@@ -80,7 +80,7 @@ class RequiredColumnsError(DatasetError):
 
 
 class EmptyDatasetError(DatasetError):
-    """Raised when dataset DataFrame has no rows."""
+    """Raised when dataset :class:`pandas.DataFrame` has no rows."""
 
     def error_message(self) -> str:
         """Return the error message for this exception."""
arize/datasets/validation.py
CHANGED
@@ -8,12 +8,12 @@ from arize.datasets import errors as err
 def validate_dataset_df(
     df: pd.DataFrame,
 ) -> list[err.DatasetError]:
-    """Validate a dataset DataFrame for structural and content errors.
+    """Validate a dataset :class:`pandas.DataFrame` for structural and content errors.
 
     Checks for required columns, unique ID values, and non-empty data.
 
     Args:
-        df: The pandas
+        df: The :class:`pandas.DataFrame` to validate.
 
     Returns:
         A list of DatasetError objects found during validation. Empty list if valid.
arize/embeddings/auto_generator.py
CHANGED
@@ -1,5 +1,7 @@
 """Automatic embedding generation factory for various ML use cases."""
 
+from typing import TypeAlias
+
 import pandas as pd
 
 from arize.embeddings import constants
@@ -24,9 +26,14 @@ from arize.embeddings.nlp_generators import (
 from arize.embeddings.tabular_generators import (
     EmbeddingGeneratorForTabularFeatures,
 )
-from arize.embeddings.usecases import
+from arize.embeddings.usecases import (
+    CVUseCases,
+    NLPUseCases,
+    TabularUseCases,
+    UseCases,
+)
 
-UseCaseLike = str |
+UseCaseLike: TypeAlias = str | NLPUseCases | CVUseCases | TabularUseCases
 
 
 class EmbeddingGenerator:
@@ -49,20 +56,20 @@ class EmbeddingGenerator:
     ) -> BaseEmbeddingGenerator:
         """Create an embedding generator for the specified use case."""
         if use_case == UseCases.NLP.SEQUENCE_CLASSIFICATION:
-            return EmbeddingGeneratorForNLPSequenceClassification(**kwargs)
+            return EmbeddingGeneratorForNLPSequenceClassification(**kwargs)  # type: ignore[arg-type]
         if use_case == UseCases.NLP.SUMMARIZATION:
-            return EmbeddingGeneratorForNLPSummarization(**kwargs)
+            return EmbeddingGeneratorForNLPSummarization(**kwargs)  # type: ignore[arg-type]
         if use_case == UseCases.CV.IMAGE_CLASSIFICATION:
-            return EmbeddingGeneratorForCVImageClassification(**kwargs)
+            return EmbeddingGeneratorForCVImageClassification(**kwargs)  # type: ignore[arg-type]
         if use_case == UseCases.CV.OBJECT_DETECTION:
-            return EmbeddingGeneratorForCVObjectDetection(**kwargs)
+            return EmbeddingGeneratorForCVObjectDetection(**kwargs)  # type: ignore[arg-type]
         if use_case == UseCases.STRUCTURED.TABULAR_EMBEDDINGS:
-            return EmbeddingGeneratorForTabularFeatures(**kwargs)
+            return EmbeddingGeneratorForTabularFeatures(**kwargs)  # type: ignore[arg-type]
         raise ValueError(f"Invalid use case {use_case}")
 
     @classmethod
     def list_default_models(cls) -> pd.DataFrame:
-        """Return a DataFrame of default models for each use case."""
+        """Return a :class:`pandas.DataFrame` of default models for each use case."""
         df = pd.DataFrame(
             {
                 "Area": ["NLP", "NLP", "CV", "CV", "STRUCTURED"],
@@ -87,7 +94,7 @@ class EmbeddingGenerator:
 
     @classmethod
     def list_pretrained_models(cls) -> pd.DataFrame:
-        """Return a DataFrame of all available pretrained models."""
+        """Return a :class:`pandas.DataFrame` of all available pretrained models."""
         data = {
             "Task": ["NLP" for _ in NLP_PRETRAINED_MODELS]
             + ["CV" for _ in CV_PRETRAINED_MODELS],
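
The `UseCaseLike` change above also adds an explicit `TypeAlias` marker, which tells type checkers that the assignment defines a type alias rather than an ordinary module variable. A minimal sketch of the pattern (the enums below are illustrative stand-ins, not the SDK's):

from enum import Enum
from typing import TypeAlias


class NLPUseCasesSketch(Enum):
    SEQUENCE_CLASSIFICATION = "sequence_classification"


class CVUseCasesSketch(Enum):
    IMAGE_CLASSIFICATION = "image_classification"


# With the annotation, checkers treat this as a type alias usable in
# signatures, not as a variable that happens to hold a type expression.
UseCaseLikeSketch: TypeAlias = str | NLPUseCasesSketch | CVUseCasesSketch


def resolve(use_case: UseCaseLikeSketch) -> str:
    # Accepts either a raw string or one of the enum members.
    return use_case.value if isinstance(use_case, Enum) else use_case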
arize/embeddings/base_generators.py
CHANGED
@@ -14,11 +14,15 @@ try:
     import torch
     from datasets import Dataset
     from PIL import Image
-    from transformers import (
+    from transformers import (
         AutoImageProcessor,
         AutoModel,
         AutoTokenizer,
+        BaseImageProcessor,
         BatchEncoding,
+        BatchFeature,
+        PreTrainedModel,
+        PreTrainedTokenizerBase,
     )
     from transformers.utils import logging as transformer_logging
 except ImportError as e:
@@ -67,7 +71,9 @@ class BaseEmbeddingGenerator(ABC):
         raise
 
     @abstractmethod
-    def generate_embeddings(
+    def generate_embeddings(
+        self, **kwargs: object
+    ) -> pd.Series | tuple[pd.Series, pd.Series]:
         """Generate embeddings for the input data."""
         ...
 
@@ -95,7 +101,7 @@ class BaseEmbeddingGenerator(ABC):
         return self.__model_name
 
     @property
-    def model(self) ->
+    def model(self) -> PreTrainedModel:
         """Return the underlying model instance."""
         return self.__model
 
@@ -183,7 +189,7 @@ class NLPEmbeddingGenerator(BaseEmbeddingGenerator):
             tokenizer_max_length: Maximum sequence length for the tokenizer.
             **kwargs: Additional arguments for model initialization.
         """
-        super().__init__(use_case=use_case, model_name=model_name, **kwargs)
+        super().__init__(use_case=use_case, model_name=model_name, **kwargs)  # type: ignore[arg-type]
         self.__tokenizer_max_length = tokenizer_max_length
         # We don't check for the tokenizer's existence since it is coupled with the corresponding model
         # We check the model's existence in `BaseEmbeddingGenerator`
@@ -193,7 +199,7 @@ class NLPEmbeddingGenerator(BaseEmbeddingGenerator):
         )
 
     @property
-    def tokenizer(self) ->
+    def tokenizer(self) -> PreTrainedTokenizerBase:
         """Return the tokenizer instance for text processing."""
         return self.__tokenizer
 
@@ -240,7 +246,7 @@ class CVEmbeddingGenerator(BaseEmbeddingGenerator):
             model_name: Name of the pre-trained vision model.
             **kwargs: Additional arguments for model initialization.
         """
-        super().__init__(use_case=use_case, model_name=model_name, **kwargs)
+        super().__init__(use_case=use_case, model_name=model_name, **kwargs)  # type: ignore[arg-type]
         logger.info("Downloading image processor")
         # We don't check for the image processor's existence since it is coupled with the corresponding model
         # We check the model's existence in `BaseEmbeddingGenerator`
@@ -249,7 +255,7 @@ class CVEmbeddingGenerator(BaseEmbeddingGenerator):
         )
 
     @property
-    def image_processor(self) ->
+    def image_processor(self) -> BaseImageProcessor:
         """Return the image processor instance for image preprocessing."""
         return self.__image_processor
 
@@ -262,7 +268,7 @@ class CVEmbeddingGenerator(BaseEmbeddingGenerator):
 
     def preprocess_image(
         self, batch: dict[str, list[str]], local_image_feat_name: str
-    ) ->
+    ) -> BatchFeature:
         """Preprocess a batch of images for model input."""
         return self.image_processor(
             [
@@ -272,7 +278,7 @@ class CVEmbeddingGenerator(BaseEmbeddingGenerator):
             return_tensors="pt",
         ).to(self.device)
 
-    def generate_embeddings(self, local_image_path_col: pd.Series) -> pd.Series:
+    def generate_embeddings(self, local_image_path_col: pd.Series) -> pd.Series:  # type: ignore[override]
         """Obtain embedding vectors from your image data using pre-trained image models.
 
         :param local_image_path_col: a pandas Series containing the local path to the images to
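
The broadened abstract signature above (`generate_embeddings(self, **kwargs: object)`) explains the `# type: ignore[override]` comments that recur through the subclass diffs: each concrete generator narrows the signature to specific parameters, which mypy flags as a non-Liskov override, and the SDK silences deliberately. A minimal sketch of that trade-off, with hypothetical names:

from abc import ABC, abstractmethod


class GeneratorSketch(ABC):
    @abstractmethod
    def generate(self, **kwargs: object) -> list[float]:
        """Deliberately loose: concrete signatures vary per use case."""
        ...


class TextGeneratorSketch(GeneratorSketch):
    # mypy reports "Signature of 'generate' incompatible with supertype":
    # code holding a GeneratorSketch could pass kwargs this override
    # rejects, so the narrowing is flagged and suppressed on purpose.
    def generate(self, text: str) -> list[float]:  # type: ignore[override]
        return [float(len(text))]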
arize/embeddings/cv_generators.py
CHANGED
@@ -25,7 +25,7 @@ class EmbeddingGeneratorForCVImageClassification(CVEmbeddingGenerator):
         super().__init__(
             use_case=UseCases.CV.IMAGE_CLASSIFICATION,
             model_name=model_name,
-            **kwargs,
+            **kwargs,  # type: ignore[arg-type]
         )
 
 
@@ -46,5 +46,5 @@ class EmbeddingGeneratorForCVObjectDetection(CVEmbeddingGenerator):
         super().__init__(
             use_case=UseCases.CV.OBJECT_DETECTION,
             model_name=model_name,
-            **kwargs,
+            **kwargs,  # type: ignore[arg-type]
         )
arize/embeddings/errors.py
CHANGED
@@ -2,7 +2,7 @@
 
 
 class InvalidIndexError(Exception):
-    """Raised when DataFrame or Series has an invalid index."""
+    """Raised when :class:`pandas.DataFrame` or Series has an invalid index."""
 
     def __repr__(self) -> str:
         """Return a string representation for debugging and logging."""
@@ -16,7 +16,7 @@ class InvalidIndexError(Exception):
         """Initialize the exception with field name context.
 
         Args:
-            field_name: Name of the DataFrame or Series field with invalid index.
+            field_name: Name of the :class:`pandas.DataFrame` or Series field with invalid index.
         """
         self.field_name = field_name
 
arize/embeddings/nlp_generators.py
CHANGED
@@ -39,10 +39,10 @@ class EmbeddingGeneratorForNLPSequenceClassification(NLPEmbeddingGenerator):
         super().__init__(
             use_case=UseCases.NLP.SEQUENCE_CLASSIFICATION,
             model_name=model_name,
-            **kwargs,
+            **kwargs,  # type: ignore[arg-type]
         )
 
-    def generate_embeddings(
+    def generate_embeddings(  # type: ignore[override]
         self,
         text_col: pd.Series,
         class_label_col: pd.Series | None = None,
@@ -65,10 +65,10 @@ class EmbeddingGeneratorForNLPSequenceClassification(NLPEmbeddingGenerator):
         if class_label_col is not None:
             if not isinstance(class_label_col, pd.Series):
                 raise TypeError("class_label_col must be a pandas Series")
-
+            temp_df = pd.concat(
                 {"text": text_col, "class_label": class_label_col}, axis=1
             )
-            prepared_text_col =
+            prepared_text_col = temp_df.apply(
                 lambda row: f" The classification label is {row['class_label']}. {row['text']}",
                 axis=1,
             )
@@ -83,8 +83,8 @@ class EmbeddingGeneratorForNLPSequenceClassification(NLPEmbeddingGenerator):
             batched=True,
             batch_size=self.batch_size,
         )
-
-        return
+        result_df: pd.DataFrame = ds.to_pandas()
+        return result_df["embedding_vector"]
 
 
 class EmbeddingGeneratorForNLPSummarization(NLPEmbeddingGenerator):
@@ -104,10 +104,10 @@ class EmbeddingGeneratorForNLPSummarization(NLPEmbeddingGenerator):
         super().__init__(
             use_case=UseCases.NLP.SUMMARIZATION,
             model_name=model_name,
-            **kwargs,
+            **kwargs,  # type: ignore[arg-type]
         )
 
-    def generate_embeddings(
+    def generate_embeddings(  # type: ignore[override]
         self,
         text_col: pd.Series,
     ) -> pd.Series:
arize/embeddings/tabular_generators.py
CHANGED
@@ -64,10 +64,10 @@ class EmbeddingGeneratorForTabularFeatures(NLPEmbeddingGenerator):
         super().__init__(
             use_case=UseCases.STRUCTURED.TABULAR_EMBEDDINGS,
             model_name=model_name,
-            **kwargs,
+            **kwargs,  # type: ignore[arg-type]
         )
 
-    def generate_embeddings(
+    def generate_embeddings(  # type: ignore[override]
         self,
         df: pd.DataFrame,
         selected_columns: list[str],
@@ -145,11 +145,11 @@ class EmbeddingGeneratorForTabularFeatures(NLPEmbeddingGenerator):
             batch_size=self.batch_size,
         )
 
-
+        result_df: pd.DataFrame = ds.to_pandas()
         if return_prompt_col:
-            return
+            return result_df["embedding_vector"], prompts
 
-        return
+        return result_df["embedding_vector"]
 
     @staticmethod
     def __prompt_fn(row: pd.DataFrame, columns: list[str]) -> str:
@@ -172,5 +172,5 @@ class EmbeddingGeneratorForTabularFeatures(NLPEmbeddingGenerator):
 
     @staticmethod
     def list_pretrained_models() -> pd.DataFrame:
-        """Return a DataFrame of available pretrained tabular models."""
+        """Return a :class:`pandas.DataFrame` of available pretrained tabular models."""
         return pd.DataFrame({"Model Name": sorted(TABULAR_PRETRAINED_MODELS)})
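
The `result_df: pd.DataFrame = ds.to_pandas()` lines in this file and in nlp_generators.py pin down a loosely typed return value before indexing into it, so `result_df["embedding_vector"]` is checked as a DataFrame operation rather than propagating Any. A minimal sketch of the idiom, with a hypothetical stand-in for the loosely typed call:

from typing import Any

import pandas as pd


def to_pandas_like() -> Any:
    """Hypothetical stand-in for a library call whose stubs return Any."""
    return pd.DataFrame({"embedding_vector": [[0.1, 0.2], [0.3, 0.4]]})


# Annotating the intermediate turns an untyped value into a checked
# DataFrame, so the column access below is verified by the type checker.
result_df: pd.DataFrame = to_pandas_like()
print(result_df["embedding_vector"])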
arize/exceptions/base.py
CHANGED
@@ -39,21 +39,6 @@ class ValidationFailure(Exception):
         self.errors = errors
 
 
-# ----------------------
-# Minimum required checks
-# ----------------------
-# class InvalidColumnNameEmptyString(ValidationError):
-#     def __repr__(self) -> str:
-#         return "Invalid_Column_Name_Empty_String"
-#
-#     def error_message(self) -> str:
-#         return (
-#             "Empty column name found: ''. The schema cannot point to columns in the "
-#             "dataframe denoted by an empty string. You can see the columns used in the "
-#             "schema by running schema.get_used_columns()"
-#         )
-
-
 class InvalidFieldTypeConversion(ValidationError):
     """Raised when fields cannot be converted to required type."""
 
@@ -79,31 +64,6 @@ class InvalidFieldTypeConversion(ValidationError):
         )
 
 
-# class InvalidFieldTypeEmbeddingFeatures(ValidationError):
-#     def __repr__(self) -> str:
-#         return "Invalid_Input_Type_Embedding_Features"
-#
-#     def __init__(self) -> None:
-#         pass
-#
-#     def error_message(self) -> str:
-#         return (
-#             "schema.embedding_feature_column_names should be a dictionary mapping strings "
-#             "to EmbeddingColumnNames objects"
-#         )
-
-
-# class InvalidFieldTypePromptResponse(ValidationError):
-#     def __repr__(self) -> str:
-#         return "Invalid_Input_Type_Prompt_Response"
-#
-#     def __init__(self, name: str) -> None:
-#         self.name = name
-#
-#     def error_message(self) -> str:
-#         return f"'{self.name}' must be of type str or EmbeddingColumnNames"
-
-
 class InvalidDataFrameIndex(ValidationError):
     """Raised when DataFrame has an invalid index that needs to be reset."""
 
@@ -117,15 +77,3 @@ class InvalidDataFrameIndex(ValidationError):
             "The index of the dataframe is invalid; "
             "reset the index by using df.reset_index(drop=True, inplace=True)"
         )
-
-
-# class InvalidSchemaType(ValidationError):
-#     def __repr__(self) -> str:
-#         return "Invalid_Schema_Type"
-#
-#     def __init__(self, schema_type: str, environment: Environments) -> None:
-#         self.schema_type = schema_type
-#         self.environment = environment
-#
-#     def error_message(self) -> str:
-#         return f"Cannot use a {self.schema_type} for a model with environment: {self.environment}"
arize/exceptions/config.py
ADDED
@@ -0,0 +1,22 @@
+"""Configuration validation exceptions."""
+
+from __future__ import annotations
+
+
+class MultipleEndpointOverridesError(Exception):
+    """Raised when multiple endpoint override options are provided.
+
+    Only one of the following can be specified: region, single_host/single_port, or base_domain.
+    """
+
+    def __init__(self, message: str) -> None:
+        """Initialize the exception with an optional custom message.
+
+        Args:
+            message: Custom error message, or empty string.
+        """
+        self.message = message
+
+    def __str__(self) -> str:
+        """Return the error message."""
+        return self.message