PyPI - arize - Versions diffs - 8.0.0b1__py3-none-any.whl → 8.0.0b2__py3-none-any.whl - Mend

arize-8.0.0b1-py3-none-any.whl → arize-8.0.0b2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (60)

arize/__init__.py +1 -1
arize/_client_factory.py +50 -0
arize/_flight/client.py +4 -4
arize/_generated/api_client/api/datasets_api.py +6 -6
arize/_generated/api_client/api/experiments_api.py +6 -6
arize/_generated/api_client/api/projects_api.py +3 -3
arize/_lazy.py +25 -9
arize/client.py +6 -16
arize/config.py +9 -36
arize/constants/ml.py +9 -16
arize/constants/spans.py +5 -10
arize/datasets/client.py +13 -9
arize/datasets/errors.py +1 -1
arize/datasets/validation.py +2 -2
arize/embeddings/auto_generator.py +2 -2
arize/embeddings/errors.py +2 -2
arize/embeddings/tabular_generators.py +1 -1
arize/exceptions/base.py +0 -52
arize/exceptions/parameters.py +0 -329
arize/experiments/client.py +14 -7
arize/experiments/evaluators/base.py +6 -6
arize/experiments/evaluators/executors.py +10 -3
arize/experiments/evaluators/types.py +2 -2
arize/experiments/functions.py +18 -11
arize/experiments/types.py +3 -5
arize/logging.py +1 -1
arize/ml/batch_validation/errors.py +10 -1004
arize/ml/batch_validation/validator.py +273 -225
arize/ml/casting.py +7 -7
arize/ml/client.py +12 -11
arize/ml/proto.py +6 -6
arize/ml/stream_validation.py +2 -3
arize/ml/surrogate_explainer/mimic.py +3 -3
arize/ml/types.py +1 -55
arize/pre_releases.py +6 -3
arize/projects/client.py +9 -4
arize/regions.py +2 -2
arize/spans/client.py +13 -11
arize/spans/columns.py +32 -36
arize/spans/conversion.py +5 -6
arize/spans/validation/common/argument_validation.py +3 -3
arize/spans/validation/common/dataframe_form_validation.py +6 -6
arize/spans/validation/common/value_validation.py +1 -1
arize/spans/validation/evals/dataframe_form_validation.py +4 -4
arize/spans/validation/evals/evals_validation.py +6 -6
arize/spans/validation/metadata/dataframe_form_validation.py +1 -1
arize/spans/validation/spans/dataframe_form_validation.py +2 -2
arize/spans/validation/spans/spans_validation.py +6 -6
arize/utils/arrow.py +2 -2
arize/utils/cache.py +2 -2
arize/utils/dataframe.py +4 -4
arize/utils/online_tasks/dataframe_preprocessor.py +7 -7
arize/utils/openinference_conversion.py +10 -10
arize/utils/proto.py +1 -1
arize/version.py +1 -1
{arize-8.0.0b1.dist-info → arize-8.0.0b2.dist-info}/METADATA +23 -6
{arize-8.0.0b1.dist-info → arize-8.0.0b2.dist-info}/RECORD +60 -59
{arize-8.0.0b1.dist-info → arize-8.0.0b2.dist-info}/WHEEL +0 -0
{arize-8.0.0b1.dist-info → arize-8.0.0b2.dist-info}/licenses/LICENSE +0 -0
{arize-8.0.0b1.dist-info → arize-8.0.0b2.dist-info}/licenses/NOTICE +0 -0

arize/__init__.py CHANGED Viewed

@@ -37,7 +37,7 @@ def make_to_df(field_name: str) -> object:
         json_normalize: bool = False,
         convert_dtypes: bool = True,
     ) -> object:
-        """Convert a list of objects to a pandas DataFrame.
+        """Convert a list of objects to a :class:`pandas.DataFrame`.
         Behavior:
           - If an item is a Pydantic v2 model, use `.model_dump(by_alias=...)`.

arize/_client_factory.py ADDED Viewed

@@ -0,0 +1,50 @@
+"""Factory for creating and caching the generated OpenAPI client."""
+from __future__ import annotations
+import threading
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+    from arize._generated.api_client.api_client import ApiClient
+    from arize.config import SDKConfiguration
+class GeneratedClientFactory:
+    """Factory for creating and caching generated OpenAPI clients.
+    This factory is owned by ArizeClient and provides thread-safe lazy
+    initialization of the OpenAPI client used by various subclients.
+    """
+    def __init__(self, sdk_config: SDKConfiguration) -> None:
+        """Initialize the factory.
+        Args:
+            sdk_config: SDK configuration containing API settings.
+        """
+        self._sdk_config = sdk_config
+        self._client: ApiClient | None = None
+        self._lock = threading.Lock()
+    def get_client(self) -> ApiClient:
+        """Get or create the generated OpenAPI client instance.
+        Returns:
+            The shared generated API client instance.
+        """
+        if self._client is not None:
+            return self._client
+        with self._lock:
+            if self._client is not None:
+                return self._client
+            # Import lazily to avoid extra dependencies at config time
+            from arize._generated import api_client as gen
+            cfg = gen.Configuration(host=self._sdk_config.api_url)
+            if self._sdk_config.api_key:
+                cfg.access_token = self._sdk_config.api_key
+            self._client = gen.ApiClient(cfg)
+            return self._client

arize/_flight/client.py CHANGED Viewed

@@ -333,8 +333,8 @@ class ArizeFlightClient:
                 latest version.
         Returns:
-            A pandas DataFrame containing the dataset examples with JSON string columns
-            converted to dict objects.
+            :class:`pandas.DataFrame`: A pandas DataFrame containing the dataset examples
+                with JSON string columns converted to dict objects.
         Raises:
             RuntimeError: If the Flight request fails.
@@ -374,8 +374,8 @@ class ArizeFlightClient:
             experiment_id: Experiment ID to retrieve runs from.
         Returns:
-            A pandas DataFrame containing the experiment runs with JSON string columns
-            converted to dict objects.
+            :class:`pandas.DataFrame`: A pandas DataFrame containing the experiment runs
+                with JSON string columns converted to dict objects.
         Raises:
             RuntimeError: If the Flight request fails.

arize/_generated/api_client/api/datasets_api.py CHANGED Viewed

@@ -940,7 +940,7 @@ class DatasetsApi:
         self,
         dataset_id: Annotated[StrictStr, Field(description="The unique identifier of the dataset")],
         dataset_version_id: Annotated[Optional[StrictStr], Field(description="The unique identifier of the dataset version")] = None,
-        limit: Annotated[Optional[Annotated[int, Field(le=10000, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
+        limit: Annotated[Optional[Annotated[int, Field(le=500, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
         _request_timeout: Union[
             None,
             Annotated[StrictFloat, Field(gt=0)],
@@ -1021,7 +1021,7 @@ class DatasetsApi:
         self,
         dataset_id: Annotated[StrictStr, Field(description="The unique identifier of the dataset")],
         dataset_version_id: Annotated[Optional[StrictStr], Field(description="The unique identifier of the dataset version")] = None,
-        limit: Annotated[Optional[Annotated[int, Field(le=10000, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
+        limit: Annotated[Optional[Annotated[int, Field(le=500, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
         _request_timeout: Union[
             None,
             Annotated[StrictFloat, Field(gt=0)],
@@ -1102,7 +1102,7 @@ class DatasetsApi:
         self,
         dataset_id: Annotated[StrictStr, Field(description="The unique identifier of the dataset")],
         dataset_version_id: Annotated[Optional[StrictStr], Field(description="The unique identifier of the dataset version")] = None,
-        limit: Annotated[Optional[Annotated[int, Field(le=10000, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
+        limit: Annotated[Optional[Annotated[int, Field(le=500, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
         _request_timeout: Union[
             None,
             Annotated[StrictFloat, Field(gt=0)],
@@ -1861,7 +1861,7 @@ class DatasetsApi:
     def datasets_list(
         self,
         space_id: Annotated[Optional[StrictStr], Field(description="Filter search results to a particular space ID")] = None,
-        limit: Annotated[Optional[Annotated[int, Field(le=500, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
+        limit: Annotated[Optional[Annotated[int, Field(le=100, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
         cursor: Annotated[Optional[StrictStr], Field(description="Opaque pagination cursor returned from a previous response (`pagination.next_cursor`). Treat it as an unreadable token; do not attempt to parse or construct it. ")] = None,
         _request_timeout: Union[
             None,
@@ -1941,7 +1941,7 @@ class DatasetsApi:
     def datasets_list_with_http_info(
         self,
         space_id: Annotated[Optional[StrictStr], Field(description="Filter search results to a particular space ID")] = None,
-        limit: Annotated[Optional[Annotated[int, Field(le=500, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
+        limit: Annotated[Optional[Annotated[int, Field(le=100, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
         cursor: Annotated[Optional[StrictStr], Field(description="Opaque pagination cursor returned from a previous response (`pagination.next_cursor`). Treat it as an unreadable token; do not attempt to parse or construct it. ")] = None,
         _request_timeout: Union[
             None,
@@ -2021,7 +2021,7 @@ class DatasetsApi:
     def datasets_list_without_preload_content(
         self,
         space_id: Annotated[Optional[StrictStr], Field(description="Filter search results to a particular space ID")] = None,
-        limit: Annotated[Optional[Annotated[int, Field(le=500, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
+        limit: Annotated[Optional[Annotated[int, Field(le=100, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
         cursor: Annotated[Optional[StrictStr], Field(description="Opaque pagination cursor returned from a previous response (`pagination.next_cursor`). Treat it as an unreadable token; do not attempt to parse or construct it. ")] = None,
         _request_timeout: Union[
             None,

arize/_generated/api_client/api/experiments_api.py CHANGED Viewed

@@ -898,7 +898,7 @@ class ExperimentsApi:
     def experiments_list(
         self,
         dataset_id: Annotated[Optional[StrictStr], Field(description="Filter experiments to a particular dataset ID")] = None,
-        limit: Annotated[Optional[Annotated[int, Field(le=500, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
+        limit: Annotated[Optional[Annotated[int, Field(le=100, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
         cursor: Annotated[Optional[StrictStr], Field(description="Opaque pagination cursor returned from a previous response (`pagination.next_cursor`). Treat it as an unreadable token; do not attempt to parse or construct it. ")] = None,
         _request_timeout: Union[
             None,
@@ -978,7 +978,7 @@ class ExperimentsApi:
     def experiments_list_with_http_info(
         self,
         dataset_id: Annotated[Optional[StrictStr], Field(description="Filter experiments to a particular dataset ID")] = None,
-        limit: Annotated[Optional[Annotated[int, Field(le=500, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
+        limit: Annotated[Optional[Annotated[int, Field(le=100, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
         cursor: Annotated[Optional[StrictStr], Field(description="Opaque pagination cursor returned from a previous response (`pagination.next_cursor`). Treat it as an unreadable token; do not attempt to parse or construct it. ")] = None,
         _request_timeout: Union[
             None,
@@ -1058,7 +1058,7 @@ class ExperimentsApi:
     def experiments_list_without_preload_content(
         self,
         dataset_id: Annotated[Optional[StrictStr], Field(description="Filter experiments to a particular dataset ID")] = None,
-        limit: Annotated[Optional[Annotated[int, Field(le=500, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
+        limit: Annotated[Optional[Annotated[int, Field(le=100, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
         cursor: Annotated[Optional[StrictStr], Field(description="Opaque pagination cursor returned from a previous response (`pagination.next_cursor`). Treat it as an unreadable token; do not attempt to parse or construct it. ")] = None,
         _request_timeout: Union[
             None,
@@ -1211,7 +1211,7 @@ class ExperimentsApi:
     def experiments_runs_list(
         self,
         experiment_id: Annotated[StrictStr, Field(description="The unique identifier of the experiment")],
-        limit: Annotated[Optional[Annotated[int, Field(le=10000, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
+        limit: Annotated[Optional[Annotated[int, Field(le=500, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
         _request_timeout: Union[
             None,
             Annotated[StrictFloat, Field(gt=0)],
@@ -1288,7 +1288,7 @@ class ExperimentsApi:
     def experiments_runs_list_with_http_info(
         self,
         experiment_id: Annotated[StrictStr, Field(description="The unique identifier of the experiment")],
-        limit: Annotated[Optional[Annotated[int, Field(le=10000, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
+        limit: Annotated[Optional[Annotated[int, Field(le=500, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
         _request_timeout: Union[
             None,
             Annotated[StrictFloat, Field(gt=0)],
@@ -1365,7 +1365,7 @@ class ExperimentsApi:
     def experiments_runs_list_without_preload_content(
         self,
         experiment_id: Annotated[StrictStr, Field(description="The unique identifier of the experiment")],
-        limit: Annotated[Optional[Annotated[int, Field(le=10000, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
+        limit: Annotated[Optional[Annotated[int, Field(le=500, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
         _request_timeout: Union[
             None,
             Annotated[StrictFloat, Field(gt=0)],

arize/_generated/api_client/api/projects_api.py CHANGED Viewed

@@ -891,7 +891,7 @@ class ProjectsApi:
     def projects_list(
         self,
         space_id: Annotated[Optional[StrictStr], Field(description="Filter search results to a particular space ID")] = None,
-        limit: Annotated[Optional[Annotated[int, Field(le=500, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
+        limit: Annotated[Optional[Annotated[int, Field(le=100, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
         cursor: Annotated[Optional[StrictStr], Field(description="Opaque pagination cursor returned from a previous response (`pagination.next_cursor`). Treat it as an unreadable token; do not attempt to parse or construct it. ")] = None,
         _request_timeout: Union[
             None,
@@ -970,7 +970,7 @@ class ProjectsApi:
     def projects_list_with_http_info(
         self,
         space_id: Annotated[Optional[StrictStr], Field(description="Filter search results to a particular space ID")] = None,
-        limit: Annotated[Optional[Annotated[int, Field(le=500, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
+        limit: Annotated[Optional[Annotated[int, Field(le=100, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
         cursor: Annotated[Optional[StrictStr], Field(description="Opaque pagination cursor returned from a previous response (`pagination.next_cursor`). Treat it as an unreadable token; do not attempt to parse or construct it. ")] = None,
         _request_timeout: Union[
             None,
@@ -1049,7 +1049,7 @@ class ProjectsApi:
     def projects_list_without_preload_content(
         self,
         space_id: Annotated[Optional[StrictStr], Field(description="Filter search results to a particular space ID")] = None,
-        limit: Annotated[Optional[Annotated[int, Field(le=500, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
+        limit: Annotated[Optional[Annotated[int, Field(le=100, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
         cursor: Annotated[Optional[StrictStr], Field(description="Opaque pagination cursor returned from a previous response (`pagination.next_cursor`). Treat it as an unreadable token; do not attempt to parse or construct it. ")] = None,
         _request_timeout: Union[
             None,

arize/_lazy.py CHANGED Viewed

@@ -1,6 +1,7 @@
 # src/arize/_lazy.py
 from __future__ import annotations
+import inspect
 import logging
 import sys
 import threading
@@ -8,6 +9,8 @@ from importlib import import_module
 from typing import TYPE_CHECKING, ClassVar
 if TYPE_CHECKING:
+    import types
     from arize.config import SDKConfiguration
 logger = logging.getLogger(__name__)
@@ -22,6 +25,11 @@ class LazySubclientsMixin:
         self._lazy_cache: dict[str, object] = {}
         self._lazy_lock = threading.Lock()
+        # Add generated client factory
+        from arize._client_factory import GeneratedClientFactory
+        self._gen_client_factory = GeneratedClientFactory(sdk_config)
     def __getattr__(self, name: str) -> object:
         subs = self._SUBCLIENTS
         if name not in subs:
@@ -41,12 +49,18 @@ class LazySubclientsMixin:
             module = _dynamic_import(module_path)
             klass = getattr(module, class_name)
-            # Pass sdk_config if the child accepts it; otherwise construct bare.
-            try:
-                instance = klass(sdk_config=self.sdk_config)
-            except TypeError:
-                instance = klass()
+            # Determine which parameters this subclient needs
+            # and build kwargs accordingly
+            sig = inspect.signature(klass.__init__)
+            kwargs = {}
+            if "sdk_config" in sig.parameters:
+                kwargs["sdk_config"] = self.sdk_config
+            if "generated_client" in sig.parameters:
+                kwargs["generated_client"] = (
+                    self._gen_client_factory.get_client()
+                )
+            instance = klass(**kwargs)
             self._lazy_cache[name] = instance
             return instance
@@ -82,8 +96,8 @@ def require(
         )
-def _dynamic_import(modname: str, retries: int = 2) -> object:
-    def _attempt_import(remaining_attempts: int) -> object:
+def _dynamic_import(modname: str, retries: int = 2) -> types.ModuleType:
+    def _attempt_import(remaining_attempts: int) -> types.ModuleType:
         try:
             return import_module(modname)
         except (ModuleNotFoundError, ImportError, KeyError):
@@ -92,4 +106,6 @@ def _dynamic_import(modname: str, retries: int = 2) -> object:
                 raise
             return _attempt_import(remaining_attempts - 1)
-    return _attempt_import(retries) if retries > 0 else None
+    if retries <= 0:
+        raise ValueError(f"retries must be > 0, got {retries}")
+    return _attempt_import(retries)

arize/client.py CHANGED Viewed

@@ -20,11 +20,6 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
-# TODO(Kiko): InvalidAdditionalHeadersError is unused. Have we handled extra headers?
-# TODO(Kiko): Clean commented lines over the SDK
-# TODO(Kiko): Implement https://github.com/Arize-ai/arize/pull/59917
 # TODO(Kiko): Go private connect. Need a `base_domain`, such that we get:
 # - api.<base_domain>
 # - app.<base_domain>
@@ -33,29 +28,23 @@ logger = logging.getLogger(__name__)
 # TODO(Kiko): Enforce type checking, remove all type ignores
-# TODO(Kiko): Go over docstrings
-# TODO(Kiko): Missing parameter descriptions in some docstrings
-# TODO(Kiko): Missing return descriptions in some docstrings
 # TODO(Kiko): Go over headers on each logging call
+# TODO(Kiko): InvalidAdditionalHeadersError is unused. Have we handled extra headers?
 # TODO(Kiko): Need to implement 'Update existing examples in a dataset'
-# TODO(Kiko): why logs don't show on scripts, only on jupyter notebooks
 # TODO(Kiko): test caching in colab environment
 # TODO(Kiko): Protobuf versioning is too old
 # TODO(Kiko): Go through main APIs and add CtxAdapter where missing
 # TODO(Kiko): Search and handle other TODOs
-# TODO(Kiko): Go over **every file** and do not import anything at runtime, use `if TYPE_CHECKING`
-# with `from __future__ import annotations` (must include for Python < 3.11)
 class ArizeClient(LazySubclientsMixin):
     """Root client for the Arize SDK.
     The ArizeClient provides access to all Arize platform services including datasets,
-    experiments, ML models, projects, and spans. It uses SDKConfiguration internally to
-    manage configuration settings.
+    experiments, ML models, projects, and spans. It uses :class:`arize.config.SDKConfiguration`
+    internally to manage configuration settings.
     All parameters are optional (except api_key which must be provided via argument
     or environment variable). For each parameter, values are resolved in this order:
@@ -149,7 +138,7 @@ class ArizeClient(LazySubclientsMixin):
             api_key: Arize API key for authentication. Required - must be provided here
                 or via ARIZE_API_KEY environment variable. Raises MissingAPIKeyError if not set.
             region: Arize region (e.g., Region.US_CENTRAL, Region.EU_WEST). When specified,
-                overrides individual host/port settings. ENV: ARIZE_REGION. Default: Region.UNSPECIFIED.
+                overrides individual host/port settings. ENV: ARIZE_REGION. Default: Region.UNSET.
             api_host: Custom API endpoint host. ENV: ARIZE_API_HOST. Default: "api.arize.com".
             api_scheme: API endpoint scheme (http/https). ENV: ARIZE_API_SCHEME. Default: "https".
             otlp_host: OTLP endpoint host. ENV: ARIZE_OTLP_HOST. Default: "otlp.arize.com".
@@ -184,7 +173,8 @@ class ArizeClient(LazySubclientsMixin):
         Notes:
             Values provided to this class override environment variables, which in turn
-            override default values. See SDKConfiguration for detailed parameter documentation.
+            override default values. See :class:`arize.config.SDKConfiguration`
+            for detailed parameter documentation.
         """
         cfg_kwargs: dict = {}
         if api_key is not None:

arize/config.py CHANGED Viewed

@@ -3,10 +3,8 @@
 import logging
 import os
 import sys
-import threading
 from dataclasses import dataclass, field, fields
 from pathlib import Path
-from typing import Any
 from arize.constants.config import (
     DEFAULT_API_HOST,
@@ -167,6 +165,9 @@ def _parse_bool(val: bool | str | None) -> bool:
 class SDKConfiguration:
     """Configuration for the Arize SDK with endpoint and authentication settings.
+    This class holds pure configuration data and does not manage client lifecycle.
+    Client creation and caching is handled by :class:`arize.ArizeClient`.
     This class is used internally by ArizeClient to manage SDK configuration. It is not
     recommended to use this class directly; users should interact with ArizeClient
     instead.
@@ -225,13 +226,16 @@ class SDKConfiguration:
         region: Arize region (e.g., US_CENTRAL, EU_WEST). When specified, overrides
             individual host/port settings.
             Environment variable: ARIZE_REGION.
-            Default: Region.UNSPECIFIED.
+            Default: :class:`Region.UNSET`.
         single_host: Single host to use for all endpoints. Overrides individual host settings.
             Environment variable: ARIZE_SINGLE_HOST.
             Default: "" (not set).
         single_port: Single port to use for all endpoints. Overrides individual port settings (0-65535).
             Environment variable: ARIZE_SINGLE_PORT.
             Default: 0 (not set).
+    Raises:
+        MissingAPIKeyError: If api_key is not provided via argument or environment variable.
     """
     api_key: str = field(
@@ -323,17 +327,11 @@ class SDKConfiguration:
         )
     )
-    # Private, excluded from comparisons & repr
-    _gen_client: Any = field(default=None, repr=False, compare=False)
-    _gen_lock: threading.Lock = field(
-        default_factory=threading.Lock, repr=False, compare=False
-    )
     def __post_init__(self) -> None:
         """Validate and configure SDK endpoints after initialization.
         Raises:
-            MissingAPIKeyError: If API key is not provided.
+            MissingAPIKeyError: If api_key is not provided via argument or environment variable.
         """
         # Validate Configuration
         if not self.api_key:
@@ -341,7 +339,7 @@ class SDKConfiguration:
         has_single_host = bool(self.single_host)
         has_single_port = self.single_port != 0
-        has_region = self.region is not Region.UNSPECIFIED
+        has_region = self.region is not Region.UNSET
         if (has_single_host or has_single_port) and has_region:
             logger.info(
                 "Multiple endpoint override options provided. Preference order is: "
@@ -444,28 +442,3 @@ class SDKConfiguration:
             lines.append(f"  {f.name}={val!r},")
         lines.append(")")
         return "\n".join(lines)
-    # TODO(Kiko): This may not be well placed in this class
-    def get_generated_client(self) -> object:
-        """Get or create the generated OpenAPI client instance."""
-        # If already cached, return immediately
-        if self._gen_client is not None:
-            return self._gen_client
-        # Thread-safe initialization
-        with self._gen_lock:
-            if self._gen_client is not None:
-                return self._gen_client
-            # Import lazily so extra dependencies can be
-            # enforced outside the configuration class
-            from arize._generated import api_client as gen
-            cfg = gen.Configuration(host=self.api_url)
-            if self.api_key:
-                cfg.access_token = self.api_key
-            client = gen.ApiClient(cfg)
-            # Bypass frozen to set the cache once
-            object.__setattr__(self, "_gen_client", client)
-            return client

arize/constants/ml.py CHANGED Viewed

@@ -3,50 +3,43 @@
 import json
 from pathlib import Path
-# MAX_BYTES_PER_BULK_RECORD = 100000
-# MAX_DAYS_WITHIN_RANGE = 365
 MIN_PREDICTION_ID_LEN = 1
 MAX_PREDICTION_ID_LEN = 512
 MIN_DOCUMENT_ID_LEN = 1
 MAX_DOCUMENT_ID_LEN = 128
-# # The maximum number of character for tag values
+# The maximum number of character for tag values
 MAX_TAG_LENGTH = 20_000
 MAX_TAG_LENGTH_TRUNCATION = 1_000
-# # The maximum number of character for embedding raw data
+# The maximum number of character for embedding raw data
 MAX_RAW_DATA_CHARACTERS = 2_000_000
 MAX_RAW_DATA_CHARACTERS_TRUNCATION = 5_000
 # The maximum number of acceptable years in the past from current time for prediction_timestamps
 MAX_PAST_YEARS_FROM_CURRENT_TIME = 5
 # The maximum number of acceptable years in the future from current time for prediction_timestamps
 MAX_FUTURE_YEARS_FROM_CURRENT_TIME = 1
-# # The maximum number of character for llm model name
+# The maximum number of character for llm model name
 MAX_LLM_MODEL_NAME_LENGTH = 20_000
 MAX_LLM_MODEL_NAME_LENGTH_TRUNCATION = 50
-# # The maximum number of character for prompt template
+# The maximum number of character for prompt template
 MAX_PROMPT_TEMPLATE_LENGTH = 50_000
 MAX_PROMPT_TEMPLATE_LENGTH_TRUNCATION = 5_000
-# # The maximum number of character for prompt template version
+# The maximum number of character for prompt template version
 MAX_PROMPT_TEMPLATE_VERSION_LENGTH = 20_000
 MAX_PROMPT_TEMPLATE_VERSION_LENGTH_TRUNCATION = 50
-# # The maximum number of embeddings
+# The maximum number of embeddings
 MAX_NUMBER_OF_EMBEDDINGS = 30
 MAX_EMBEDDING_DIMENSIONALITY = 20_000
-# # The maximum number of classes for multi class
+# The maximum number of classes for multi class
 MAX_NUMBER_OF_MULTI_CLASS_CLASSES = 500
 MAX_MULTI_CLASS_NAME_LENGTH = 100
 # The maximum number of references in embedding similarity search params
 MAX_NUMBER_OF_SIMILARITY_REFERENCES = 10
-#
-# # Arize generated columns
-# GENERATED_PREDICTION_LABEL_COL = "arize_generated_prediction_label"
-# GENERATED_LLM_PARAMS_JSON_COL = "arize_generated_llm_params_json"
-#
-# # reserved columns for LLM run metadata
+# reserved columns for LLM run metadata
 LLM_RUN_METADATA_TOTAL_TOKEN_COUNT_TAG_NAME = "total_token_count"  # noqa: S105
 LLM_RUN_METADATA_PROMPT_TOKEN_COUNT_TAG_NAME = "prompt_token_count"  # noqa: S105
 LLM_RUN_METADATA_RESPONSE_TOKEN_COUNT_TAG_NAME = "response_token_count"  # noqa: S105
 LLM_RUN_METADATA_RESPONSE_LATENCY_MS_TAG_NAME = "response_latency_ms"
-#
 # all reserved tags
 RESERVED_TAG_COLS = [
     LLM_RUN_METADATA_TOTAL_TOKEN_COUNT_TAG_NAME,

arize/constants/spans.py CHANGED Viewed

@@ -5,19 +5,15 @@ DEFAULT_DATETIME_FMT = "%Y-%m-%dT%H:%M:%S.%f+00:00"
 # Minumum/Maximum number of characters for span/trace/parent ids in spans
 SPAN_ID_MIN_STR_LENGTH = 12
 SPAN_ID_MAX_STR_LENGTH = 128
-# # Minumum/Maximum number of characters for span name
+# Minumum/Maximum number of characters for span name
 SPAN_NAME_MIN_STR_LENGTH = 0
 SPAN_NAME_MAX_STR_LENGTH = 50
-# # Minumum/Maximum number of characters for span status message
+# Minumum/Maximum number of characters for span status message
 SPAN_STATUS_MSG_MIN_STR_LENGTH = 0
 SPAN_STATUS_MSG_MAX_STR_LENGTH = 10_000
-# # Minumum/Maximum number of characters for span event name
+# Minumum/Maximum number of characters for span event name
 SPAN_EVENT_NAME_MAX_STR_LENGTH = 100
-# # Minumum/Maximum number of characters for span event attributes
-# SPAN_EVENT_ATTRS_MAX_STR_LENGTH = 10_000
-# # Maximum number of characters for span kind
-# SPAN_KIND_MAX_STR_LENGTH = 100
-# SPAN_EXCEPTION_TYPE_MAX_STR_LENGTH = 100
+# Minumum/Maximum number of characters for span event attributes
 SPAN_EXCEPTION_MESSAGE_MAX_STR_LENGTH = 100
 SPAN_EXCEPTION_STACK_TRACE_MAX_STR_LENGTH = 10_000
 SPAN_IO_VALUE_MAX_STR_LENGTH = 4_000_000
@@ -29,7 +25,6 @@ SPAN_LLM_MESSAGE_ROLE_MAX_STR_LENGTH = 100
 SPAN_LLM_MESSAGE_CONTENT_MAX_STR_LENGTH = 4_000_000
 SPAN_LLM_TOOL_CALL_FUNCTION_NAME_MAX_STR_LENGTH = 500
 SPAN_LLM_PROMPT_TEMPLATE_MAX_STR_LENGTH = 4_000_000
-# SPAN_LLM_PROMPT_TEMPLATE_VARIABLES_MAX_STR_LENGTH = 10_000
 SPAN_LLM_PROMPT_TEMPLATE_VERSION_MAX_STR_LENGTH = 100
 SPAN_TOOL_NAME_MAX_STR_LENGTH = 100
 SPAN_TOOL_DESCRIPTION_MAX_STR_LENGTH = 1_000
@@ -43,7 +38,7 @@ JSON_STRING_MAX_STR_LENGTH = 4_000_000
 EVAL_LABEL_MIN_STR_LENGTH = 1  # we do not accept empty strings
 EVAL_LABEL_MAX_STR_LENGTH = 100
 EVAL_EXPLANATION_MAX_STR_LENGTH = 10_000
-#
 # # Annotation related constants
 ANNOTATION_LABEL_MIN_STR_LENGTH = 1
 ANNOTATION_LABEL_MAX_STR_LENGTH = 100  # Max length for annotation label string

arize/datasets/client.py CHANGED Viewed

@@ -24,6 +24,7 @@ from arize.utils.openinference_conversion import (
 from arize.utils.size import get_payload_size_mb
 if TYPE_CHECKING:
+    from arize._generated.api_client.api_client import ApiClient
     from arize.config import SDKConfiguration
 logger = logging.getLogger(__name__)
@@ -41,18 +42,21 @@ class DatasetsClient:
     :class:`arize.config.SDKConfiguration`.
     """
-    def __init__(self, *, sdk_config: SDKConfiguration) -> None:
+    def __init__(
+        self, *, sdk_config: SDKConfiguration, generated_client: ApiClient
+    ) -> None:
         """
         Args:
             sdk_config: Resolved SDK configuration.
+            generated_client: Shared generated API client instance.
         """  # noqa: D205, D212
         self._sdk_config = sdk_config
         # Import at runtime so it's still lazy and extras-gated by the parent
         from arize._generated import api_client as gen
-        # Use the shared generated client from the config
-        self._api = gen.DatasetsApi(self._sdk_config.get_generated_client())
+        # Use the provided client directly
+        self._api = gen.DatasetsApi(generated_client)
     @prerelease_endpoint(key="datasets.list", stage=ReleaseStage.BETA)
     def list(
@@ -117,7 +121,7 @@ class DatasetsClient:
             space_id: Space ID to create the dataset in.
             examples: Dataset examples either as:
                 - a list of JSON-like dicts, or
-                - a pandas DataFrame (will be converted to records for REST).
+                - a :class:`pandas.DataFrame` (will be converted to records for REST).
             force_http: If True, force REST upload even if the payload exceeds the
                 configured REST payload threshold.
@@ -125,7 +129,7 @@ class DatasetsClient:
             The created dataset object as returned by the API.
         Raises:
-            TypeError: If `examples` is not a list of dicts or a pandas DataFrame.
+            TypeError: If `examples` is not a list of dicts or a :class:`pandas.DataFrame`.
             RuntimeError: If the Flight upload path is selected and the Flight request
                 fails.
             arize._generated.api_client.exceptions.ApiException: If the REST API
@@ -205,7 +209,8 @@ class DatasetsClient:
         Args:
             dataset_id: Dataset ID to delete.
-        Returns: This method returns None on success (common empty 204 response)
+        Returns:
+            This method returns None on success (common empty 204 response).
         Raises:
             arize._generated.api_client.exceptions.ApiException: If the REST API
@@ -354,14 +359,13 @@ class DatasetsClient:
                 the latest dataset version is selected.
             examples: Examples to append, provided as either:
                 - a list of JSON-like dicts, or
-                - a pandas DataFrame (converted to records before upload).
+                - a :class:`pandas.DataFrame` (converted to records before upload).
         Returns:
             The updated dataset object. To see the examples, use `list_examples()`.
         Raises:
-            AssertionError: If `examples` is not a list of dicts or a pandas
-                DataFrame.
+            AssertionError: If `examples` is not a list of dicts or a :class:`pandas.DataFrame`.
             arize._generated.api_client.exceptions.ApiException: If the REST API
                 returns an error response (e.g. 400/401/403/404/429).
         """

arize/datasets/errors.py CHANGED Viewed

@@ -80,7 +80,7 @@ class RequiredColumnsError(DatasetError):
 class EmptyDatasetError(DatasetError):
-    """Raised when dataset DataFrame has no rows."""
+    """Raised when dataset :class:`pandas.DataFrame` has no rows."""
     def error_message(self) -> str:
         """Return the error message for this exception."""

arize/datasets/validation.py CHANGED Viewed

@@ -8,12 +8,12 @@ from arize.datasets import errors as err
 def validate_dataset_df(
     df: pd.DataFrame,
 ) -> list[err.DatasetError]:
-    """Validate a dataset DataFrame for structural and content errors.
+    """Validate a dataset :class:`pandas.DataFrame` for structural and content errors.
     Checks for required columns, unique ID values, and non-empty data.
     Args:
-        df: The pandas DataFrame to validate.
+        df: The :class:`pandas.DataFrame` to validate.
     Returns:
         A list of DatasetError objects found during validation. Empty list if valid.

arize 8.0.0b1__py3-none-any.whl → 8.0.0b2__py3-none-any.whl

arize 8.0.0b1py3-none-any.whl → 8.0.0b2py3-none-any.whl