PyPI - arize - Versions diffs - 8.0.0a23__py3-none-any.whl → 8.0.0b1__py3-none-any.whl - Mend

arize 8.0.0a23__py3-none-any.whl → 8.0.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52) hide show

arize/__init__.py +11 -10
arize/_exporter/client.py +1 -1
arize/_generated/api_client/__init__.py +0 -2
arize/_generated/api_client/models/__init__.py +0 -1
arize/_generated/api_client/models/datasets_create_request.py +2 -10
arize/_generated/api_client/models/datasets_examples_insert_request.py +2 -10
arize/_generated/api_client/test/test_datasets_create_request.py +2 -6
arize/_generated/api_client/test/test_datasets_examples_insert_request.py +2 -6
arize/_generated/api_client/test/test_datasets_examples_list200_response.py +2 -6
arize/_generated/api_client/test/test_datasets_examples_update_request.py +2 -6
arize/_generated/api_client/test/test_experiments_create_request.py +2 -6
arize/_generated/api_client/test/test_experiments_runs_list200_response.py +2 -6
arize/_generated/api_client_README.md +0 -1
arize/client.py +47 -163
arize/config.py +59 -100
arize/datasets/client.py +11 -6
arize/embeddings/nlp_generators.py +12 -6
arize/embeddings/tabular_generators.py +14 -11
arize/experiments/__init__.py +12 -0
arize/experiments/client.py +13 -9
arize/experiments/functions.py +6 -6
arize/experiments/types.py +3 -3
arize/{models → ml}/batch_validation/errors.py +2 -2
arize/{models → ml}/batch_validation/validator.py +5 -3
arize/{models → ml}/casting.py +42 -78
arize/{models → ml}/client.py +19 -17
arize/{models → ml}/proto.py +2 -2
arize/{models → ml}/stream_validation.py +1 -1
arize/{models → ml}/surrogate_explainer/mimic.py +6 -2
arize/{types.py → ml/types.py} +99 -234
arize/pre_releases.py +2 -1
arize/projects/client.py +11 -6
arize/spans/client.py +91 -86
arize/spans/conversion.py +11 -4
arize/spans/validation/common/value_validation.py +1 -1
arize/spans/validation/spans/dataframe_form_validation.py +1 -1
arize/spans/validation/spans/value_validation.py +2 -1
arize/utils/dataframe.py +1 -1
arize/utils/online_tasks/dataframe_preprocessor.py +5 -6
arize/utils/types.py +105 -0
arize/version.py +1 -1
{arize-8.0.0a23.dist-info → arize-8.0.0b1.dist-info}/METADATA +56 -59
{arize-8.0.0a23.dist-info → arize-8.0.0b1.dist-info}/RECORD +50 -51
arize/_generated/api_client/models/primitive_value.py +0 -172
arize/_generated/api_client/test/test_primitive_value.py +0 -50
/arize/{models → ml}/__init__.py +0 -0
/arize/{models → ml}/batch_validation/__init__.py +0 -0
/arize/{models → ml}/bounded_executor.py +0 -0
/arize/{models → ml}/surrogate_explainer/__init__.py +0 -0
{arize-8.0.0a23.dist-info → arize-8.0.0b1.dist-info}/WHEEL +0 -0
{arize-8.0.0a23.dist-info → arize-8.0.0b1.dist-info}/licenses/LICENSE +0 -0
{arize-8.0.0a23.dist-info → arize-8.0.0b1.dist-info}/licenses/NOTICE +0 -0

arize/spans/client.py CHANGED Viewed

@@ -26,7 +26,7 @@ from arize.exceptions.base import (
 from arize.exceptions.models import MissingProjectNameError
 from arize.exceptions.spaces import MissingSpaceIDError
 from arize.logging import CtxAdapter
-from arize.types import Environments, SimilaritySearchParams
+from arize.ml.types import Environments
 from arize.utils.arrow import post_arrow_table
 from arize.utils.dataframe import (
     remove_extraneous_columns,
@@ -44,14 +44,18 @@ logger = logging.getLogger(__name__)
 class SpansClient:
-    """Client for logging LLM tracing spans and evaluations to Arize."""
+    """Client for logging LLM tracing spans and evaluations to Arize.
-    def __init__(self, *, sdk_config: SDKConfiguration) -> None:
-        """Initialize the spans client with SDK configuration.
+    This class is primarily intended for internal use within the SDK. Users are
+    highly encouraged to access resource-specific functionality via
+    :class:`arize.ArizeClient`.
+    """
-        Args:
-            sdk_config: SDK configuration containing API endpoints and credentials.
+    def __init__(self, *, sdk_config: SDKConfiguration) -> None:
         """
+        Args:
+            sdk_config: Resolved SDK configuration.
+        """  # noqa: D205, D212
         self._sdk_config = sdk_config
     def log(
@@ -72,23 +76,23 @@ class SpansClient:
         successful delivery of records.
         Args:
-            space_id (str): The space ID where the project resides.
-            project_name (str): A unique name to identify your project in the Arize platform.
-            dataframe (pd.DataFrame): The dataframe containing the LLM traces.
-            evals_dataframe (pd.DataFrame, optional): A dataframe containing LLM evaluations data.
+            space_id: The space ID where the project resides.
+            project_name: A unique name to identify your project in the Arize platform.
+            dataframe: The dataframe containing the LLM traces.
+            evals_dataframe: A dataframe containing LLM evaluations data.
                 The evaluations are joined to their corresponding spans via a left outer join, i.e.,
                 using only `context.span_id` from the spans dataframe. Defaults to None.
-            datetime_format (str): format for the timestamp captured in the LLM traces.
+            datetime_format: format for the timestamp captured in the LLM traces.
                 Defaults to "%Y-%m-%dT%H:%M:%S.%f+00:00".
-            validate (bool, optional): When set to True, validation is run before sending data.
+            validate: When set to True, validation is run before sending data.
                 Defaults to True.
-            timeout (float, optional): You can stop waiting for a response after a given number
+            timeout: You can stop waiting for a response after a given number
                 of seconds with the timeout parameter. Defaults to None.
-            tmp_dir (str, optional): Temporary directory/file to store the serialized data in binary
+            tmp_dir: Temporary directory/file to store the serialized data in binary
                 before sending to Arize.
         Returns:
-            `Response` object
+            Response object from the HTTP request.
         """
         from arize.spans.columns import (
@@ -274,15 +278,15 @@ class SpansClient:
         each evaluation to its respective span.
         Args:
-            space_id (str): The space ID where the project resides.
-            project_name (str): A unique name to identify your project in the Arize platform.
-            dataframe (pd.DataFrame): A dataframe containing LLM evaluations data.
-            validate (bool, optional): When set to True, validation is run before sending data.
+            space_id: The space ID where the project resides.
+            project_name: A unique name to identify your project in the Arize platform.
+            dataframe: A dataframe containing LLM evaluations data.
+            validate: When set to True, validation is run before sending data.
                 Defaults to True.
-            force_http (bool, optional): Force the use of HTTP for data upload. Defaults to False.
-            timeout (float, optional): You can stop waiting for a response after a given number
+            force_http: Force the use of HTTP for data upload. Defaults to False.
+            timeout: You can stop waiting for a response after a given number
                 of seconds with the timeout parameter. Defaults to None.
-            tmp_dir (str, optional): Temporary directory/file to store the serialized data in binary
+            tmp_dir: Temporary directory/file to store the serialized data in binary
                 before sending to Arize.
         """
         from arize.spans.columns import EVAL_COLUMN_PATTERN, SPAN_SPAN_ID_COL
@@ -447,10 +451,10 @@ class SpansClient:
         `annotation.notes` column can be included for free-form text notes.
         Args:
-            space_id (str): The space ID where the project resides.
-            project_name (str): A unique name to identify your project in the Arize platform.
-            dataframe (pd.DataFrame): A dataframe containing LLM annotation data.
-            validate (bool, optional): When set to True, validation is run before sending data.
+            space_id: The space ID where the project resides.
+            project_name: A unique name to identify your project in the Arize platform.
+            dataframe: A dataframe containing LLM annotation data.
+            validate: When set to True, validation is run before sending data.
                 Defaults to True.
         """
         from arize.spans.columns import (
@@ -661,6 +665,7 @@ class SpansClient:
         This method is only supported for LLM model types.
         The dataframe must contain a column `context.span_id` to identify spans and either:
         1. A column with JSON patch documents (specified by patch_document_column_name), or
         2. One or more columns with prefix `attributes.metadata.` that will be automatically
            converted to a patch document (e.g., `attributes.metadata.tag` → `{"tag": value}`).
@@ -668,7 +673,8 @@ class SpansClient:
         If both methods are used, the explicit patch document is applied after the individual field updates.
         The patches will be applied to the `attributes.metadata` field of each span.
-        **Type Handling:**
+        Type Handling:
         - The client primarily supports string, integer, and float data types.
         - Boolean values are converted to string representations.
         - Nested JSON objects and arrays are serialized to JSON strings during transmission.
@@ -685,12 +691,14 @@ class SpansClient:
         Returns:
             Dictionary containing update results with the following keys:
                 - spans_processed: Total number of spans in the input dataframe
                 - spans_updated: Count of successfully updated span metadata records
                 - spans_failed: Count of spans that failed to update
                 - errors: List of dictionaries with 'span_id' and 'error_message' keys for each failed span
-            Error types from the server include:
+                Error types from the server include:
                 - parse_failure: Failed to parse JSON metadata
                 - patch_failure: Failed to apply JSON patch
                 - type_conflict: Type conflict in metadata
@@ -699,58 +707,60 @@ class SpansClient:
                 - druid_rejection: Backend rejected the update
         Raises:
-            AuthError: When API key or space ID is missing
-            ValidationFailure: When validation of the dataframe or values fails
-            ImportError: When required tracing dependencies are missing
-            ArrowInvalid: When the dataframe cannot be converted to Arrow format
-            RuntimeError: If the request fails or no response is received
-        Example:
-            ```python
-            # Method 1: Using a patch document
-            df = pd.DataFrame(
-                {
-                    "context.span_id": ["span1", "span2"],
-                    "patch_document": [
-                        {"tag": "important"},
-                        {"priority": "high"},
-                    ],
-                }
-            )
-            # Method 2: Using direct field columns
-            df = pd.DataFrame(
-                {
-                    "context.span_id": ["span1", "span2"],
-                    "attributes.metadata.tag": ["important", "standard"],
-                    "attributes.metadata.priority": ["high", "medium"],
-                }
-            )
-            # Method 3: Combining both approaches
-            df = pd.DataFrame(
-                {
-                    "context.span_id": ["span1"],
-                    "attributes.metadata.tag": ["important"],
-                    "patch_document": [
-                        {"priority": "high"}
-                    ],  # This will override any conflicting fields
-                }
-            )
-            # Method 4: Setting fields to null
-            df = pd.DataFrame(
-                {
-                    "context.span_id": ["span1"],
-                    "attributes.metadata.old_field": [
-                        None
-                    ],  # Sets field to JSON null
-                    "patch_document": [
-                        {"other_field": None}
-                    ],  # Also sets field to JSON null
-                }
-            )
-            ```
+            AuthError: When API key or space ID is missing.
+            ValidationFailure: When validation of the dataframe or values fails.
+            ImportError: When required tracing dependencies are missing.
+            ArrowInvalid: When the dataframe cannot be converted to Arrow format.
+            RuntimeError: If the request fails or no response is received.
+        Examples:
+            Method 1: Using a patch document
+            >>> df = pd.DataFrame(
+            ...     {
+            ...         "context.span_id": ["span1", "span2"],
+            ...         "patch_document": [
+            ...             {"tag": "important"},
+            ...             {"priority": "high"},
+            ...         ],
+            ...     }
+            ... )
+            Method 2: Using direct field columns
+            >>> df = pd.DataFrame(
+            ...     {
+            ...         "context.span_id": ["span1", "span2"],
+            ...         "attributes.metadata.tag": ["important", "standard"],
+            ...         "attributes.metadata.priority": ["high", "medium"],
+            ...     }
+            ... )
+            Method 3: Combining both approaches
+            >>> df = pd.DataFrame(
+            ...     {
+            ...         "context.span_id": ["span1"],
+            ...         "attributes.metadata.tag": ["important"],
+            ...         "patch_document": [
+            ...             {"priority": "high"}
+            ...         ],  # Overrides conflicting fields
+            ...     }
+            ... )
+            Method 4: Setting fields to null
+            >>> df = pd.DataFrame(
+            ...     {
+            ...         "context.span_id": ["span1"],
+            ...         "attributes.metadata.old_field": [
+            ...             None
+            ...         ],  # Sets field to JSON null
+            ...         "patch_document": [
+            ...             {"other_field": None}
+            ...         ],  # Also sets field to JSON null
+            ...     }
+            ... )
         """
         # Import validation modules
         from arize.spans.columns import SPAN_SPAN_ID_COL
@@ -992,7 +1002,6 @@ class SpansClient:
         end_time: datetime,
         where: str = "",
         columns: list | None = None,
-        similarity_search_params: SimilaritySearchParams | None = None,
         stream_chunk_size: int | None = None,
     ) -> pd.DataFrame:
         """Export span data from Arize to a pandas DataFrame.
@@ -1002,8 +1011,7 @@ class SpansClient:
         WHERE clauses and similarity search for semantic retrieval.
         Returns:
-        -------
-            pd.DataFrame: DataFrame containing the requested span data with columns
+            DataFrame containing the requested span data with columns
                 for span metadata, attributes, events, and any custom fields.
         """
         with ArizeFlightClient(
@@ -1025,7 +1033,6 @@ class SpansClient:
                 end_time=end_time,
                 where=where,
                 columns=columns,
-                similarity_search_params=similarity_search_params,
                 stream_chunk_size=stream_chunk_size,
             )
@@ -1039,7 +1046,6 @@ class SpansClient:
         end_time: datetime,
         where: str = "",
         columns: list | None = None,
-        similarity_search_params: SimilaritySearchParams | None = None,
         stream_chunk_size: int | None = None,
     ) -> None:
         """Export span data from Arize to a Parquet file.
@@ -1069,7 +1075,6 @@ class SpansClient:
                 end_time=end_time,
                 where=where,
                 columns=columns,
-                similarity_search_params=similarity_search_params,
                 stream_chunk_size=stream_chunk_size,
             )

arize/spans/conversion.py CHANGED Viewed

@@ -35,10 +35,17 @@ def convert_timestamps(df: pd.DataFrame, fmt: str = "") -> pd.DataFrame:
 def _datetime_to_ns(dt: object, fmt: str) -> int:
     if isinstance(dt, str):
-        return int(
-            datetime.strptime(dt, fmt).replace(tzinfo=timezone.utc).timestamp()
-            * 1e9
-        )
+        # Try ISO 8601 with timezone first
+        try:
+            parsed = datetime.fromisoformat(dt)
+            if parsed.tzinfo is None:
+                # If no timezone, assume UTC
+                parsed = parsed.replace(tzinfo=timezone.utc)
+        except ValueError:
+            # Fall back to custom format
+            parsed = datetime.strptime(dt, fmt).replace(tzinfo=timezone.utc)
+        return int(parsed.timestamp() * 1e9)
     if isinstance(dt, datetime):
         return int(datetime.timestamp(dt) * 1e9)
     if isinstance(dt, pd.Timestamp):

arize/spans/validation/common/value_validation.py CHANGED Viewed

@@ -25,7 +25,7 @@ from arize.spans.validation.common.errors import (
     InvalidStringValueNotAllowedInColumn,
     InvalidTimestampValueInColumn,
 )
-from arize.types import is_json_str
+from arize.utils.types import is_json_str
 logger = logging.getLogger(__name__)

arize/spans/validation/spans/dataframe_form_validation.py CHANGED Viewed

@@ -12,7 +12,7 @@ from arize.spans.conversion import is_missing_value
 from arize.spans.validation.common.errors import (
     InvalidDataFrameColumnContentTypes,
 )
-from arize.types import is_array_of, is_dict_of, is_list_of
+from arize.utils.types import is_array_of, is_dict_of, is_list_of
 logger = logging.getLogger(__name__)

arize/spans/validation/spans/value_validation.py CHANGED Viewed

@@ -7,6 +7,7 @@ from typing import TYPE_CHECKING
 from arize.constants import spans as tracing_constants
 from arize.constants.ml import MAX_EMBEDDING_DIMENSIONALITY
+from arize.ml.types import StatusCodes
 from arize.spans import columns as tracing_cols
 from arize.spans.validation.common import value_validation
 from arize.spans.validation.common.errors import (
@@ -15,7 +16,7 @@ from arize.spans.validation.common.errors import (
     InvalidEventValueInColumn,
     InvalidLLMMessageValueInColumn,
 )
-from arize.types import StatusCodes, is_dict_of, is_json_str
+from arize.utils.types import is_dict_of, is_json_str
 if TYPE_CHECKING:
     import pandas as pd

arize/utils/dataframe.py CHANGED Viewed

@@ -4,7 +4,7 @@ import re
 import pandas as pd
-from arize.types import BaseSchema
+from arize.ml.types import BaseSchema
 # Resets the dataframe index if it is not a RangeIndex

arize/utils/online_tasks/dataframe_preprocessor.py CHANGED Viewed

@@ -122,7 +122,7 @@ def extract_nested_data_to_column(
 def _introspect_arize_attribute(value: object, attribute: str) -> object:
     """Recursively drill into `value` following the dot-delimited `attribute`.
-    Example:
+    Examples:
         value: [{'message.role': 'assistant', 'message.content': 'The capital of China is Beijing.'}]
         attribute: "0.message.content"
         Returns: 'The capital of China is Beijing.'
@@ -132,7 +132,6 @@ def _introspect_arize_attribute(value: object, attribute: str) -> object:
       - Parses JSON strings
       - Converts NumPy arrays to lists
       - Allows dotted keys (e.g. "message.content") by combining parts
     """
     if not attribute:
         return value
@@ -195,10 +194,10 @@ def _parse_value(
     idx = _try_int(key)
     if idx is not None:
         # Must be a tuple or list (_ensure_deserialized() already casts numpy arrays to python lists)
-        if isinstance(current_value, (list, tuple)):
-            if 0 <= idx < len(current_value):
-                return (current_value[idx], num_parts_processed)
-            return (None, num_parts_processed)
+        if isinstance(current_value, list | tuple) and 0 <= idx < len(
+            current_value
+        ):
+            return (current_value[idx], num_parts_processed)
         return (None, num_parts_processed)
     # 2) Try dict approach

arize/utils/types.py ADDED Viewed

@@ -0,0 +1,105 @@
+"""Common type definitions and data models used across the Arize SDK."""
+import json
+from collections.abc import Iterable, Sequence
+from typing import (
+    TypeVar,
+)
+import numpy as np
+def is_json_str(s: str) -> bool:
+    """Check if a string is valid JSON.
+    Args:
+        s: The string to validate.
+    Returns:
+        True if the string is valid JSON, False otherwise.
+    """
+    try:
+        json.loads(s)
+    except ValueError:
+        return False
+    except TypeError:
+        return False
+    return True
+T = TypeVar("T", bound=type)
+def is_array_of(arr: Sequence[object], tp: T) -> bool:
+    """Check if a value is a numpy array with all elements of a specific type.
+    Args:
+        arr: The sequence to check.
+        tp: The expected type for all elements.
+    Returns:
+        True if arr is a numpy array and all elements are of type tp.
+    """
+    return isinstance(arr, np.ndarray) and all(isinstance(x, tp) for x in arr)
+def is_list_of(lst: Sequence[object], tp: T) -> bool:
+    """Check if a value is a list with all elements of a specific type.
+    Args:
+        lst: The sequence to check.
+        tp: The expected type for all elements.
+    Returns:
+        True if lst is a list and all elements are of type tp.
+    """
+    return isinstance(lst, list) and all(isinstance(x, tp) for x in lst)
+def is_iterable_of(lst: Sequence[object], tp: T) -> bool:
+    """Check if a value is an iterable with all elements of a specific type.
+    Args:
+        lst: The sequence to check.
+        tp: The expected type for all elements.
+    Returns:
+        True if lst is an iterable and all elements are of type tp.
+    """
+    return isinstance(lst, Iterable) and all(isinstance(x, tp) for x in lst)
+def is_dict_of(
+    d: dict[object, object],
+    key_allowed_types: T,
+    value_allowed_types: T = (),
+    value_list_allowed_types: T = (),
+) -> bool:
+    """Method to check types are valid for dictionary.
+    Args:
+        d: Dictionary itself.
+        key_allowed_types: All allowed types for keys of dictionary.
+        value_allowed_types: All allowed types for values of dictionary.
+        value_list_allowed_types: If value is a list, these are the allowed
+            types for value list.
+    Returns:
+        True if the data types of dictionary match the types specified by the
+            arguments, false otherwise.
+    """
+    if value_list_allowed_types and not isinstance(
+        value_list_allowed_types, tuple
+    ):
+        value_list_allowed_types = (value_list_allowed_types,)
+    return (
+        isinstance(d, dict)
+        and all(isinstance(k, key_allowed_types) for k in d)
+        and all(
+            isinstance(v, value_allowed_types)
+            or any(is_list_of(v, t) for t in value_list_allowed_types)
+            for v in d.values()
+            if value_allowed_types or value_list_allowed_types
+        )
+    )

arize/version.py CHANGED Viewed

@@ -1,3 +1,3 @@
 """Version information for the Arize SDK."""
-__version__ = "8.0.0a23"
+__version__ = "8.0.0b1"

arize 8.0.0a23__py3-none-any.whl → 8.0.0b1__py3-none-any.whl

arize 8.0.0a23__py3-none-any.whl → 8.0.0b1__py3-none-any.whl