PyPI - arize - Versions diffs - 8.0.0b1__py3-none-any.whl → 8.0.0b2__py3-none-any.whl - Mend

arize 8.0.0b1__py3-none-any.whl → 8.0.0b2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (60) hide show

arize/__init__.py +1 -1
arize/_client_factory.py +50 -0
arize/_flight/client.py +4 -4
arize/_generated/api_client/api/datasets_api.py +6 -6
arize/_generated/api_client/api/experiments_api.py +6 -6
arize/_generated/api_client/api/projects_api.py +3 -3
arize/_lazy.py +25 -9
arize/client.py +6 -16
arize/config.py +9 -36
arize/constants/ml.py +9 -16
arize/constants/spans.py +5 -10
arize/datasets/client.py +13 -9
arize/datasets/errors.py +1 -1
arize/datasets/validation.py +2 -2
arize/embeddings/auto_generator.py +2 -2
arize/embeddings/errors.py +2 -2
arize/embeddings/tabular_generators.py +1 -1
arize/exceptions/base.py +0 -52
arize/exceptions/parameters.py +0 -329
arize/experiments/client.py +14 -7
arize/experiments/evaluators/base.py +6 -6
arize/experiments/evaluators/executors.py +10 -3
arize/experiments/evaluators/types.py +2 -2
arize/experiments/functions.py +18 -11
arize/experiments/types.py +3 -5
arize/logging.py +1 -1
arize/ml/batch_validation/errors.py +10 -1004
arize/ml/batch_validation/validator.py +273 -225
arize/ml/casting.py +7 -7
arize/ml/client.py +12 -11
arize/ml/proto.py +6 -6
arize/ml/stream_validation.py +2 -3
arize/ml/surrogate_explainer/mimic.py +3 -3
arize/ml/types.py +1 -55
arize/pre_releases.py +6 -3
arize/projects/client.py +9 -4
arize/regions.py +2 -2
arize/spans/client.py +13 -11
arize/spans/columns.py +32 -36
arize/spans/conversion.py +5 -6
arize/spans/validation/common/argument_validation.py +3 -3
arize/spans/validation/common/dataframe_form_validation.py +6 -6
arize/spans/validation/common/value_validation.py +1 -1
arize/spans/validation/evals/dataframe_form_validation.py +4 -4
arize/spans/validation/evals/evals_validation.py +6 -6
arize/spans/validation/metadata/dataframe_form_validation.py +1 -1
arize/spans/validation/spans/dataframe_form_validation.py +2 -2
arize/spans/validation/spans/spans_validation.py +6 -6
arize/utils/arrow.py +2 -2
arize/utils/cache.py +2 -2
arize/utils/dataframe.py +4 -4
arize/utils/online_tasks/dataframe_preprocessor.py +7 -7
arize/utils/openinference_conversion.py +10 -10
arize/utils/proto.py +1 -1
arize/version.py +1 -1
{arize-8.0.0b1.dist-info → arize-8.0.0b2.dist-info}/METADATA +23 -6
{arize-8.0.0b1.dist-info → arize-8.0.0b2.dist-info}/RECORD +60 -59
{arize-8.0.0b1.dist-info → arize-8.0.0b2.dist-info}/WHEEL +0 -0
{arize-8.0.0b1.dist-info → arize-8.0.0b2.dist-info}/licenses/LICENSE +0 -0
{arize-8.0.0b1.dist-info → arize-8.0.0b2.dist-info}/licenses/NOTICE +0 -0

arize/ml/casting.py CHANGED Viewed

@@ -1,6 +1,6 @@
+# type: ignore[pb2]
 """Type casting utilities for ML model data conversion."""
-# type: ignore[pb2]
 from __future__ import annotations
 import math
@@ -132,12 +132,12 @@ def cast_typed_columns(
     a column across many SDK uploads.
     Args:
-        dataframe (pd.DataFrame): A deepcopy of the user's dataframe.
+        dataframe (:class:`pandas.DataFrame`): A deepcopy of the user's dataframe.
         schema (Schema): The schema, which may include feature and tag column names
             in a TypedColumns object or a List[string].
     Returns:
-        tuple[pd.DataFrame, Schema]: A tuple containing:
+        tuple[:class:`pandas.DataFrame`, Schema]: A tuple containing:
             - dataframe: The dataframe, with columns cast to the specified types.
             - schema: A new Schema object, with feature and tag column names converted
                 to the List[string] format expected in downstream validation.
@@ -290,12 +290,12 @@ def _cast_columns(
     (feature_column_names or tag_column_names)
     Args:
-        dataframe (pd.DataFrame): A deepcopy of the user's dataframe.
+        dataframe (:class:`pandas.DataFrame`): A deepcopy of the user's dataframe.
         columns (TypedColumns): The TypedColumns object, which specifies the columns
             to cast (and/or to not cast) and their target types.
     Returns:
-        pd.DataFrame: The dataframe with columns cast to the specified types.
+        :class:`pandas.DataFrame`: The dataframe with columns cast to the specified types.
     Raises:
         ColumnCastingError: If casting fails.
@@ -350,12 +350,12 @@ def _cast_df(
     """Cast columns in a dataframe to the specified type.
     Args:
-        df (pd.DataFrame): A deepcopy of the user's dataframe.
+        df (:class:`pandas.DataFrame`): A deepcopy of the user's dataframe.
         cols (list[str]): The list of column names to cast.
         target_type_str (str): The target type to cast to.
     Returns:
-        pd.DataFrame: The dataframe with columns cast to the specified types.
+        :class:`pandas.DataFrame`: The dataframe with columns cast to the specified types.
     Raises:
         Exception: If casting fails. Common exceptions raised by astype() are

arize/ml/client.py CHANGED Viewed

@@ -1,6 +1,6 @@
+# type: ignore[pb2]
 """Client implementation for managing ML models in the Arize platform."""
-# type: ignore[pb2]
 from __future__ import annotations
 import copy
@@ -542,7 +542,7 @@ class MLModelsClient:
         timeout: float | None = None,
         tmp_dir: str = "",
     ) -> requests.Response:
-        """Log a batch of model predictions and actuals to Arize from a pandas DataFrame.
+        """Log a batch of model predictions and actuals to Arize from a :class:`pandas.DataFrame`.
         This method uploads multiple records to Arize in a single batch operation using
         Apache Arrow format for efficient transfer. The dataframe structure is defined
@@ -554,8 +554,8 @@ class MLModelsClient:
             model_type: The type of model. Supported types: BINARY, MULTI_CLASS, REGRESSION,
                 RANKING, OBJECT_DETECTION. Note: GENERATIVE_LLM is not supported; use the
                 spans module instead.
-            dataframe: Pandas DataFrame containing the data to upload. Columns should
-                correspond to the schema field mappings.
+            dataframe (:class:`pandas.DataFrame`): Pandas DataFrame containing the data to
+                upload. Columns should correspond to the schema field mappings.
             schema: Schema object (Schema or CorpusSchema) that defines the mapping between
                 dataframe columns and Arize data fields (e.g., prediction_label_column_name,
                 feature_column_names, etc.).
@@ -811,10 +811,10 @@ class MLModelsClient:
         similarity_search_params: SimilaritySearchParams | None = None,
         stream_chunk_size: int | None = None,
     ) -> pd.DataFrame:
-        """Export model data from Arize to a pandas DataFrame.
+        """Export model data from Arize to a :class:`pandas.DataFrame`.
         Retrieves prediction and optional actual data for a model within a specified time
-        range and returns it as a pandas DataFrame for analysis.
+        range and returns it as a :class:`pandas.DataFrame` for analysis.
         Args:
             space_id: The space ID where the model resides.
@@ -835,8 +835,9 @@ class MLModelsClient:
             stream_chunk_size: Optional chunk size for streaming large result sets.
         Returns:
-            A pandas DataFrame containing the exported data with columns for predictions,
-            actuals (if requested), features, tags, timestamps, and other model metadata.
+            :class:`pandas.DataFrame`: A pandas DataFrame containing the exported data
+                with columns for predictions, actuals (if requested), features, tags,
+                timestamps, and other model metadata.
         Raises:
             RuntimeError: If the Flight client request fails or returns no response.
@@ -895,7 +896,7 @@ class MLModelsClient:
         """Export model data from Arize to a Parquet file and return as DataFrame.
         Retrieves prediction and optional actual data for a model within a specified time
-        range, saves it as a Parquet file, and returns it as a pandas DataFrame.
+        range, saves it as a Parquet file, and returns it as a :class:`pandas.DataFrame`.
         Args:
             space_id: The space ID where the model resides.
@@ -916,8 +917,8 @@ class MLModelsClient:
             stream_chunk_size: Optional chunk size for streaming large result sets.
         Returns:
-            A pandas DataFrame containing the exported data. The data is also saved to a
-            Parquet file by the underlying export client.
+            :class:`pandas.DataFrame`: A pandas DataFrame containing the exported data.
+                The data is also saved to a Parquet file by the underlying export client.
         Raises:
             RuntimeError: If the Flight client request fails or returns no response.

arize/ml/proto.py CHANGED Viewed

@@ -1,6 +1,6 @@
+# type: ignore[pb2]
 """Protocol buffer utilities for ML model data serialization."""
-# type: ignore[pb2]
 from __future__ import annotations
 from google.protobuf.timestamp_pb2 import Timestamp
@@ -30,10 +30,10 @@ def get_pb_dictionary(d: dict[object, object] | None) -> dict[str, object]:
     """Convert a dictionary to protobuf format with string keys and pb2.Value values.
     Args:
-        d: Dictionary to convert, or None.
+        d: Dictionary to convert, or :obj:`None`.
     Returns:
-        Dictionary with string keys and protobuf Value objects, or empty dict if input is None.
+        Dictionary with string keys and protobuf Value objects, or empty dict if input is :obj:`None`.
     """
     if d is None:
         return {}
@@ -56,7 +56,7 @@ def get_pb_value(name: str | int | float, value: pb2.Value) -> pb2.Value:
         value: The value to convert to protobuf format.
     Returns:
-        A pb2.Value protobuf object, or None if value cannot be converted.
+        A pb2.Value protobuf object, or :obj:`None` if value cannot be converted.
     Raises:
         TypeError: If value type is not supported.
@@ -139,10 +139,10 @@ def get_pb_timestamp(time_overwrite: int | None) -> object | None:
     """Convert a Unix timestamp to a protobuf Timestamp object.
     Args:
-        time_overwrite: Unix epoch time in seconds, or None.
+        time_overwrite: Unix epoch time in seconds, or :obj:`None`.
     Returns:
-        A protobuf Timestamp object, or None if input is None.
+        A protobuf Timestamp object, or :obj:`None` if input is :obj:`None`.
     Raises:
         TypeError: If time_overwrite is not an integer.

arize/ml/stream_validation.py CHANGED Viewed

@@ -1,6 +1,5 @@
-"""Stream validation logic for ML model predictions."""
 # type: ignore[pb2]
+"""Stream validation logic for ML model predictions."""
 from arize.constants.ml import MAX_PREDICTION_ID_LEN, MIN_PREDICTION_ID_LEN
 from arize.exceptions.parameters import (
@@ -185,7 +184,7 @@ def validate_and_convert_prediction_id(
     """Validate and convert a prediction ID to string format, or generate one if absent.
     Args:
-        prediction_id: The prediction ID to validate/convert, or None.
+        prediction_id: The prediction ID to validate/convert, or :obj:`None`.
         environment: The environment context (training, validation, production).
         prediction_label: Optional prediction label for delayed record detection.
         actual_label: Optional actual label for delayed record detection.

arize/ml/surrogate_explainer/mimic.py CHANGED Viewed

@@ -36,7 +36,7 @@ class Mimic:
         """Initialize the Mimic explainer with training data and model.
         Args:
-            X: Training data DataFrame for the surrogate model.
+            X: Training data :class:`pandas.DataFrame` for the surrogate model.
             model_func: Model function to explain.
         """
         self.explainer = MimicExplainer(
@@ -48,7 +48,7 @@ class Mimic:
         )
     def explain(self, X: pd.DataFrame) -> pd.DataFrame:
-        """Explain feature importance for the given input DataFrame."""
+        """Explain feature importance for the given input :class:`pandas.DataFrame`."""
         return pd.DataFrame(
             self.explainer.explain_local(X).local_importance_values,
             columns=X.columns,
@@ -59,7 +59,7 @@ class Mimic:
     def augment(
         df: pd.DataFrame, schema: Schema, model_type: ModelTypes
     ) -> tuple[pd.DataFrame, Schema]:
-        """Augment the DataFrame and schema with SHAP values for explainability."""
+        """Augment the :class:`pandas.DataFrame` and schema with SHAP values for explainability."""
         features = schema.feature_column_names
         X = df[features]

arize/ml/types.py CHANGED Viewed

@@ -16,27 +16,13 @@ from typing import (
 import numpy as np
 from arize.constants.ml import (
-    # MAX_MULTI_CLASS_NAME_LENGTH,
-    # MAX_NUMBER_OF_MULTI_CLASS_CLASSES,
     MAX_MULTI_CLASS_NAME_LENGTH,
     MAX_NUMBER_OF_MULTI_CLASS_CLASSES,
     MAX_NUMBER_OF_SIMILARITY_REFERENCES,
     MAX_RAW_DATA_CHARACTERS,
     MAX_RAW_DATA_CHARACTERS_TRUNCATION,
-    # MAX_RAW_DATA_CHARACTERS,
-    # MAX_RAW_DATA_CHARACTERS_TRUNCATION,
 )
 from arize.exceptions.parameters import InvalidValueType
-#
-# from arize.utils.constants import (
-#     MAX_MULTI_CLASS_NAME_LENGTH,
-#     MAX_NUMBER_OF_MULTI_CLASS_CLASSES,
-#     MAX_NUMBER_OF_SIMILARITY_REFERENCES,
-#     MAX_RAW_DATA_CHARACTERS,
-#     MAX_RAW_DATA_CHARACTERS_TRUNCATION,
-# )
-# from arize.utils.errors import InvalidValueType
 from arize.logging import get_truncation_warning_message
 from arize.utils.types import is_dict_of, is_iterable_of, is_list_of
@@ -299,30 +285,6 @@ class Embedding(NamedTuple):
         return any(isinstance(data, t) for t in (list, np.ndarray))
-# @dataclass
-# class _PromptOrResponseText:
-#     data: str
-#
-#     def validate(self, name: str) -> None:
-#         # Validate that data is a string
-#         if not isinstance(self.data, str):
-#             raise TypeError(f"'{name}' must be a str")
-#
-#         character_count = len(self.data)
-#         if character_count > MAX_RAW_DATA_CHARACTERS:
-#             raise ValueError(
-#                 f"'{name}' field must not contain more than {MAX_RAW_DATA_CHARACTERS} characters. "
-#                 f"Found {character_count}."
-#             )
-#         elif character_count > MAX_RAW_DATA_CHARACTERS_TRUNCATION:
-#             logger.warning(
-#                 get_truncation_warning_message(
-#                     f"'{name}'", MAX_RAW_DATA_CHARACTERS_TRUNCATION
-#                 )
-#             )
-#         return None
 class LLMRunMetadata(NamedTuple):
     """Metadata for LLM execution including token counts and latency."""
@@ -1021,22 +983,6 @@ class LLMRunMetadataColumnNames:
         )
-# @dataclass
-# class DocumentColumnNames:
-#     id_column_name: Optional[str] = None
-#     version_column_name: Optional[str] = None
-#     text_embedding_column_names: Optional[EmbeddingColumnNames] = None
-#
-#     def __iter__(self):
-#         return iter(
-#             (
-#                 self.id_column_name,
-#                 self.version_column_name,
-#                 self.text_embedding_column_names,
-#             )
-#         )
-#
-#
 @dataclass
 class SimilarityReference:
     """Reference to a prediction for similarity search operations."""
@@ -1531,7 +1477,7 @@ def add_to_column_count_dictionary(
     Args:
         column_dictionary: Dictionary mapping column names to counts.
-        col: The column name to increment, or None to skip.
+        col: The column name to increment, or :obj:`None` to skip.
     """
     if col:
         if col in column_dictionary:

arize/pre_releases.py CHANGED Viewed

@@ -4,6 +4,7 @@ import functools
 import logging
 from collections.abc import Callable
 from enum import StrEnum
+from typing import TypeVar
 from arize.version import __version__
@@ -19,6 +20,8 @@ class ReleaseStage(StrEnum):
 _WARNED: set[str] = set()
+_F = TypeVar("_F", bound=Callable)
 def _format_prerelease_message(*, key: str, stage: ReleaseStage) -> str:
     article = "an" if stage is ReleaseStage.ALPHA else "a"
@@ -28,10 +31,10 @@ def _format_prerelease_message(*, key: str, stage: ReleaseStage) -> str:
     )
-def prerelease_endpoint(*, stage: ReleaseStage, key: str) -> object:
+def prerelease_endpoint(*, stage: ReleaseStage, key: str) -> Callable[[_F], _F]:
     """Decorate a method to emit a prerelease warning via logging once per process."""
-    def deco(fn: Callable[..., object]) -> object:
+    def deco(fn: _F) -> _F:
         @functools.wraps(fn)
         def wrapper(*args: object, **kwargs: object) -> object:
             if key not in _WARNED:
@@ -39,6 +42,6 @@ def prerelease_endpoint(*, stage: ReleaseStage, key: str) -> object:
                 logger.warning(_format_prerelease_message(key=key, stage=stage))
             return fn(*args, **kwargs)
-        return wrapper
+        return wrapper  # type: ignore[return-value]
     return deco

arize/projects/client.py CHANGED Viewed

@@ -9,6 +9,7 @@ from arize.pre_releases import ReleaseStage, prerelease_endpoint
 if TYPE_CHECKING:
     from arize._generated.api_client import models
+    from arize._generated.api_client.api_client import ApiClient
     from arize.config import SDKConfiguration
 logger = logging.getLogger(__name__)
@@ -26,18 +27,21 @@ class ProjectsClient:
     :class:`arize.config.SDKConfiguration`.
     """
-    def __init__(self, *, sdk_config: SDKConfiguration) -> None:
+    def __init__(
+        self, *, sdk_config: SDKConfiguration, generated_client: ApiClient
+    ) -> None:
         """
         Args:
             sdk_config: Resolved SDK configuration.
+            generated_client: Shared generated API client instance.
         """  # noqa: D205, D212
         self._sdk_config = sdk_config
         # Import at runtime so it's still lazy and extras-gated by the parent
         from arize._generated import api_client as gen
-        # Use the shared generated client from the config
-        self._api = gen.ProjectsApi(self._sdk_config.get_generated_client())
+        # Use the provided client directly
+        self._api = gen.ProjectsApi(generated_client)
     @prerelease_endpoint(key="projects.list", stage=ReleaseStage.BETA)
     def list(
@@ -125,7 +129,8 @@ class ProjectsClient:
         Args:
             project_id: Project ID.
-        Returns: This method returns None on success (common empty 204 response)
+        Returns:
+            This method returns None on success (common empty 204 response).
         Raises:
             arize._generated.api_client.exceptions.ApiException: If the API request fails

arize/regions.py CHANGED Viewed

@@ -13,7 +13,7 @@ class Region(StrEnum):
     EU_WEST_1 = "eu-west-1a"
     CA_CENTRAL_1 = "ca-central-1a"
     US_EAST_1 = "us-east-1b"
-    UNSPECIFIED = ""
+    UNSET = ""
 @dataclass(frozen=True)
@@ -36,5 +36,5 @@ def _get_region_endpoints(region: Region) -> RegionEndpoints:
 REGION_ENDPOINTS: dict[Region, RegionEndpoints] = {
-    r: _get_region_endpoints(r) for r in Region if r != Region.UNSPECIFIED
+    r: _get_region_endpoints(r) for r in Region if r != Region.UNSET
 }

arize/spans/client.py CHANGED Viewed

@@ -1,6 +1,6 @@
+# type: ignore[pb2]
 """Client implementation for managing spans and traces in the Arize platform."""
-# type: ignore[pb2]
 from __future__ import annotations
 import json
@@ -78,10 +78,11 @@ class SpansClient:
         Args:
             space_id: The space ID where the project resides.
             project_name: A unique name to identify your project in the Arize platform.
-            dataframe: The dataframe containing the LLM traces.
-            evals_dataframe: A dataframe containing LLM evaluations data.
-                The evaluations are joined to their corresponding spans via a left outer join, i.e.,
-                using only `context.span_id` from the spans dataframe. Defaults to None.
+            dataframe (:class:`pandas.DataFrame`): The dataframe containing the LLM traces.
+            evals_dataframe (:class:`pandas.DataFrame` | :obj:`None`): A dataframe containing
+                LLM evaluations data. The evaluations are joined to their corresponding spans
+                via a left outer join, i.e., using only `context.span_id` from the spans
+                dataframe. Defaults to None.
             datetime_format: format for the timestamp captured in the LLM traces.
                 Defaults to "%Y-%m-%dT%H:%M:%S.%f+00:00".
             validate: When set to True, validation is run before sending data.
@@ -280,7 +281,7 @@ class SpansClient:
         Args:
             space_id: The space ID where the project resides.
             project_name: A unique name to identify your project in the Arize platform.
-            dataframe: A dataframe containing LLM evaluations data.
+            dataframe (:class:`pandas.DataFrame`): A dataframe containing LLM evaluations data.
             validate: When set to True, validation is run before sending data.
                 Defaults to True.
             force_http: Force the use of HTTP for data upload. Defaults to False.
@@ -453,7 +454,7 @@ class SpansClient:
         Args:
             space_id: The space ID where the project resides.
             project_name: A unique name to identify your project in the Arize platform.
-            dataframe: A dataframe containing LLM annotation data.
+            dataframe (:class:`pandas.DataFrame`): A dataframe containing LLM annotation data.
             validate: When set to True, validation is run before sending data.
                 Defaults to True.
         """
@@ -684,7 +685,8 @@ class SpansClient:
         Args:
             space_id: The space ID where the project resides.
             project_name: A unique name to identify your project in the Arize platform.
-            dataframe: DataFrame with span_ids and either patch documents or metadata field columns.
+            dataframe (:class:`pandas.DataFrame`): DataFrame with span_ids and either patch
+                documents or metadata field columns.
             patch_document_column_name: Name of the column containing JSON patch documents.
                 Defaults to "patch_document".
             validate: When set to True, validation is run before sending data.
@@ -1004,14 +1006,14 @@ class SpansClient:
         columns: list | None = None,
         stream_chunk_size: int | None = None,
     ) -> pd.DataFrame:
-        """Export span data from Arize to a pandas DataFrame.
+        """Export span data from Arize to a :class:`pandas.DataFrame`.
         Retrieves trace/span data from the specified project within a time range
-        and returns it as a pandas DataFrame. Supports filtering with SQL-like
+        and returns it as a :class:`pandas.DataFrame`. Supports filtering with SQL-like
         WHERE clauses and similarity search for semantic retrieval.
         Returns:
-            DataFrame containing the requested span data with columns
+            :class:`pandas.DataFrame`: DataFrame containing the requested span data with columns
                 for span metadata, attributes, events, and any custom fields.
         """
         with ArizeFlightClient(

arize/spans/columns.py CHANGED Viewed

@@ -39,8 +39,6 @@ class SpanColumn:
         self.data_type = data_type
-#
-#
 # Root level columns
 SPAN_TRACE_ID_COL = SpanColumn(
     name="context.trace_id",
@@ -96,18 +94,18 @@ SPAN_KIND_COL = SpanColumn(
     data_type=SpanColumnDataType.STRING,
 )
 # Attributes Exception columns
-# SPAN_ATTRIBUTES_EXCEPTION_TYPE_COL = SpanColumn(
-#     name=f"attributes.{otel.SpanAttributes.EXCEPTION_TYPE}",
-#     data_type=SpanColumnDataType.STRING,
-# )
+SPAN_ATTRIBUTES_EXCEPTION_TYPE_COL = SpanColumn(
+    name=f"attributes.{otel.SpanAttributes.EXCEPTION_TYPE}",
+    data_type=SpanColumnDataType.STRING,
+)
 SPAN_ATTRIBUTES_EXCEPTION_MESSAGE_COL = SpanColumn(
     name=f"attributes.{otel.SpanAttributes.EXCEPTION_MESSAGE}",
     data_type=SpanColumnDataType.STRING,
 )
-# SPAN_ATTRIBUTES_EXCEPTION_ESCAPED_COL = SpanColumn(
-#     name=f"attributes.{otel.SpanAttributes.EXCEPTION_ESCAPED}",
-#     data_type=SpanColumnDataType.BOOL,
-# )
+SPAN_ATTRIBUTES_EXCEPTION_ESCAPED_COL = SpanColumn(
+    name=f"attributes.{otel.SpanAttributes.EXCEPTION_ESCAPED}",
+    data_type=SpanColumnDataType.BOOL,
+)
 SPAN_ATTRIBUTES_EXCEPTION_STACKTRACE_COL = SpanColumn(
     name=f"attributes.{otel.SpanAttributes.EXCEPTION_STACKTRACE}",
     data_type=SpanColumnDataType.STRING,
@@ -176,20 +174,19 @@ SPAN_ATTRIBUTES_LLM_PROMPT_TEMPLATE_VERSION_COL = SpanColumn(
     name=f"attributes.{oinf.SpanAttributes.LLM_PROMPT_TEMPLATE_VERSION}",
     data_type=SpanColumnDataType.STRING,
 )
-# SPAN_ATTRIBUTES_LLM_PROMPT_TOKEN_COUNT_COL = SpanColumn(
-#     name=f"attributes.{oinf.SpanAttributes.LLM_TOKEN_COUNT_PROMPT}",
-#     data_type=SpanColumnDataType.NUMERIC,
-# )
-# SPAN_ATTRIBUTES_LLM_COMPLETION_TOKEN_COUNT_COL = SpanColumn(
-#     name=f"attributes.{oinf.SpanAttributes.LLM_TOKEN_COUNT_COMPLETION}",
-#     data_type=SpanColumnDataType.NUMERIC,
-# )
-# SPAN_ATTRIBUTES_LLM_TOTAL_TOKEN_COUNT_COL = SpanColumn(
-#     name=f"attributes.{oinf.SpanAttributes.LLM_TOKEN_COUNT_TOTAL}",
-#     data_type=SpanColumnDataType.NUMERIC,
-# )
+SPAN_ATTRIBUTES_LLM_PROMPT_TOKEN_COUNT_COL = SpanColumn(
+    name=f"attributes.{oinf.SpanAttributes.LLM_TOKEN_COUNT_PROMPT}",
+    data_type=SpanColumnDataType.NUMERIC,
+)
+SPAN_ATTRIBUTES_LLM_COMPLETION_TOKEN_COUNT_COL = SpanColumn(
+    name=f"attributes.{oinf.SpanAttributes.LLM_TOKEN_COUNT_COMPLETION}",
+    data_type=SpanColumnDataType.NUMERIC,
+)
+SPAN_ATTRIBUTES_LLM_TOTAL_TOKEN_COUNT_COL = SpanColumn(
+    name=f"attributes.{oinf.SpanAttributes.LLM_TOKEN_COUNT_TOTAL}",
+    data_type=SpanColumnDataType.NUMERIC,
+)
 # Attributes Message Keys
-# SPAN_ATTRIBUTES_MESSAGE_NAME_KEY = f"{oinf.MessageAttributes.MESSAGE_NAME}"
 SPAN_ATTRIBUTES_MESSAGE_ROLE_KEY = f"{oinf.MessageAttributes.MESSAGE_ROLE}"
 SPAN_ATTRIBUTES_MESSAGE_CONTENT_KEY = (
     f"{oinf.MessageAttributes.MESSAGE_CONTENT}"
@@ -223,7 +220,6 @@ SPAN_ATTRIBUTES_RETRIEVAL_DOCUMENTS_COL = SpanColumn(
 )
 # Document Object Keys
 SPAN_ATTRIBUTES_DOCUMENT_ID_KEY = f"{oinf.DocumentAttributes.DOCUMENT_ID}"
-# SPAN_ATTRIBUTES_DOCUMENT_SCORE_KEY = f"{oinf.DocumentAttributes.DOCUMENT_SCORE}"
 SPAN_ATTRIBUTES_DOCUMENT_CONTENT_KEY = (
     f"{oinf.DocumentAttributes.DOCUMENT_CONTENT}"
 )
@@ -247,10 +243,10 @@ SPAN_ATTRIBUTES_RERANKER_MODEL_NAME_COL = SpanColumn(
     name=f"attributes.{oinf.RerankerAttributes.RERANKER_MODEL_NAME}",
     data_type=SpanColumnDataType.STRING,
 )
-# SPAN_ATTRIBUTES_RERANKER_TOP_K_COL = SpanColumn(
-#     name=f"attributes.{oinf.RerankerAttributes.RERANKER_TOP_K}",
-#     data_type=SpanColumnDataType.NUMERIC,
-# )
+SPAN_ATTRIBUTES_RERANKER_TOP_K_COL = SpanColumn(
+    name=f"attributes.{oinf.RerankerAttributes.RERANKER_TOP_K}",
+    data_type=SpanColumnDataType.NUMERIC,
+)
 SPAN_ATTRIBUTES_SESSION_ID = SpanColumn(
     name=f"attributes.{oinf.SpanAttributes.SESSION_ID}",
     data_type=SpanColumnDataType.STRING,
@@ -281,9 +277,9 @@ SPAN_OPENINFERENCE_COLUMNS = [
     SPAN_STATUS_CODE_COL,
     SPAN_STATUS_MESSAGE_COL,
     SPAN_EVENTS_COL,
-    #     SPAN_ATTRIBUTES_EXCEPTION_TYPE_COL,
+    SPAN_ATTRIBUTES_EXCEPTION_TYPE_COL,
     SPAN_ATTRIBUTES_EXCEPTION_MESSAGE_COL,
-    #     SPAN_ATTRIBUTES_EXCEPTION_ESCAPED_COL,
+    SPAN_ATTRIBUTES_EXCEPTION_ESCAPED_COL,
     SPAN_ATTRIBUTES_EXCEPTION_STACKTRACE_COL,
     SPAN_ATTRIBUTES_INPUT_VALUE_COL,
     SPAN_ATTRIBUTES_INPUT_MIME_TYPE_COL,
@@ -297,9 +293,9 @@ SPAN_OPENINFERENCE_COLUMNS = [
     SPAN_ATTRIBUTES_LLM_PROMPT_TEMPLATE_TEMPLATE_COL,
     SPAN_ATTRIBUTES_LLM_PROMPT_TEMPLATE_VARIABLES_COL,
     SPAN_ATTRIBUTES_LLM_PROMPT_TEMPLATE_VERSION_COL,
-    #     SPAN_ATTRIBUTES_LLM_PROMPT_TOKEN_COUNT_COL,
-    #     SPAN_ATTRIBUTES_LLM_COMPLETION_TOKEN_COUNT_COL,
-    #     SPAN_ATTRIBUTES_LLM_TOTAL_TOKEN_COUNT_COL,
+    SPAN_ATTRIBUTES_LLM_PROMPT_TOKEN_COUNT_COL,
+    SPAN_ATTRIBUTES_LLM_COMPLETION_TOKEN_COUNT_COL,
+    SPAN_ATTRIBUTES_LLM_TOTAL_TOKEN_COUNT_COL,
     SPAN_ATTRIBUTES_TOOL_NAME_COL,
     SPAN_ATTRIBUTES_TOOL_DESCRIPTION_COL,
     SPAN_ATTRIBUTES_TOOL_PARAMETERS_COL,
@@ -308,18 +304,18 @@ SPAN_OPENINFERENCE_COLUMNS = [
     SPAN_ATTRIBUTES_RERANKER_OUTPUT_DOCUMENTS_COL,
     SPAN_ATTRIBUTES_RERANKER_QUERY_COL,
     SPAN_ATTRIBUTES_RERANKER_MODEL_NAME_COL,
-    #     SPAN_ATTRIBUTES_RERANKER_TOP_K_COL,
+    SPAN_ATTRIBUTES_RERANKER_TOP_K_COL,
     SPAN_ATTRIBUTES_SESSION_ID,
     SPAN_ATTRIBUTES_USER_ID,
     SPAN_ATTRIBUTES_METADATA,
     SPAN_ATTRIBUTES_LLM_TOOLS_COL,
 ]
-#
 # List of columns that must be present in the dataframe
 SPAN_OPENINFERENCE_REQUIRED_COLUMNS = [
     col for col in SPAN_OPENINFERENCE_COLUMNS if col.required
 ]
-#
 # Eval columns
 # EVAL_COLUMN_PREFIX = "eval."
 # SESSION_EVAL_COLUMN_PREFIX = "session_eval."

arize/spans/conversion.py CHANGED Viewed

@@ -7,22 +7,21 @@ from datetime import datetime, timezone
 import numpy as np
 import pandas as pd
-# from arize.utils.logging import logger
 from arize.spans.columns import SPAN_OPENINFERENCE_COLUMNS, SpanColumnDataType
 def convert_timestamps(df: pd.DataFrame, fmt: str = "") -> pd.DataFrame:
-    """Convert timestamp columns in a DataFrame to nanoseconds.
+    """Convert timestamp columns in a :class:`pandas.DataFrame` to nanoseconds.
     Args:
-        df: The pandas DataFrame containing timestamp columns.
+        df: The :class:`pandas.DataFrame` containing timestamp columns.
         fmt: Optional datetime format string for parsing string timestamps. Defaults to "".
     Returns:
-        The DataFrame with timestamp columns converted to nanoseconds.
+        The :class:`pandas.DataFrame` with timestamp columns converted to nanoseconds.
     Raises:
-        KeyError: If required timestamp column is not found in DataFrame.
+        KeyError: If required timestamp column is not found in :class:`pandas.DataFrame`.
     """
     for col in SPAN_OPENINFERENCE_COLUMNS:
         if col.data_type != SpanColumnDataType.TIMESTAMP:
@@ -70,7 +69,7 @@ def jsonify_dictionaries(df: pd.DataFrame) -> pd.DataFrame:
     """Convert dictionary and list-of-dictionary columns to JSON strings.
     Args:
-        df: The pandas DataFrame containing dictionary columns.
+        df: The :class:`pandas.DataFrame` containing dictionary columns.
     Returns:
         The DataFrame with dictionary columns converted to JSON strings.

arize/spans/validation/common/argument_validation.py CHANGED Viewed

@@ -39,13 +39,13 @@ def check_field_convertible_to_str(
 def check_dataframe_type(
     dataframe: object,
 ) -> list[InvalidTypeArgument]:
-    """Validates that the provided argument is a pandas DataFrame.
+    """Validates that the provided argument is a :class:`pandas.DataFrame`.
     Args:
-        dataframe: The object to validate as a pandas DataFrame.
+        dataframe: The object to validate as a :class:`pandas.DataFrame`.
     Returns:
-        List of validation errors if not a DataFrame (empty if valid).
+        List of validation errors if not a :class:`pandas.DataFrame` (empty if valid).
     """
     if not isinstance(dataframe, pd.DataFrame):
         return [

arize 8.0.0b1__py3-none-any.whl → 8.0.0b2__py3-none-any.whl

arize 8.0.0b1__py3-none-any.whl → 8.0.0b2__py3-none-any.whl