arize 8.0.0a22__py3-none-any.whl → 8.0.0b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171)
  1. arize/__init__.py +28 -19
  2. arize/_exporter/client.py +56 -37
  3. arize/_exporter/parsers/tracing_data_parser.py +41 -30
  4. arize/_exporter/validation.py +3 -3
  5. arize/_flight/client.py +207 -76
  6. arize/_generated/api_client/__init__.py +30 -6
  7. arize/_generated/api_client/api/__init__.py +1 -0
  8. arize/_generated/api_client/api/datasets_api.py +864 -190
  9. arize/_generated/api_client/api/experiments_api.py +167 -131
  10. arize/_generated/api_client/api/projects_api.py +1197 -0
  11. arize/_generated/api_client/api_client.py +2 -2
  12. arize/_generated/api_client/configuration.py +42 -34
  13. arize/_generated/api_client/exceptions.py +2 -2
  14. arize/_generated/api_client/models/__init__.py +15 -4
  15. arize/_generated/api_client/models/dataset.py +10 -10
  16. arize/_generated/api_client/models/dataset_example.py +111 -0
  17. arize/_generated/api_client/models/dataset_example_update.py +100 -0
  18. arize/_generated/api_client/models/dataset_version.py +13 -13
  19. arize/_generated/api_client/models/datasets_create_request.py +16 -8
  20. arize/_generated/api_client/models/datasets_examples_insert_request.py +100 -0
  21. arize/_generated/api_client/models/datasets_examples_list200_response.py +106 -0
  22. arize/_generated/api_client/models/datasets_examples_update_request.py +102 -0
  23. arize/_generated/api_client/models/datasets_list200_response.py +10 -4
  24. arize/_generated/api_client/models/experiment.py +14 -16
  25. arize/_generated/api_client/models/experiment_run.py +108 -0
  26. arize/_generated/api_client/models/experiment_run_create.py +102 -0
  27. arize/_generated/api_client/models/experiments_create_request.py +16 -10
  28. arize/_generated/api_client/models/experiments_list200_response.py +10 -4
  29. arize/_generated/api_client/models/experiments_runs_list200_response.py +19 -5
  30. arize/_generated/api_client/models/{error.py → pagination_metadata.py} +13 -11
  31. arize/_generated/api_client/models/primitive_value.py +172 -0
  32. arize/_generated/api_client/models/problem.py +100 -0
  33. arize/_generated/api_client/models/project.py +99 -0
  34. arize/_generated/api_client/models/{datasets_list_examples200_response.py → projects_create_request.py} +13 -11
  35. arize/_generated/api_client/models/projects_list200_response.py +106 -0
  36. arize/_generated/api_client/rest.py +2 -2
  37. arize/_generated/api_client/test/test_dataset.py +4 -2
  38. arize/_generated/api_client/test/test_dataset_example.py +56 -0
  39. arize/_generated/api_client/test/test_dataset_example_update.py +52 -0
  40. arize/_generated/api_client/test/test_dataset_version.py +7 -2
  41. arize/_generated/api_client/test/test_datasets_api.py +27 -13
  42. arize/_generated/api_client/test/test_datasets_create_request.py +8 -4
  43. arize/_generated/api_client/test/{test_datasets_list_examples200_response.py → test_datasets_examples_insert_request.py} +19 -15
  44. arize/_generated/api_client/test/test_datasets_examples_list200_response.py +66 -0
  45. arize/_generated/api_client/test/test_datasets_examples_update_request.py +61 -0
  46. arize/_generated/api_client/test/test_datasets_list200_response.py +9 -3
  47. arize/_generated/api_client/test/test_experiment.py +2 -4
  48. arize/_generated/api_client/test/test_experiment_run.py +56 -0
  49. arize/_generated/api_client/test/test_experiment_run_create.py +54 -0
  50. arize/_generated/api_client/test/test_experiments_api.py +6 -6
  51. arize/_generated/api_client/test/test_experiments_create_request.py +9 -6
  52. arize/_generated/api_client/test/test_experiments_list200_response.py +9 -5
  53. arize/_generated/api_client/test/test_experiments_runs_list200_response.py +15 -5
  54. arize/_generated/api_client/test/test_pagination_metadata.py +53 -0
  55. arize/_generated/api_client/test/{test_error.py → test_primitive_value.py} +13 -14
  56. arize/_generated/api_client/test/test_problem.py +57 -0
  57. arize/_generated/api_client/test/test_project.py +58 -0
  58. arize/_generated/api_client/test/test_projects_api.py +59 -0
  59. arize/_generated/api_client/test/test_projects_create_request.py +54 -0
  60. arize/_generated/api_client/test/test_projects_list200_response.py +70 -0
  61. arize/_generated/api_client_README.md +43 -29
  62. arize/_generated/protocol/flight/flight_pb2.py +400 -0
  63. arize/_lazy.py +27 -19
  64. arize/client.py +181 -58
  65. arize/config.py +324 -116
  66. arize/constants/__init__.py +1 -0
  67. arize/constants/config.py +11 -4
  68. arize/constants/ml.py +6 -4
  69. arize/constants/openinference.py +2 -0
  70. arize/constants/pyarrow.py +2 -0
  71. arize/constants/spans.py +3 -1
  72. arize/datasets/__init__.py +1 -0
  73. arize/datasets/client.py +304 -84
  74. arize/datasets/errors.py +32 -2
  75. arize/datasets/validation.py +18 -8
  76. arize/embeddings/__init__.py +2 -0
  77. arize/embeddings/auto_generator.py +23 -19
  78. arize/embeddings/base_generators.py +89 -36
  79. arize/embeddings/constants.py +2 -0
  80. arize/embeddings/cv_generators.py +26 -4
  81. arize/embeddings/errors.py +27 -5
  82. arize/embeddings/nlp_generators.py +43 -18
  83. arize/embeddings/tabular_generators.py +46 -31
  84. arize/embeddings/usecases.py +12 -2
  85. arize/exceptions/__init__.py +1 -0
  86. arize/exceptions/auth.py +11 -1
  87. arize/exceptions/base.py +29 -4
  88. arize/exceptions/models.py +21 -2
  89. arize/exceptions/parameters.py +31 -0
  90. arize/exceptions/spaces.py +12 -1
  91. arize/exceptions/types.py +86 -7
  92. arize/exceptions/values.py +220 -20
  93. arize/experiments/__init__.py +13 -0
  94. arize/experiments/client.py +394 -285
  95. arize/experiments/evaluators/__init__.py +1 -0
  96. arize/experiments/evaluators/base.py +74 -41
  97. arize/experiments/evaluators/exceptions.py +6 -3
  98. arize/experiments/evaluators/executors.py +121 -73
  99. arize/experiments/evaluators/rate_limiters.py +106 -57
  100. arize/experiments/evaluators/types.py +34 -7
  101. arize/experiments/evaluators/utils.py +65 -27
  102. arize/experiments/functions.py +103 -101
  103. arize/experiments/tracing.py +52 -44
  104. arize/experiments/types.py +56 -31
  105. arize/logging.py +54 -22
  106. arize/ml/__init__.py +1 -0
  107. arize/ml/batch_validation/__init__.py +1 -0
  108. arize/{models → ml}/batch_validation/errors.py +545 -67
  109. arize/{models → ml}/batch_validation/validator.py +344 -303
  110. arize/ml/bounded_executor.py +47 -0
  111. arize/{models → ml}/casting.py +118 -108
  112. arize/{models → ml}/client.py +339 -118
  113. arize/{models → ml}/proto.py +97 -42
  114. arize/{models → ml}/stream_validation.py +43 -15
  115. arize/ml/surrogate_explainer/__init__.py +1 -0
  116. arize/{models → ml}/surrogate_explainer/mimic.py +25 -10
  117. arize/{types.py → ml/types.py} +355 -354
  118. arize/pre_releases.py +44 -0
  119. arize/projects/__init__.py +1 -0
  120. arize/projects/client.py +134 -0
  121. arize/regions.py +40 -0
  122. arize/spans/__init__.py +1 -0
  123. arize/spans/client.py +204 -175
  124. arize/spans/columns.py +13 -0
  125. arize/spans/conversion.py +60 -37
  126. arize/spans/validation/__init__.py +1 -0
  127. arize/spans/validation/annotations/__init__.py +1 -0
  128. arize/spans/validation/annotations/annotations_validation.py +6 -4
  129. arize/spans/validation/annotations/dataframe_form_validation.py +13 -11
  130. arize/spans/validation/annotations/value_validation.py +35 -11
  131. arize/spans/validation/common/__init__.py +1 -0
  132. arize/spans/validation/common/argument_validation.py +33 -8
  133. arize/spans/validation/common/dataframe_form_validation.py +35 -9
  134. arize/spans/validation/common/errors.py +211 -11
  135. arize/spans/validation/common/value_validation.py +81 -14
  136. arize/spans/validation/evals/__init__.py +1 -0
  137. arize/spans/validation/evals/dataframe_form_validation.py +28 -8
  138. arize/spans/validation/evals/evals_validation.py +34 -4
  139. arize/spans/validation/evals/value_validation.py +26 -3
  140. arize/spans/validation/metadata/__init__.py +1 -1
  141. arize/spans/validation/metadata/argument_validation.py +14 -5
  142. arize/spans/validation/metadata/dataframe_form_validation.py +26 -10
  143. arize/spans/validation/metadata/value_validation.py +24 -10
  144. arize/spans/validation/spans/__init__.py +1 -0
  145. arize/spans/validation/spans/dataframe_form_validation.py +35 -14
  146. arize/spans/validation/spans/spans_validation.py +35 -4
  147. arize/spans/validation/spans/value_validation.py +78 -8
  148. arize/utils/__init__.py +1 -0
  149. arize/utils/arrow.py +31 -15
  150. arize/utils/cache.py +34 -6
  151. arize/utils/dataframe.py +20 -3
  152. arize/utils/online_tasks/__init__.py +2 -0
  153. arize/utils/online_tasks/dataframe_preprocessor.py +58 -47
  154. arize/utils/openinference_conversion.py +44 -5
  155. arize/utils/proto.py +10 -0
  156. arize/utils/size.py +5 -3
  157. arize/utils/types.py +105 -0
  158. arize/version.py +3 -1
  159. {arize-8.0.0a22.dist-info → arize-8.0.0b0.dist-info}/METADATA +13 -6
  160. arize-8.0.0b0.dist-info/RECORD +175 -0
  161. {arize-8.0.0a22.dist-info → arize-8.0.0b0.dist-info}/WHEEL +1 -1
  162. arize-8.0.0b0.dist-info/licenses/LICENSE +176 -0
  163. arize-8.0.0b0.dist-info/licenses/NOTICE +13 -0
  164. arize/_generated/protocol/flight/export_pb2.py +0 -61
  165. arize/_generated/protocol/flight/ingest_pb2.py +0 -365
  166. arize/models/__init__.py +0 -0
  167. arize/models/batch_validation/__init__.py +0 -0
  168. arize/models/bounded_executor.py +0 -34
  169. arize/models/surrogate_explainer/__init__.py +0 -0
  170. arize-8.0.0a22.dist-info/RECORD +0 -146
  171. arize-8.0.0a22.dist-info/licenses/LICENSE.md +0 -12
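
The `arize/{models → ml}` and `arize/{types.py → ml/types.py}` moves in the list above amount to an import-path migration, and the spans diff below shows one concrete instance (`from arize.types import Environments` becomes `from arize.ml.types import Environments`). A minimal before/after sketch, assuming the public import paths mirror the file moves:

# Import-path migration sketch; paths inferred from the file renames above.
# Before (8.0.0a22):
#     from arize.types import Environments
# After (8.0.0b0):
from arize.ml.types import Environments

env = Environments.TRACING  # enum usage itself is unchanged by the move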
arize/spans/client.py CHANGED
@@ -1,3 +1,5 @@
+"""Client implementation for managing spans and traces in the Arize platform."""
+
 # type: ignore[pb2]
 from __future__ import annotations
 
@@ -6,7 +8,7 @@ import logging
 import re
 from datetime import datetime, timezone
 from functools import partial
-from typing import TYPE_CHECKING, Any, Dict, List
+from typing import TYPE_CHECKING, Any
 
 import numpy as np
 import pandas as pd
@@ -16,10 +18,6 @@ from google.protobuf import json_format, message
 from arize._exporter.client import ArizeExportClient
 from arize._flight.client import ArizeFlightClient, FlightPostArrowFileResponse
 from arize._flight.types import FlightRequestType
-from arize._generated.protocol.flight.ingest_pb2 import (
-    WriteSpanAnnotationResponse,
-    WriteSpanEvaluationResponse,
-)
 from arize.constants.spans import DEFAULT_DATETIME_FMT
 from arize.exceptions.base import (
     INVALID_ARROW_CONVERSION_MSG,
@@ -28,10 +26,8 @@ from arize.exceptions.base import (
 from arize.exceptions.models import MissingProjectNameError
 from arize.exceptions.spaces import MissingSpaceIDError
 from arize.logging import CtxAdapter
-from arize.types import Environments, SimilaritySearchParams
-from arize.utils.arrow import (
-    post_arrow_table,
-)
+from arize.ml.types import Environments
+from arize.utils.arrow import post_arrow_table
 from arize.utils.dataframe import (
     remove_extraneous_columns,
     reset_dataframe_index,
@@ -41,13 +37,25 @@ from arize.utils.proto import get_pb_schema_tracing
 if TYPE_CHECKING:
     import requests
 
+    from arize._generated.protocol.flight import flight_pb2
     from arize.config import SDKConfiguration
 
 logger = logging.getLogger(__name__)
 
 
 class SpansClient:
-    def __init__(self, *, sdk_config: SDKConfiguration):
+    """Client for logging LLM tracing spans and evaluations to Arize.
+
+    This class is primarily intended for internal use within the SDK. Users are
+    highly encouraged to access resource-specific functionality via
+    :class:`arize.ArizeClient`.
+    """
+
+    def __init__(self, *, sdk_config: SDKConfiguration) -> None:
+        """
+        Args:
+            sdk_config: Resolved SDK configuration.
+        """  # noqa: D205, D212
         self._sdk_config = sdk_config
 
     def log(
@@ -62,25 +70,26 @@
         timeout: float | None = None,
         tmp_dir: str = "",
     ) -> requests.Response:
-        """
-        Logs a pandas dataframe containing LLM tracing data to Arize via a POST request. Returns a
-        :class:`Response` object from the Requests HTTP library to ensure successful delivery of
-        records.
+        """Logs a pandas dataframe containing LLM tracing data to Arize via a POST request.
+
+        Returns a :class:`Response` object from the Requests HTTP library to ensure
+        successful delivery of records.
 
         Args:
-            dataframe (pd.DataFrame): The dataframe containing the LLM traces.
-            evals_dataframe (pd.DataFrame, optional): A dataframe containing LLM evaluations data.
+            space_id: The space ID where the project resides.
+            project_name: A unique name to identify your project in the Arize platform.
+            dataframe: The dataframe containing the LLM traces.
+            evals_dataframe: A dataframe containing LLM evaluations data.
                 The evaluations are joined to their corresponding spans via a left outer join, i.e.,
                 using only `context.span_id` from the spans dataframe. Defaults to None.
-            datetime_format (str): format for the timestamp captured in the LLM traces.
+            datetime_format: format for the timestamp captured in the LLM traces.
                 Defaults to "%Y-%m-%dT%H:%M:%S.%f+00:00".
-            validate (bool, optional): When set to True, validation is run before sending data.
+            validate: When set to True, validation is run before sending data.
                 Defaults to True.
-            tmp_dir (str, optional): Temporary directory/file to store the serialized data in binary
-                before sending to Arize.
-            timeout (float, optional): You can stop waiting for a response after a given number
+            timeout: You can stop waiting for a response after a given number
                 of seconds with the timeout parameter. Defaults to None.
-            project_name (str, optional): A unique name to identify your project in the Arize platform.
+            tmp_dir: Temporary directory/file to store the serialized data in binary
+                before sending to Arize.
 
         Returns:
             `Response` object
@@ -220,12 +229,12 @@
             log.debug("Converting data to Arrow format")
             pa_table = pa.Table.from_pandas(df, preserve_index=False)
         except pa.ArrowInvalid as e:
-            log.error(f"{INVALID_ARROW_CONVERSION_MSG}: {str(e)}")
+            log.exception(INVALID_ARROW_CONVERSION_MSG)
             raise pa.ArrowInvalid(
-                f"Error converting to Arrow format: {str(e)}"
+                f"Error converting to Arrow format: {e!s}"
             ) from e
-        except Exception as e:
-            log.error(f"Unexpected error creating Arrow table: {str(e)}")
+        except Exception:
+            log.exception("Unexpected error creating Arrow table")
             raise
 
         proto_schema = get_pb_schema_tracing(project_name=project_name)
@@ -262,27 +271,23 @@
         force_http: bool = False,
         timeout: float | None = None,
         tmp_dir: str = "",
-    ) -> WriteSpanEvaluationResponse:
-        """
-        Logs a pandas dataframe containing LLM evaluations data to Arize via a Flight gRPC request.
-        The dataframe must contain a column `context.span_id`
-        such that Arize can assign each evaluation to its respective span.
+    ) -> flight_pb2.WriteSpanEvaluationResponse:
+        """Logs a pandas dataframe containing LLM evaluations data to Arize via a Flight gRPC request.
+
+        The dataframe must contain a column `context.span_id` such that Arize can assign
+        each evaluation to its respective span.
 
         Args:
-            dataframe (pd.DataFrame): A dataframe containing LLM evaluations data.
-            model_id (str): A unique name to identify your model in the Arize platform.
-                (Deprecated: Use `project_name` instead.)
-            model_version (str, optional): Used to group a subset of traces a given
-                model_id to compare and track changes. It should match the model_id of the spans
-                sent previously, to which evaluations will be assigned. Defaults to None.
-            validate (bool, optional): When set to True, validation is run before sending data.
+            space_id: The space ID where the project resides.
+            project_name: A unique name to identify your project in the Arize platform.
+            dataframe: A dataframe containing LLM evaluations data.
+            validate: When set to True, validation is run before sending data.
                 Defaults to True.
-            path (str, optional): Temporary directory/file to store the serialized data in binary
-                before sending to Arize.
-            timeout (float, optional): You can stop waiting for a response after a given number
+            force_http: Force the use of HTTP for data upload. Defaults to False.
+            timeout: You can stop waiting for a response after a given number
                 of seconds with the timeout parameter. Defaults to None.
-            project_name (str, optional): A unique name to identify your project in the Arize platform.
-                Either model_id or project_name must be provided.
+            tmp_dir: Temporary directory/file to store the serialized data in binary
+                before sending to Arize.
         """
         from arize.spans.columns import EVAL_COLUMN_PATTERN, SPAN_SPAN_ID_COL
         from arize.spans.validation.evals import evals_validation
@@ -358,12 +363,12 @@
             log.debug("Converting data to Arrow format")
             pa_table = pa.Table.from_pandas(evals_df, preserve_index=False)
         except pa.ArrowInvalid as e:
-            log.error(f"{INVALID_ARROW_CONVERSION_MSG}: {str(e)}")
+            log.exception(INVALID_ARROW_CONVERSION_MSG)
             raise pa.ArrowInvalid(
-                f"Error converting to Arrow format: {str(e)}"
+                f"Error converting to Arrow format: {e!s}"
             ) from e
-        except Exception as e:
-            log.error(f"Unexpected error creating Arrow table: {str(e)}")
+        except Exception:
+            log.exception("Unexpected error creating Arrow table")
             raise
 
         if force_http:
@@ -395,8 +400,8 @@
         response = None
         with ArizeFlightClient(
             api_key=self._sdk_config.api_key,
-            host=self._sdk_config.flight_server_host,
-            port=self._sdk_config.flight_server_port,
+            host=self._sdk_config.flight_host,
+            port=self._sdk_config.flight_port,
             scheme=self._sdk_config.flight_scheme,
             request_verify=self._sdk_config.request_verify,
             max_chunksize=self._sdk_config.pyarrow_max_chunksize,
@@ -409,8 +414,8 @@
                 request_type=request_type,
             )
         except Exception as e:
-            msg = f"Error during update request: {str(e)}"
-            log.error(msg)
+            msg = f"Error during update request: {e!s}"
+            log.exception(msg)
             raise RuntimeError(msg) from e
 
         if response is None:
@@ -437,19 +442,19 @@
         project_name: str,
         dataframe: pd.DataFrame,
         validate: bool = True,
-    ) -> WriteSpanAnnotationResponse:
-        """
-        Logs a pandas dataframe containing LLM span annotations to Arize via a Flight gRPC request.
-        The dataframe must contain a column `context.span_id`
-        such that Arize can assign each annotation to its respective span.
-        Annotation columns should follow the pattern `annotation.<name>.<suffix>` where suffix is
-        either `label` or `score`. An optional `annotation.notes` column can be included for
-        free-form text notes.
+    ) -> flight_pb2.WriteSpanAnnotationResponse:
+        """Logs a pandas dataframe containing LLM span annotations to Arize via a Flight gRPC request.
+
+        The dataframe must contain a column `context.span_id` such that Arize can assign
+        each annotation to its respective span. Annotation columns should follow the pattern
+        `annotation.<name>.<suffix>` where suffix is either `label` or `score`. An optional
+        `annotation.notes` column can be included for free-form text notes.
 
         Args:
-            dataframe (pd.DataFrame): A dataframe containing LLM annotation data.
-            project_name (str): A unique name to identify your project in the Arize platform.
-            validate (bool, optional): When set to True, validation is run before sending data.
+            space_id: The space ID where the project resides.
+            project_name: A unique name to identify your project in the Arize platform.
+            dataframe: A dataframe containing LLM annotation data.
+            validate: When set to True, validation is run before sending data.
                 Defaults to True.
         """
         from arize.spans.columns import (
@@ -588,12 +593,12 @@
             log.debug("Converting data to Arrow format")
             pa_table = pa.Table.from_pandas(anno_df, preserve_index=False)
         except pa.ArrowInvalid as e:
-            log.error(f"{INVALID_ARROW_CONVERSION_MSG}: {str(e)}")
+            log.exception(INVALID_ARROW_CONVERSION_MSG)
             raise pa.ArrowInvalid(
-                f"Error converting to Arrow format: {str(e)}"
+                f"Error converting to Arrow format: {e!s}"
             ) from e
-        except Exception as e:
-            log.error(f"Unexpected error creating Arrow table: {str(e)}")
+        except Exception:
+            log.exception("Unexpected error creating Arrow table")
             raise
 
         if ANNOTATION_NOTES_COLUMN_NAME in anno_df.columns:
@@ -611,8 +616,8 @@
         response = None
         with ArizeFlightClient(
             api_key=self._sdk_config.api_key,
-            host=self._sdk_config.flight_server_host,
-            port=self._sdk_config.flight_server_port,
+            host=self._sdk_config.flight_host,
+            port=self._sdk_config.flight_port,
             scheme=self._sdk_config.flight_scheme,
             request_verify=self._sdk_config.request_verify,
             max_chunksize=self._sdk_config.pyarrow_max_chunksize,
@@ -625,8 +630,8 @@
                 request_type=request_type,
            )
         except Exception as e:
-            msg = f"Error during update request: {str(e)}"
-            log.error(msg)
+            msg = f"Error during update request: {e!s}"
+            log.exception(msg)
             raise RuntimeError(msg) from e
 
         if response is None:
@@ -654,11 +659,13 @@
         dataframe: pd.DataFrame,
         patch_document_column_name: str = "patch_document",
         validate: bool = True,
-    ) -> Dict[str, Any]:
-        """
-        Log metadata updates using JSON Merge Patch format. This method is only supported for LLM model types.
+    ) -> dict[str, Any]:
+        """Log metadata updates using JSON Merge Patch format.
+
+        This method is only supported for LLM model types.
 
         The dataframe must contain a column `context.span_id` to identify spans and either:
+
         1. A column with JSON patch documents (specified by patch_document_column_name), or
         2. One or more columns with prefix `attributes.metadata.` that will be automatically
            converted to a patch document (e.g., `attributes.metadata.tag` → `{"tag": value}`).
@@ -666,7 +673,8 @@
         If both methods are used, the explicit patch document is applied after the individual field updates.
         The patches will be applied to the `attributes.metadata` field of each span.
 
-        **Type Handling:**
+        Type Handling:
+
         - The client primarily supports string, integer, and float data types.
         - Boolean values are converted to string representations.
         - Nested JSON objects and arrays are serialized to JSON strings during transmission.
@@ -674,20 +682,23 @@
           Note: This differs from standard JSON Merge Patch where null values remove fields.
 
         Args:
-            dataframe: DataFrame with span_ids and either patch documents or metadata field columns.
+            space_id: The space ID where the project resides.
             project_name: A unique name to identify your project in the Arize platform.
+            dataframe: DataFrame with span_ids and either patch documents or metadata field columns.
             patch_document_column_name: Name of the column containing JSON patch documents.
                 Defaults to "patch_document".
             validate: When set to True, validation is run before sending data.
 
         Returns:
             Dictionary containing update results with the following keys:
+
             - spans_processed: Total number of spans in the input dataframe
             - spans_updated: Count of successfully updated span metadata records
             - spans_failed: Count of spans that failed to update
             - errors: List of dictionaries with 'span_id' and 'error_message' keys for each failed span
 
-            Error types from the server include:
+        Error types from the server include:
+
             - parse_failure: Failed to parse JSON metadata
             - patch_failure: Failed to apply JSON patch
             - type_conflict: Type conflict in metadata
@@ -696,58 +707,60 @@
             - druid_rejection: Backend rejected the update
 
         Raises:
-            AuthError: When API key or space ID is missing
-            ValidationFailure: When validation of the dataframe or values fails
-            ImportError: When required tracing dependencies are missing
-            ArrowInvalid: When the dataframe cannot be converted to Arrow format
-            RuntimeError: If the request fails or no response is received
-
-        Example:
-            ```python
-            # Method 1: Using a patch document
-            df = pd.DataFrame(
-                {
-                    "context.span_id": ["span1", "span2"],
-                    "patch_document": [
-                        {"tag": "important"},
-                        {"priority": "high"},
-                    ],
-                }
-            )
-
-            # Method 2: Using direct field columns
-            df = pd.DataFrame(
-                {
-                    "context.span_id": ["span1", "span2"],
-                    "attributes.metadata.tag": ["important", "standard"],
-                    "attributes.metadata.priority": ["high", "medium"],
-                }
-            )
-
-            # Method 3: Combining both approaches
-            df = pd.DataFrame(
-                {
-                    "context.span_id": ["span1"],
-                    "attributes.metadata.tag": ["important"],
-                    "patch_document": [
-                        {"priority": "high"}
-                    ],  # This will override any conflicting fields
-                }
-            )
-
-            # Method 4: Setting fields to null
-            df = pd.DataFrame(
-                {
-                    "context.span_id": ["span1"],
-                    "attributes.metadata.old_field": [
-                        None
-                    ],  # Sets field to JSON null
-                    "patch_document": [
-                        {"other_field": None}
-                    ],  # Also sets field to JSON null
-                }
-            )
-            ```
+            AuthError: When API key or space ID is missing.
+            ValidationFailure: When validation of the dataframe or values fails.
+            ImportError: When required tracing dependencies are missing.
+            ArrowInvalid: When the dataframe cannot be converted to Arrow format.
+            RuntimeError: If the request fails or no response is received.
+
+        Examples:
+            Method 1: Using a patch document
+
+            >>> df = pd.DataFrame(
+            ...     {
+            ...         "context.span_id": ["span1", "span2"],
+            ...         "patch_document": [
+            ...             {"tag": "important"},
+            ...             {"priority": "high"},
+            ...         ],
+            ...     }
+            ... )
+
+            Method 2: Using direct field columns
+
+            >>> df = pd.DataFrame(
+            ...     {
+            ...         "context.span_id": ["span1", "span2"],
+            ...         "attributes.metadata.tag": ["important", "standard"],
+            ...         "attributes.metadata.priority": ["high", "medium"],
+            ...     }
+            ... )
+
+            Method 3: Combining both approaches
+
+            >>> df = pd.DataFrame(
+            ...     {
+            ...         "context.span_id": ["span1"],
+            ...         "attributes.metadata.tag": ["important"],
+            ...         "patch_document": [
+            ...             {"priority": "high"}
+            ...         ],  # Overrides conflicting fields
+            ...     }
+            ... )
+
+            Method 4: Setting fields to null
+
+            >>> df = pd.DataFrame(
+            ...     {
+            ...         "context.span_id": ["span1"],
+            ...         "attributes.metadata.old_field": [
+            ...             None
+            ...         ],  # Sets field to JSON null
+            ...         "patch_document": [
+            ...             {"other_field": None}
+            ...         ],  # Also sets field to JSON null
+            ...     }
+            ... )
         """
         # Import validation modules
         from arize.spans.columns import SPAN_SPAN_ID_COL
@@ -813,11 +826,10 @@
         )
 
         # Create a new column for patch documents if we're going to use it
-        if has_metadata_fields or has_patch_document:
-            # Use 'patch_document' as the standardized column name for downstream processing
-            final_patch_column = "patch_document"
-            if final_patch_column not in metadata_df.columns:
-                metadata_df[final_patch_column] = None
+        # Use 'patch_document' as the standardized column name for downstream processing
+        final_patch_column = "patch_document"
+        if final_patch_column not in metadata_df.columns:
+            metadata_df[final_patch_column] = None
 
         # Process metadata field columns if they exist
         if has_metadata_fields:
@@ -865,7 +877,7 @@
                 if patch:
                     processed_patches.append(patch)
                 if errors:
-                    validation_errors.append(errors)
+                    validation_errors.extend(errors)
 
         # If validation is enabled and errors found, raise ValidationFailure
         if validate and validation_errors:
@@ -922,9 +934,11 @@
             metadata_df[final_patch_column] = metadata_df[
                 final_patch_column
             ].apply(
-                lambda p: json.dumps(p)
-                if not isinstance(p, float) or not np.isnan(p)
-                else json.dumps({})
+                lambda p: (
+                    json.dumps(p)
+                    if not isinstance(p, float) or not np.isnan(p)
+                    else json.dumps({})
+                )
             )
 
         # Convert to Arrow table
@@ -932,20 +946,20 @@
             log.debug("Converting data to Arrow format")
             pa_table = pa.Table.from_pandas(metadata_df, preserve_index=False)
         except pa.ArrowInvalid as e:
-            log.error(f"{INVALID_ARROW_CONVERSION_MSG}: {str(e)}")
+            log.exception(INVALID_ARROW_CONVERSION_MSG)
             raise pa.ArrowInvalid(
-                f"Error converting to Arrow format: {str(e)}"
+                f"Error converting to Arrow format: {e!s}"
             ) from e
-        except Exception as e:
-            log.error(f"Unexpected error creating Arrow table: {str(e)}")
+        except Exception:
+            log.exception("Unexpected error creating Arrow table")
             raise
 
         request_type = FlightRequestType.METADATA
         response = None
         with ArizeFlightClient(
             api_key=self._sdk_config.api_key,
-            host=self._sdk_config.flight_server_host,
-            port=self._sdk_config.flight_server_port,
+            host=self._sdk_config.flight_host,
+            port=self._sdk_config.flight_port,
             scheme=self._sdk_config.flight_scheme,
             request_verify=self._sdk_config.request_verify,
             max_chunksize=self._sdk_config.pyarrow_max_chunksize,
@@ -958,8 +972,8 @@
                 request_type=request_type,
             )
         except Exception as e:
-            msg = f"Error during update request: {str(e)}"
-            log.error(msg)
+            msg = f"Error during update request: {e!s}"
+            log.exception(msg)
             raise RuntimeError(msg) from e
 
         if response is None:
@@ -987,14 +1001,23 @@
         start_time: datetime,
         end_time: datetime,
         where: str = "",
-        columns: List | None = None,
-        similarity_search_params: SimilaritySearchParams | None = None,
+        columns: list | None = None,
         stream_chunk_size: int | None = None,
     ) -> pd.DataFrame:
+        """Export span data from Arize to a pandas DataFrame.
+
+        Retrieves trace/span data from the specified project within a time range
+        and returns it as a pandas DataFrame. Supports filtering with SQL-like
+        WHERE clauses and similarity search for semantic retrieval.
+
+        Returns:
+            pd.DataFrame: DataFrame containing the requested span data with columns
+                for span metadata, attributes, events, and any custom fields.
+        """
         with ArizeFlightClient(
             api_key=self._sdk_config.api_key,
-            host=self._sdk_config.flight_server_host,
-            port=self._sdk_config.flight_server_port,
+            host=self._sdk_config.flight_host,
+            port=self._sdk_config.flight_port,
             scheme=self._sdk_config.flight_scheme,
             request_verify=self._sdk_config.request_verify,
             max_chunksize=self._sdk_config.pyarrow_max_chunksize,
@@ -1010,26 +1033,32 @@
             end_time=end_time,
             where=where,
             columns=columns,
-            similarity_search_params=similarity_search_params,
             stream_chunk_size=stream_chunk_size,
         )
 
     def export_to_parquet(
         self,
         *,
+        path: str,
         space_id: str,
         project_name: str,
         start_time: datetime,
         end_time: datetime,
         where: str = "",
-        columns: List | None = None,
-        similarity_search_params: SimilaritySearchParams | None = None,
+        columns: list | None = None,
         stream_chunk_size: int | None = None,
-    ) -> pd.DataFrame:
+    ) -> None:
+        """Export span data from Arize to a Parquet file.
+
+        Retrieves trace/span data from the specified project within a time range
+        and writes it directly to a Parquet file at the specified path. Supports
+        filtering with SQL-like WHERE clauses and similarity search for semantic
+        retrieval. Efficient for large datasets and long-term storage.
+        """
         with ArizeFlightClient(
             api_key=self._sdk_config.api_key,
-            host=self._sdk_config.flight_server_host,
-            port=self._sdk_config.flight_server_port,
+            host=self._sdk_config.flight_host,
+            port=self._sdk_config.flight_port,
             scheme=self._sdk_config.flight_scheme,
             request_verify=self._sdk_config.request_verify,
             max_chunksize=self._sdk_config.pyarrow_max_chunksize,
@@ -1038,6 +1067,7 @@
             flight_client=flight_client,
         )
         return exporter.export_to_parquet(
+            path=path,
            space_id=space_id,
            model_id=project_name,
            environment=Environments.TRACING,
@@ -1045,12 +1075,11 @@
             end_time=end_time,
             where=where,
             columns=columns,
-            similarity_search_params=similarity_search_params,
             stream_chunk_size=stream_chunk_size,
         )
 
 
-def _build_patch_document(row):
+def _build_patch_document(row: pd.Series) -> dict[str, object]:
     # Extract and preserve metadata values with proper types
     patch = {}
     for key in row.index:
@@ -1070,8 +1099,11 @@ def _build_patch_document(row):
 
 
 def _process_patch_document(
-    metadata_df, patch_document_column_name, field_patches, row_idx
-):
+    metadata_df: pd.DataFrame,
+    patch_document_column_name: str,
+    field_patches: pd.DataFrame,
+    row_idx: int,
+) -> dict[str, object]:
     # Get the field patch for this row
     field_patch = field_patches.iloc[row_idx]
 
@@ -1111,15 +1143,14 @@ def _process_patch_document(
         explicit_patch = {}
 
     # Merge patches - explicit patch takes precedence
-    merged_patch = {**field_patch, **explicit_patch}
-    return merged_patch
+    return {**field_patch, **explicit_patch}
 
 
 def _ensure_dict_patch(
     metadata_df: pd.DataFrame,
     final_patch_column: str,
     row_idx: int,
-):
+) -> tuple[dict[str, object], list[str]]:
     patch = metadata_df.loc[row_idx, final_patch_column]
     validation_errors = []
 
@@ -1141,19 +1172,19 @@ def _ensure_dict_patch(
             parsed = json.loads(patch)
             if isinstance(parsed, dict):
                 return parsed
-            else:
-                error_msg = (
-                    f"Row {row_idx}: JSON must be an object/dictionary, "
-                    f"got {type(parsed).__name__}"
-                )
-                logger.warning(error_msg)
-                validation_errors.append(error_msg)
-                return {}, validation_errors  # if not validate else None
         except json.JSONDecodeError as e:
             error_msg = f"Row {row_idx}: Invalid JSON in patch document: {e}"
             logger.warning(error_msg)
             validation_errors.append(error_msg)
             return {}, validation_errors  # if not validate else None
+        else:
+            error_msg = (
+                f"Row {row_idx}: JSON must be an object/dictionary, "
+                f"got {type(parsed).__name__}"
+            )
+            logger.warning(error_msg)
+            validation_errors.append(error_msg)
+            return {}, validation_errors  # if not validate else None
 
     # For other types, log warning
     error_msg = f"Row {row_idx}: Unsupported patch type: {type(patch).__name__}"
@@ -1165,7 +1196,7 @@ def _ensure_dict_patch(
 def _format_note_for_storage(
     note_text: str,
     current_time_ms: int,
-):
+) -> list[str] | None:
     if pd.isna(note_text):
         return None
     note_obj = {
@@ -1225,9 +1256,7 @@ def _log_flight_update_summary(
         logger.warning("Flight update response missing counts", extra=metrics)
     else:
         all_processed = int(spans_processed) == int(total_spans)
-        msg = (
-            "✅ All spans processed" if all_processed else "Partial processing"
-        )
+        msg = "All spans processed" if all_processed else "Partial processing"
         logger.info(msg, extra=metrics)
 
     # Emit individual error lines (structured per-error, easy to aggregate)
@@ -1246,7 +1275,7 @@ def _message_to_dict(
     msg: message.Message,
     preserve_names: bool = True,
     use_int_enums: bool = False,
-):
+) -> dict[str, object]:
     return json_format.MessageToDict(
         msg,
         preserving_proto_field_name=preserve_names,
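
Taken together, the export changes above give `export_to_parquet` a required `path` keyword, drop the `similarity_search_params` argument, and change the return type from a DataFrame to `None`. A minimal calling sketch under those assumptions; the class docstring steers users toward `arize.ArizeClient`, so constructing `SpansClient` directly here is illustrative only, and `sdk_config`, the IDs, and the time range are placeholders:

from datetime import datetime, timezone

from arize.spans.client import SpansClient

client = SpansClient(sdk_config=sdk_config)  # sdk_config: a resolved SDKConfiguration
client.export_to_parquet(
    path="spans.parquet",  # new required keyword in 8.0.0b0
    space_id="your-space-id",
    project_name="your-project",
    start_time=datetime(2024, 1, 1, tzinfo=timezone.utc),
    end_time=datetime(2024, 1, 2, tzinfo=timezone.utc),
)  # returns None; the data is written to spans.parquet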