PyPI - arize - Versions diffs - 8.0.0a22__py3-none-any.whl → 8.0.0a23__py3-none-any.whl - Mend

arize 8.0.0a22__py3-none-any.whl → 8.0.0a23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (166) hide show

arize/__init__.py +17 -9
arize/_exporter/client.py +55 -36
arize/_exporter/parsers/tracing_data_parser.py +41 -30
arize/_exporter/validation.py +3 -3
arize/_flight/client.py +207 -76
arize/_generated/api_client/__init__.py +30 -6
arize/_generated/api_client/api/__init__.py +1 -0
arize/_generated/api_client/api/datasets_api.py +864 -190
arize/_generated/api_client/api/experiments_api.py +167 -131
arize/_generated/api_client/api/projects_api.py +1197 -0
arize/_generated/api_client/api_client.py +2 -2
arize/_generated/api_client/configuration.py +42 -34
arize/_generated/api_client/exceptions.py +2 -2
arize/_generated/api_client/models/__init__.py +15 -4
arize/_generated/api_client/models/dataset.py +10 -10
arize/_generated/api_client/models/dataset_example.py +111 -0
arize/_generated/api_client/models/dataset_example_update.py +100 -0
arize/_generated/api_client/models/dataset_version.py +13 -13
arize/_generated/api_client/models/datasets_create_request.py +16 -8
arize/_generated/api_client/models/datasets_examples_insert_request.py +100 -0
arize/_generated/api_client/models/datasets_examples_list200_response.py +106 -0
arize/_generated/api_client/models/datasets_examples_update_request.py +102 -0
arize/_generated/api_client/models/datasets_list200_response.py +10 -4
arize/_generated/api_client/models/experiment.py +14 -16
arize/_generated/api_client/models/experiment_run.py +108 -0
arize/_generated/api_client/models/experiment_run_create.py +102 -0
arize/_generated/api_client/models/experiments_create_request.py +16 -10
arize/_generated/api_client/models/experiments_list200_response.py +10 -4
arize/_generated/api_client/models/experiments_runs_list200_response.py +19 -5
arize/_generated/api_client/models/{error.py → pagination_metadata.py} +13 -11
arize/_generated/api_client/models/primitive_value.py +172 -0
arize/_generated/api_client/models/problem.py +100 -0
arize/_generated/api_client/models/project.py +99 -0
arize/_generated/api_client/models/{datasets_list_examples200_response.py → projects_create_request.py} +13 -11
arize/_generated/api_client/models/projects_list200_response.py +106 -0
arize/_generated/api_client/rest.py +2 -2
arize/_generated/api_client/test/test_dataset.py +4 -2
arize/_generated/api_client/test/test_dataset_example.py +56 -0
arize/_generated/api_client/test/test_dataset_example_update.py +52 -0
arize/_generated/api_client/test/test_dataset_version.py +7 -2
arize/_generated/api_client/test/test_datasets_api.py +27 -13
arize/_generated/api_client/test/test_datasets_create_request.py +8 -4
arize/_generated/api_client/test/{test_datasets_list_examples200_response.py → test_datasets_examples_insert_request.py} +19 -15
arize/_generated/api_client/test/test_datasets_examples_list200_response.py +66 -0
arize/_generated/api_client/test/test_datasets_examples_update_request.py +61 -0
arize/_generated/api_client/test/test_datasets_list200_response.py +9 -3
arize/_generated/api_client/test/test_experiment.py +2 -4
arize/_generated/api_client/test/test_experiment_run.py +56 -0
arize/_generated/api_client/test/test_experiment_run_create.py +54 -0
arize/_generated/api_client/test/test_experiments_api.py +6 -6
arize/_generated/api_client/test/test_experiments_create_request.py +9 -6
arize/_generated/api_client/test/test_experiments_list200_response.py +9 -5
arize/_generated/api_client/test/test_experiments_runs_list200_response.py +15 -5
arize/_generated/api_client/test/test_pagination_metadata.py +53 -0
arize/_generated/api_client/test/{test_error.py → test_primitive_value.py} +13 -14
arize/_generated/api_client/test/test_problem.py +57 -0
arize/_generated/api_client/test/test_project.py +58 -0
arize/_generated/api_client/test/test_projects_api.py +59 -0
arize/_generated/api_client/test/test_projects_create_request.py +54 -0
arize/_generated/api_client/test/test_projects_list200_response.py +70 -0
arize/_generated/api_client_README.md +43 -29
arize/_generated/protocol/flight/flight_pb2.py +400 -0
arize/_lazy.py +27 -19
arize/client.py +268 -55
arize/config.py +365 -116
arize/constants/__init__.py +1 -0
arize/constants/config.py +11 -4
arize/constants/ml.py +6 -4
arize/constants/openinference.py +2 -0
arize/constants/pyarrow.py +2 -0
arize/constants/spans.py +3 -1
arize/datasets/__init__.py +1 -0
arize/datasets/client.py +299 -84
arize/datasets/errors.py +32 -2
arize/datasets/validation.py +18 -8
arize/embeddings/__init__.py +2 -0
arize/embeddings/auto_generator.py +23 -19
arize/embeddings/base_generators.py +89 -36
arize/embeddings/constants.py +2 -0
arize/embeddings/cv_generators.py +26 -4
arize/embeddings/errors.py +27 -5
arize/embeddings/nlp_generators.py +31 -12
arize/embeddings/tabular_generators.py +32 -20
arize/embeddings/usecases.py +12 -2
arize/exceptions/__init__.py +1 -0
arize/exceptions/auth.py +11 -1
arize/exceptions/base.py +29 -4
arize/exceptions/models.py +21 -2
arize/exceptions/parameters.py +31 -0
arize/exceptions/spaces.py +12 -1
arize/exceptions/types.py +86 -7
arize/exceptions/values.py +220 -20
arize/experiments/__init__.py +1 -0
arize/experiments/client.py +389 -285
arize/experiments/evaluators/__init__.py +1 -0
arize/experiments/evaluators/base.py +74 -41
arize/experiments/evaluators/exceptions.py +6 -3
arize/experiments/evaluators/executors.py +121 -73
arize/experiments/evaluators/rate_limiters.py +106 -57
arize/experiments/evaluators/types.py +34 -7
arize/experiments/evaluators/utils.py +65 -27
arize/experiments/functions.py +103 -101
arize/experiments/tracing.py +52 -44
arize/experiments/types.py +56 -31
arize/logging.py +54 -22
arize/models/__init__.py +1 -0
arize/models/batch_validation/__init__.py +1 -0
arize/models/batch_validation/errors.py +543 -65
arize/models/batch_validation/validator.py +339 -300
arize/models/bounded_executor.py +20 -7
arize/models/casting.py +75 -29
arize/models/client.py +326 -107
arize/models/proto.py +95 -40
arize/models/stream_validation.py +42 -14
arize/models/surrogate_explainer/__init__.py +1 -0
arize/models/surrogate_explainer/mimic.py +24 -13
arize/pre_releases.py +43 -0
arize/projects/__init__.py +1 -0
arize/projects/client.py +129 -0
arize/regions.py +40 -0
arize/spans/__init__.py +1 -0
arize/spans/client.py +130 -106
arize/spans/columns.py +13 -0
arize/spans/conversion.py +54 -38
arize/spans/validation/__init__.py +1 -0
arize/spans/validation/annotations/__init__.py +1 -0
arize/spans/validation/annotations/annotations_validation.py +6 -4
arize/spans/validation/annotations/dataframe_form_validation.py +13 -11
arize/spans/validation/annotations/value_validation.py +35 -11
arize/spans/validation/common/__init__.py +1 -0
arize/spans/validation/common/argument_validation.py +33 -8
arize/spans/validation/common/dataframe_form_validation.py +35 -9
arize/spans/validation/common/errors.py +211 -11
arize/spans/validation/common/value_validation.py +80 -13
arize/spans/validation/evals/__init__.py +1 -0
arize/spans/validation/evals/dataframe_form_validation.py +28 -8
arize/spans/validation/evals/evals_validation.py +34 -4
arize/spans/validation/evals/value_validation.py +26 -3
arize/spans/validation/metadata/__init__.py +1 -1
arize/spans/validation/metadata/argument_validation.py +14 -5
arize/spans/validation/metadata/dataframe_form_validation.py +26 -10
arize/spans/validation/metadata/value_validation.py +24 -10
arize/spans/validation/spans/__init__.py +1 -0
arize/spans/validation/spans/dataframe_form_validation.py +34 -13
arize/spans/validation/spans/spans_validation.py +35 -4
arize/spans/validation/spans/value_validation.py +76 -7
arize/types.py +293 -157
arize/utils/__init__.py +1 -0
arize/utils/arrow.py +31 -15
arize/utils/cache.py +34 -6
arize/utils/dataframe.py +19 -2
arize/utils/online_tasks/__init__.py +2 -0
arize/utils/online_tasks/dataframe_preprocessor.py +53 -41
arize/utils/openinference_conversion.py +44 -5
arize/utils/proto.py +10 -0
arize/utils/size.py +5 -3
arize/version.py +3 -1
{arize-8.0.0a22.dist-info → arize-8.0.0a23.dist-info}/METADATA +4 -3
arize-8.0.0a23.dist-info/RECORD +174 -0
{arize-8.0.0a22.dist-info → arize-8.0.0a23.dist-info}/WHEEL +1 -1
arize-8.0.0a23.dist-info/licenses/LICENSE +176 -0
arize-8.0.0a23.dist-info/licenses/NOTICE +13 -0
arize/_generated/protocol/flight/export_pb2.py +0 -61
arize/_generated/protocol/flight/ingest_pb2.py +0 -365
arize-8.0.0a22.dist-info/RECORD +0 -146
arize-8.0.0a22.dist-info/licenses/LICENSE.md +0 -12

arize/regions.py ADDED Viewed

@@ -0,0 +1,40 @@
+"""Region definitions and configuration for Arize deployment zones."""
+from dataclasses import dataclass
+from enum import StrEnum
+from arize.constants.config import DEFAULT_FLIGHT_PORT
+class Region(StrEnum):
+    """Enum representing available Arize deployment regions."""
+    US_CENTRAL_1 = "us-central-1a"
+    EU_WEST_1 = "eu-west-1a"
+    CA_CENTRAL_1 = "ca-central-1a"
+    US_EAST_1 = "us-east-1b"
+    UNSPECIFIED = ""
+@dataclass(frozen=True)
+class RegionEndpoints:
+    """Container for region-specific API endpoint hostnames and ports."""
+    api_host: str
+    otlp_host: str
+    flight_host: str
+    flight_port: int
+def _get_region_endpoints(region: Region) -> RegionEndpoints:
+    return RegionEndpoints(
+        api_host=f"api.{region}.arize.com",
+        otlp_host=f"otlp.{region}.arize.com",
+        flight_host=f"flight.{region}.arize.com",
+        flight_port=DEFAULT_FLIGHT_PORT,
+    )
+REGION_ENDPOINTS: dict[Region, RegionEndpoints] = {
+    r: _get_region_endpoints(r) for r in Region if r != Region.UNSPECIFIED
+}

arize/spans/__init__.py CHANGED Viewed

@@ -0,0 +1 @@
+"""LLM tracing spans functionality for the Arize SDK."""

arize/spans/client.py CHANGED Viewed

@@ -1,3 +1,5 @@
+"""Client implementation for managing spans and traces in the Arize platform."""
 # type: ignore[pb2]
 from __future__ import annotations
@@ -6,7 +8,7 @@ import logging
 import re
 from datetime import datetime, timezone
 from functools import partial
-from typing import TYPE_CHECKING, Any, Dict, List
+from typing import TYPE_CHECKING, Any
 import numpy as np
 import pandas as pd
@@ -16,10 +18,6 @@ from google.protobuf import json_format, message
 from arize._exporter.client import ArizeExportClient
 from arize._flight.client import ArizeFlightClient, FlightPostArrowFileResponse
 from arize._flight.types import FlightRequestType
-from arize._generated.protocol.flight.ingest_pb2 import (
-    WriteSpanAnnotationResponse,
-    WriteSpanEvaluationResponse,
-)
 from arize.constants.spans import DEFAULT_DATETIME_FMT
 from arize.exceptions.base import (
     INVALID_ARROW_CONVERSION_MSG,
@@ -29,9 +27,7 @@ from arize.exceptions.models import MissingProjectNameError
 from arize.exceptions.spaces import MissingSpaceIDError
 from arize.logging import CtxAdapter
 from arize.types import Environments, SimilaritySearchParams
-from arize.utils.arrow import (
-    post_arrow_table,
-)
+from arize.utils.arrow import post_arrow_table
 from arize.utils.dataframe import (
     remove_extraneous_columns,
     reset_dataframe_index,
@@ -41,13 +37,21 @@ from arize.utils.proto import get_pb_schema_tracing
 if TYPE_CHECKING:
     import requests
+    from arize._generated.protocol.flight import flight_pb2
     from arize.config import SDKConfiguration
 logger = logging.getLogger(__name__)
 class SpansClient:
-    def __init__(self, *, sdk_config: SDKConfiguration):
+    """Client for logging LLM tracing spans and evaluations to Arize."""
+    def __init__(self, *, sdk_config: SDKConfiguration) -> None:
+        """Initialize the spans client with SDK configuration.
+        Args:
+            sdk_config: SDK configuration containing API endpoints and credentials.
+        """
         self._sdk_config = sdk_config
     def log(
@@ -62,12 +66,14 @@ class SpansClient:
         timeout: float | None = None,
         tmp_dir: str = "",
     ) -> requests.Response:
-        """
-        Logs a pandas dataframe containing LLM tracing data to Arize via a POST request. Returns a
-        :class:`Response` object from the Requests HTTP library to ensure successful delivery of
-        records.
+        """Logs a pandas dataframe containing LLM tracing data to Arize via a POST request.
+        Returns a :class:`Response` object from the Requests HTTP library to ensure
+        successful delivery of records.
         Args:
+            space_id (str): The space ID where the project resides.
+            project_name (str): A unique name to identify your project in the Arize platform.
             dataframe (pd.DataFrame): The dataframe containing the LLM traces.
             evals_dataframe (pd.DataFrame, optional): A dataframe containing LLM evaluations data.
                 The evaluations are joined to their corresponding spans via a left outer join, i.e.,
@@ -76,11 +82,10 @@ class SpansClient:
                 Defaults to "%Y-%m-%dT%H:%M:%S.%f+00:00".
             validate (bool, optional): When set to True, validation is run before sending data.
                 Defaults to True.
-            tmp_dir (str, optional): Temporary directory/file to store the serialized data in binary
-                before sending to Arize.
             timeout (float, optional): You can stop waiting for a response after a given number
                 of seconds with the timeout parameter. Defaults to None.
-            project_name (str, optional): A unique name to identify your project in the Arize platform.
+            tmp_dir (str, optional): Temporary directory/file to store the serialized data in binary
+                before sending to Arize.
         Returns:
             `Response` object
@@ -220,12 +225,12 @@ class SpansClient:
             log.debug("Converting data to Arrow format")
             pa_table = pa.Table.from_pandas(df, preserve_index=False)
         except pa.ArrowInvalid as e:
-            log.error(f"{INVALID_ARROW_CONVERSION_MSG}: {str(e)}")
+            log.exception(INVALID_ARROW_CONVERSION_MSG)
             raise pa.ArrowInvalid(
-                f"Error converting to Arrow format: {str(e)}"
+                f"Error converting to Arrow format: {e!s}"
             ) from e
-        except Exception as e:
-            log.error(f"Unexpected error creating Arrow table: {str(e)}")
+        except Exception:
+            log.exception("Unexpected error creating Arrow table")
             raise
         proto_schema = get_pb_schema_tracing(project_name=project_name)
@@ -262,27 +267,23 @@ class SpansClient:
         force_http: bool = False,
         timeout: float | None = None,
         tmp_dir: str = "",
-    ) -> WriteSpanEvaluationResponse:
-        """
-        Logs a pandas dataframe containing LLM evaluations data to Arize via a Flight gRPC request.
-        The dataframe must contain a column `context.span_id`
-        such that Arize can assign each evaluation to its respective span.
+    ) -> flight_pb2.WriteSpanEvaluationResponse:
+        """Logs a pandas dataframe containing LLM evaluations data to Arize via a Flight gRPC request.
+        The dataframe must contain a column `context.span_id` such that Arize can assign
+        each evaluation to its respective span.
         Args:
+            space_id (str): The space ID where the project resides.
+            project_name (str): A unique name to identify your project in the Arize platform.
             dataframe (pd.DataFrame): A dataframe containing LLM evaluations data.
-            model_id (str): A unique name to identify your model in the Arize platform.
-                (Deprecated: Use `project_name` instead.)
-            model_version (str, optional): Used to group a subset of traces a given
-                model_id to compare and track changes. It should match the model_id of the spans
-                sent previously, to which evaluations will be assigned. Defaults to None.
             validate (bool, optional): When set to True, validation is run before sending data.
                 Defaults to True.
-            path (str, optional): Temporary directory/file to store the serialized data in binary
-                before sending to Arize.
+            force_http (bool, optional): Force the use of HTTP for data upload. Defaults to False.
             timeout (float, optional): You can stop waiting for a response after a given number
                 of seconds with the timeout parameter. Defaults to None.
-            project_name (str, optional): A unique name to identify your project in the Arize platform.
-                Either model_id or project_name must be provided.
+            tmp_dir (str, optional): Temporary directory/file to store the serialized data in binary
+                before sending to Arize.
         """
         from arize.spans.columns import EVAL_COLUMN_PATTERN, SPAN_SPAN_ID_COL
         from arize.spans.validation.evals import evals_validation
@@ -358,12 +359,12 @@ class SpansClient:
             log.debug("Converting data to Arrow format")
             pa_table = pa.Table.from_pandas(evals_df, preserve_index=False)
         except pa.ArrowInvalid as e:
-            log.error(f"{INVALID_ARROW_CONVERSION_MSG}: {str(e)}")
+            log.exception(INVALID_ARROW_CONVERSION_MSG)
             raise pa.ArrowInvalid(
-                f"Error converting to Arrow format: {str(e)}"
+                f"Error converting to Arrow format: {e!s}"
             ) from e
-        except Exception as e:
-            log.error(f"Unexpected error creating Arrow table: {str(e)}")
+        except Exception:
+            log.exception("Unexpected error creating Arrow table")
             raise
         if force_http:
@@ -395,8 +396,8 @@ class SpansClient:
         response = None
         with ArizeFlightClient(
             api_key=self._sdk_config.api_key,
-            host=self._sdk_config.flight_server_host,
-            port=self._sdk_config.flight_server_port,
+            host=self._sdk_config.flight_host,
+            port=self._sdk_config.flight_port,
             scheme=self._sdk_config.flight_scheme,
             request_verify=self._sdk_config.request_verify,
             max_chunksize=self._sdk_config.pyarrow_max_chunksize,
@@ -409,8 +410,8 @@ class SpansClient:
                     request_type=request_type,
                 )
             except Exception as e:
-                msg = f"Error during update request: {str(e)}"
-                log.error(msg)
+                msg = f"Error during update request: {e!s}"
+                log.exception(msg)
                 raise RuntimeError(msg) from e
         if response is None:
@@ -437,18 +438,18 @@ class SpansClient:
         project_name: str,
         dataframe: pd.DataFrame,
         validate: bool = True,
-    ) -> WriteSpanAnnotationResponse:
-        """
-        Logs a pandas dataframe containing LLM span annotations to Arize via a Flight gRPC request.
-        The dataframe must contain a column `context.span_id`
-        such that Arize can assign each annotation to its respective span.
-        Annotation columns should follow the pattern `annotation.<name>.<suffix>` where suffix is
-        either `label` or `score`. An optional `annotation.notes` column can be included for
-        free-form text notes.
+    ) -> flight_pb2.WriteSpanAnnotationResponse:
+        """Logs a pandas dataframe containing LLM span annotations to Arize via a Flight gRPC request.
+        The dataframe must contain a column `context.span_id` such that Arize can assign
+        each annotation to its respective span. Annotation columns should follow the pattern
+        `annotation.<name>.<suffix>` where suffix is either `label` or `score`. An optional
+        `annotation.notes` column can be included for free-form text notes.
         Args:
-            dataframe (pd.DataFrame): A dataframe containing LLM annotation data.
+            space_id (str): The space ID where the project resides.
             project_name (str): A unique name to identify your project in the Arize platform.
+            dataframe (pd.DataFrame): A dataframe containing LLM annotation data.
             validate (bool, optional): When set to True, validation is run before sending data.
                 Defaults to True.
         """
@@ -588,12 +589,12 @@ class SpansClient:
             log.debug("Converting data to Arrow format")
             pa_table = pa.Table.from_pandas(anno_df, preserve_index=False)
         except pa.ArrowInvalid as e:
-            log.error(f"{INVALID_ARROW_CONVERSION_MSG}: {str(e)}")
+            log.exception(INVALID_ARROW_CONVERSION_MSG)
             raise pa.ArrowInvalid(
-                f"Error converting to Arrow format: {str(e)}"
+                f"Error converting to Arrow format: {e!s}"
             ) from e
-        except Exception as e:
-            log.error(f"Unexpected error creating Arrow table: {str(e)}")
+        except Exception:
+            log.exception("Unexpected error creating Arrow table")
             raise
         if ANNOTATION_NOTES_COLUMN_NAME in anno_df.columns:
@@ -611,8 +612,8 @@ class SpansClient:
         response = None
         with ArizeFlightClient(
             api_key=self._sdk_config.api_key,
-            host=self._sdk_config.flight_server_host,
-            port=self._sdk_config.flight_server_port,
+            host=self._sdk_config.flight_host,
+            port=self._sdk_config.flight_port,
             scheme=self._sdk_config.flight_scheme,
             request_verify=self._sdk_config.request_verify,
             max_chunksize=self._sdk_config.pyarrow_max_chunksize,
@@ -625,8 +626,8 @@ class SpansClient:
                     request_type=request_type,
                 )
             except Exception as e:
-                msg = f"Error during update request: {str(e)}"
-                log.error(msg)
+                msg = f"Error during update request: {e!s}"
+                log.exception(msg)
                 raise RuntimeError(msg) from e
         if response is None:
@@ -654,9 +655,10 @@ class SpansClient:
         dataframe: pd.DataFrame,
         patch_document_column_name: str = "patch_document",
         validate: bool = True,
-    ) -> Dict[str, Any]:
-        """
-        Log metadata updates using JSON Merge Patch format. This method is only supported for LLM model types.
+    ) -> dict[str, Any]:
+        """Log metadata updates using JSON Merge Patch format.
+        This method is only supported for LLM model types.
         The dataframe must contain a column `context.span_id` to identify spans and either:
         1. A column with JSON patch documents (specified by patch_document_column_name), or
@@ -674,8 +676,9 @@ class SpansClient:
           Note: This differs from standard JSON Merge Patch where null values remove fields.
         Args:
-            dataframe: DataFrame with span_ids and either patch documents or metadata field columns.
+            space_id: The space ID where the project resides.
             project_name: A unique name to identify your project in the Arize platform.
+            dataframe: DataFrame with span_ids and either patch documents or metadata field columns.
             patch_document_column_name: Name of the column containing JSON patch documents.
                 Defaults to "patch_document".
             validate: When set to True, validation is run before sending data.
@@ -813,11 +816,10 @@ class SpansClient:
             )
         # Create a new column for patch documents if we're going to use it
-        if has_metadata_fields or has_patch_document:
-            # Use 'patch_document' as the standardized column name for downstream processing
-            final_patch_column = "patch_document"
-            if final_patch_column not in metadata_df.columns:
-                metadata_df[final_patch_column] = None
+        # Use 'patch_document' as the standardized column name for downstream processing
+        final_patch_column = "patch_document"
+        if final_patch_column not in metadata_df.columns:
+            metadata_df[final_patch_column] = None
         # Process metadata field columns if they exist
         if has_metadata_fields:
@@ -865,7 +867,7 @@ class SpansClient:
                 if patch:
                     processed_patches.append(patch)
                 if errors:
-                    validation_errors.append(errors)
+                    validation_errors.extend(errors)
             # If validation is enabled and errors found, raise ValidationFailure
             if validate and validation_errors:
@@ -922,9 +924,11 @@ class SpansClient:
             metadata_df[final_patch_column] = metadata_df[
                 final_patch_column
             ].apply(
-                lambda p: json.dumps(p)
-                if not isinstance(p, float) or not np.isnan(p)
-                else json.dumps({})
+                lambda p: (
+                    json.dumps(p)
+                    if not isinstance(p, float) or not np.isnan(p)
+                    else json.dumps({})
+                )
             )
         # Convert to Arrow table
@@ -932,20 +936,20 @@ class SpansClient:
             log.debug("Converting data to Arrow format")
             pa_table = pa.Table.from_pandas(metadata_df, preserve_index=False)
         except pa.ArrowInvalid as e:
-            log.error(f"{INVALID_ARROW_CONVERSION_MSG}: {str(e)}")
+            log.exception(INVALID_ARROW_CONVERSION_MSG)
             raise pa.ArrowInvalid(
-                f"Error converting to Arrow format: {str(e)}"
+                f"Error converting to Arrow format: {e!s}"
             ) from e
-        except Exception as e:
-            log.error(f"Unexpected error creating Arrow table: {str(e)}")
+        except Exception:
+            log.exception("Unexpected error creating Arrow table")
             raise
         request_type = FlightRequestType.METADATA
         response = None
         with ArizeFlightClient(
             api_key=self._sdk_config.api_key,
-            host=self._sdk_config.flight_server_host,
-            port=self._sdk_config.flight_server_port,
+            host=self._sdk_config.flight_host,
+            port=self._sdk_config.flight_port,
             scheme=self._sdk_config.flight_scheme,
             request_verify=self._sdk_config.request_verify,
             max_chunksize=self._sdk_config.pyarrow_max_chunksize,
@@ -958,8 +962,8 @@ class SpansClient:
                     request_type=request_type,
                 )
             except Exception as e:
-                msg = f"Error during update request: {str(e)}"
-                log.error(msg)
+                msg = f"Error during update request: {e!s}"
+                log.exception(msg)
                 raise RuntimeError(msg) from e
         if response is None:
@@ -987,14 +991,25 @@ class SpansClient:
         start_time: datetime,
         end_time: datetime,
         where: str = "",
-        columns: List | None = None,
+        columns: list | None = None,
         similarity_search_params: SimilaritySearchParams | None = None,
         stream_chunk_size: int | None = None,
     ) -> pd.DataFrame:
+        """Export span data from Arize to a pandas DataFrame.
+        Retrieves trace/span data from the specified project within a time range
+        and returns it as a pandas DataFrame. Supports filtering with SQL-like
+        WHERE clauses and similarity search for semantic retrieval.
+        Returns:
+        -------
+            pd.DataFrame: DataFrame containing the requested span data with columns
+                for span metadata, attributes, events, and any custom fields.
+        """
         with ArizeFlightClient(
             api_key=self._sdk_config.api_key,
-            host=self._sdk_config.flight_server_host,
-            port=self._sdk_config.flight_server_port,
+            host=self._sdk_config.flight_host,
+            port=self._sdk_config.flight_port,
             scheme=self._sdk_config.flight_scheme,
             request_verify=self._sdk_config.request_verify,
             max_chunksize=self._sdk_config.pyarrow_max_chunksize,
@@ -1017,19 +1032,27 @@ class SpansClient:
     def export_to_parquet(
         self,
         *,
+        path: str,
         space_id: str,
         project_name: str,
         start_time: datetime,
         end_time: datetime,
         where: str = "",
-        columns: List | None = None,
+        columns: list | None = None,
         similarity_search_params: SimilaritySearchParams | None = None,
         stream_chunk_size: int | None = None,
-    ) -> pd.DataFrame:
+    ) -> None:
+        """Export span data from Arize to a Parquet file.
+        Retrieves trace/span data from the specified project within a time range
+        and writes it directly to a Parquet file at the specified path. Supports
+        filtering with SQL-like WHERE clauses and similarity search for semantic
+        retrieval. Efficient for large datasets and long-term storage.
+        """
         with ArizeFlightClient(
             api_key=self._sdk_config.api_key,
-            host=self._sdk_config.flight_server_host,
-            port=self._sdk_config.flight_server_port,
+            host=self._sdk_config.flight_host,
+            port=self._sdk_config.flight_port,
             scheme=self._sdk_config.flight_scheme,
             request_verify=self._sdk_config.request_verify,
             max_chunksize=self._sdk_config.pyarrow_max_chunksize,
@@ -1038,6 +1061,7 @@ class SpansClient:
                 flight_client=flight_client,
             )
             return exporter.export_to_parquet(
+                path=path,
                 space_id=space_id,
                 model_id=project_name,
                 environment=Environments.TRACING,
@@ -1050,7 +1074,7 @@ class SpansClient:
             )
-def _build_patch_document(row):
+def _build_patch_document(row: pd.Series) -> dict[str, object]:
     # Extract and preserve metadata values with proper types
     patch = {}
     for key in row.index:
@@ -1070,8 +1094,11 @@ def _build_patch_document(row):
 def _process_patch_document(
-    metadata_df, patch_document_column_name, field_patches, row_idx
-):
+    metadata_df: pd.DataFrame,
+    patch_document_column_name: str,
+    field_patches: pd.DataFrame,
+    row_idx: int,
+) -> dict[str, object]:
     # Get the field patch for this row
     field_patch = field_patches.iloc[row_idx]
@@ -1111,15 +1138,14 @@ def _process_patch_document(
         explicit_patch = {}
     # Merge patches - explicit patch takes precedence
-    merged_patch = {**field_patch, **explicit_patch}
-    return merged_patch
+    return {**field_patch, **explicit_patch}
 def _ensure_dict_patch(
     metadata_df: pd.DataFrame,
     final_patch_column: str,
     row_idx: int,
-):
+) -> tuple[dict[str, object], list[str]]:
     patch = metadata_df.loc[row_idx, final_patch_column]
     validation_errors = []
@@ -1141,19 +1167,19 @@ def _ensure_dict_patch(
             parsed = json.loads(patch)
             if isinstance(parsed, dict):
                 return parsed
-            else:
-                error_msg = (
-                    f"Row {row_idx}: JSON must be an object/dictionary, "
-                    f"got {type(parsed).__name__}"
-                )
-                logger.warning(error_msg)
-                validation_errors.append(error_msg)
-                return {}, validation_errors  # if not validate else None
         except json.JSONDecodeError as e:
             error_msg = f"Row {row_idx}: Invalid JSON in patch document: {e}"
             logger.warning(error_msg)
             validation_errors.append(error_msg)
             return {}, validation_errors  # if not validate else None
+        else:
+            error_msg = (
+                f"Row {row_idx}: JSON must be an object/dictionary, "
+                f"got {type(parsed).__name__}"
+            )
+            logger.warning(error_msg)
+            validation_errors.append(error_msg)
+            return {}, validation_errors  # if not validate else None
     # For other types, log warning
     error_msg = f"Row {row_idx}: Unsupported patch type: {type(patch).__name__}"
@@ -1165,7 +1191,7 @@ def _ensure_dict_patch(
 def _format_note_for_storage(
     note_text: str,
     current_time_ms: int,
-):
+) -> list[str] | None:
     if pd.isna(note_text):
         return None
     note_obj = {
@@ -1225,9 +1251,7 @@ def _log_flight_update_summary(
         logger.warning("Flight update response missing counts", extra=metrics)
     else:
         all_processed = int(spans_processed) == int(total_spans)
-        msg = (
-            "✅ All spans processed" if all_processed else "Partial processing"
-        )
+        msg = "All spans processed" if all_processed else "Partial processing"
         logger.info(msg, extra=metrics)
     # Emit individual error lines (structured per-error, easy to aggregate)
@@ -1246,7 +1270,7 @@ def _message_to_dict(
     msg: message.Message,
     preserve_names: bool = True,
     use_int_enums: bool = False,
-):
+) -> dict[str, object]:
     return json_format.MessageToDict(
         msg,
         preserving_proto_field_name=preserve_names,

arize/spans/columns.py CHANGED Viewed

@@ -1,3 +1,5 @@
+"""Span column definitions and OpenInference semantic conventions."""
 from enum import Enum
 import openinference.semconv.trace as oinf
@@ -5,6 +7,8 @@ import opentelemetry.semconv.trace as otel
 class SpanColumnDataType(Enum):
+    """Enum representing supported data types for span columns."""
     BOOL = 1
     NUMERIC = 2
     STRING = 3
@@ -15,12 +19,21 @@ class SpanColumnDataType(Enum):
 class SpanColumn:
+    """Configuration for a custom span column with name, data type, and annotation settings."""
     def __init__(
         self,
         name: str,
         data_type: SpanColumnDataType,
         required: bool = False,
     ) -> None:
+        """Initialize a span column configuration.
+        Args:
+            name: Name of the span column.
+            data_type: Data type of the column values.
+            required: Whether the column is required.
+        """
         self.name = name
         self.required = required
         self.data_type = data_type

arize 8.0.0a22__py3-none-any.whl → 8.0.0a23__py3-none-any.whl

arize 8.0.0a22__py3-none-any.whl → 8.0.0a23__py3-none-any.whl