arize 8.0.0b2__py3-none-any.whl → 8.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arize/__init__.py +8 -1
- arize/_exporter/client.py +18 -17
- arize/_exporter/parsers/tracing_data_parser.py +9 -4
- arize/_exporter/validation.py +1 -1
- arize/_flight/client.py +33 -13
- arize/_lazy.py +37 -2
- arize/client.py +61 -35
- arize/config.py +168 -14
- arize/constants/config.py +1 -0
- arize/datasets/client.py +32 -19
- arize/embeddings/auto_generator.py +14 -7
- arize/embeddings/base_generators.py +15 -9
- arize/embeddings/cv_generators.py +2 -2
- arize/embeddings/nlp_generators.py +8 -8
- arize/embeddings/tabular_generators.py +5 -5
- arize/exceptions/config.py +22 -0
- arize/exceptions/parameters.py +1 -1
- arize/exceptions/values.py +8 -5
- arize/experiments/__init__.py +4 -0
- arize/experiments/client.py +17 -11
- arize/experiments/evaluators/base.py +6 -3
- arize/experiments/evaluators/executors.py +6 -4
- arize/experiments/evaluators/rate_limiters.py +3 -1
- arize/experiments/evaluators/types.py +7 -5
- arize/experiments/evaluators/utils.py +7 -5
- arize/experiments/functions.py +111 -48
- arize/experiments/tracing.py +4 -1
- arize/experiments/types.py +31 -26
- arize/logging.py +53 -32
- arize/ml/batch_validation/validator.py +82 -70
- arize/ml/bounded_executor.py +25 -6
- arize/ml/casting.py +45 -27
- arize/ml/client.py +35 -28
- arize/ml/proto.py +16 -17
- arize/ml/stream_validation.py +63 -25
- arize/ml/surrogate_explainer/mimic.py +15 -7
- arize/ml/types.py +26 -12
- arize/pre_releases.py +7 -6
- arize/py.typed +0 -0
- arize/regions.py +10 -10
- arize/spans/client.py +113 -21
- arize/spans/conversion.py +7 -5
- arize/spans/validation/annotations/dataframe_form_validation.py +1 -1
- arize/spans/validation/annotations/value_validation.py +11 -14
- arize/spans/validation/common/dataframe_form_validation.py +1 -1
- arize/spans/validation/common/value_validation.py +10 -13
- arize/spans/validation/evals/value_validation.py +1 -1
- arize/spans/validation/metadata/argument_validation.py +1 -1
- arize/spans/validation/metadata/dataframe_form_validation.py +1 -1
- arize/spans/validation/metadata/value_validation.py +23 -1
- arize/utils/arrow.py +37 -1
- arize/utils/online_tasks/dataframe_preprocessor.py +8 -4
- arize/utils/proto.py +0 -1
- arize/utils/types.py +6 -6
- arize/version.py +1 -1
- {arize-8.0.0b2.dist-info → arize-8.0.1.dist-info}/METADATA +18 -3
- {arize-8.0.0b2.dist-info → arize-8.0.1.dist-info}/RECORD +60 -58
- {arize-8.0.0b2.dist-info → arize-8.0.1.dist-info}/WHEEL +0 -0
- {arize-8.0.0b2.dist-info → arize-8.0.1.dist-info}/licenses/LICENSE +0 -0
- {arize-8.0.0b2.dist-info → arize-8.0.1.dist-info}/licenses/NOTICE +0 -0
arize/regions.py
CHANGED
@@ -1,18 +1,18 @@
 """Region definitions and configuration for Arize deployment zones."""
 
 from dataclasses import dataclass
-from enum import …
+from enum import Enum
 
 from arize.constants.config import DEFAULT_FLIGHT_PORT
 
 
-class Region(…
+class Region(Enum):
     """Enum representing available Arize deployment regions."""
 
-    …
-    …
-    …
-    …
+    CA_CENTRAL_1A = "ca-central-1a"
+    EU_WEST_1A = "eu-west-1a"
+    US_CENTRAL_1A = "us-central-1a"
+    US_EAST_1B = "us-east-1b"
     UNSET = ""
 
 
@@ -28,13 +28,13 @@ class RegionEndpoints:
 
 def _get_region_endpoints(region: Region) -> RegionEndpoints:
     return RegionEndpoints(
-        api_host=f"api.{region}.arize.com",
-        otlp_host=f"otlp.{region}.arize.com",
-        flight_host=f"flight.{region}.arize.com",
+        api_host=f"api.{region.value}.arize.com",
+        otlp_host=f"otlp.{region.value}.arize.com",
+        flight_host=f"flight.{region.value}.arize.com",
         flight_port=DEFAULT_FLIGHT_PORT,
     )
 
 
 REGION_ENDPOINTS: dict[Region, RegionEndpoints] = {
-    r: _get_region_endpoints(r) for r in Region if r != Region.UNSET
+    r: _get_region_endpoints(r) for r in list(Region) if r != Region.UNSET
 }
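
Note: the `.value` change pairs with the switch to a plain `Enum` base. A plain `Enum` member stringifies to its qualified name, not to its payload string, so interpolating the member directly would build the wrong hostname. A standalone sketch (not the SDK module itself) of the difference:

```python
from enum import Enum

class Region(Enum):
    US_EAST_1B = "us-east-1b"

# A plain Enum member stringifies to its qualified name, not its value:
print(f"api.{Region.US_EAST_1B}.arize.com")        # api.Region.US_EAST_1B.arize.com
print(f"api.{Region.US_EAST_1B.value}.arize.com")  # api.us-east-1b.arize.com
```

The `list(Region)` change in the dict comprehension is behaviorally equivalent to iterating the class directly; it reads as a type-checker appeasement rather than a behavior fix.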
arize/spans/client.py
CHANGED
@@ -1,4 +1,3 @@
-# type: ignore[pb2]
 """Client implementation for managing spans and traces in the Arize platform."""
 
 from __future__ import annotations

@@ -21,12 +20,16 @@ from arize._flight.types import FlightRequestType
 from arize.constants.spans import DEFAULT_DATETIME_FMT
 from arize.exceptions.base import (
     INVALID_ARROW_CONVERSION_MSG,
+    ValidationError,
     ValidationFailure,
 )
 from arize.exceptions.models import MissingProjectNameError
 from arize.exceptions.spaces import MissingSpaceIDError
 from arize.logging import CtxAdapter
 from arize.ml.types import Environments
+from arize.spans.validation.metadata.value_validation import (
+    InvalidPatchDocumentFormat,
+)
 from arize.utils.arrow import post_arrow_table
 from arize.utils.dataframe import (
     remove_extraneous_columns,

@@ -850,7 +853,8 @@ class SpansClient:
                 )
                 for idx in range(len(metadata_df))
             ]
-            …
+            # Type ignore: pandas DataFrame column assignment type is overly restrictive
+            metadata_df[final_patch_column] = merged_patches  # type: ignore[assignment]
         else:
             # Just use the field patches directly
             metadata_df[final_patch_column] = field_patches

@@ -887,7 +891,8 @@ class SpansClient:
             log.error(e)
             raise ValidationFailure(validation_errors)
 
-        …
+        # Type ignore: pandas DataFrame column assignment type is overly restrictive
+        metadata_df[final_patch_column] = processed_patches  # type: ignore[assignment]
 
         # Run validations on the processed dataframe
         if validate:

@@ -1054,8 +1059,26 @@ class SpansClient:
 
         Retrieves trace/span data from the specified project within a time range
         and writes it directly to a Parquet file at the specified path. Supports
-        filtering with SQL-like WHERE clauses
-        …
+        filtering with SQL-like WHERE clauses for efficient querying. Ideal for
+        large datasets and long-term storage.
+
+        Args:
+            path: The file path where the Parquet file will be written.
+            space_id: The space ID where the project resides.
+            project_name: The name of the project to export span data from.
+            start_time: Start of the time range (inclusive) as a datetime object.
+            end_time: End of the time range (inclusive) as a datetime object.
+            where: Optional SQL-like WHERE clause to filter rows (e.g., "span.status_code = 'ERROR'").
+            columns: Optional list of column names to include. If None, all columns are returned.
+            stream_chunk_size: Optional chunk size for streaming large result sets.
+
+        Raises:
+            RuntimeError: If the Flight client request fails or returns no response.
+
+        Notes:
+            - Uses Apache Arrow Flight for efficient data transfer
+            - Data is written directly to the specified path as a Parquet file
+            - Large exports may benefit from specifying stream_chunk_size
         """
         with ArizeFlightClient(
             api_key=self._sdk_config.api_key,

@@ -1068,7 +1091,7 @@ class SpansClient:
         exporter = ArizeExportClient(
             flight_client=flight_client,
         )
-        …
+        exporter.export_to_parquet(
             path=path,
             space_id=space_id,
             model_id=project_name,

@@ -1082,6 +1105,15 @@ class SpansClient:
 
 
 def _build_patch_document(row: pd.Series) -> dict[str, object]:
+    """Build a patch document from a pandas Series row by extracting metadata fields.
+
+    Args:
+        row: A pandas Series representing a row of data with potential metadata columns.
+
+    Returns:
+        dict[str, object]: A dictionary mapping metadata field names (without the
+            'attributes.metadata.' prefix) to their values, preserving arrays and scalars.
+    """
     # Extract and preserve metadata values with proper types
     patch = {}
     for key in row.index:

@@ -1103,9 +1135,21 @@ def _build_patch_document(row: pd.Series) -> dict[str, object]:
 def _process_patch_document(
     metadata_df: pd.DataFrame,
     patch_document_column_name: str,
-    field_patches: pd.…
+    field_patches: pd.Series[Any],
     row_idx: int,
 ) -> dict[str, object]:
+    """Process and merge patch documents from field patches and explicit patch column.
+
+    Args:
+        metadata_df: DataFrame containing the metadata with patch documents.
+        patch_document_column_name: Name of the column containing explicit patch documents.
+        field_patches: DataFrame containing patches derived from individual metadata fields.
+        row_idx: The row index to process.
+
+    Returns:
+        dict[str, object]: Merged patch document where explicit patches take precedence over
+            field patches. Returns empty dict if patch document is invalid or missing.
+    """
     # Get the field patch for this row
     field_patch = field_patches.iloc[row_idx]

@@ -1152,9 +1196,21 @@ def _ensure_dict_patch(
     metadata_df: pd.DataFrame,
     final_patch_column: str,
     row_idx: int,
-) -> tuple[dict[str, object], list[…
+) -> tuple[dict[str, object], list[ValidationError]]:
+    """Ensure a patch value is a dictionary, converting from JSON string if needed.
+
+    Args:
+        metadata_df: DataFrame containing the patch data.
+        final_patch_column: Name of the column containing the final patch document.
+        row_idx: The row index to process.
+
+    Returns:
+        tuple[dict[str, object], list[ValidationError]]: A tuple containing:
+            - The patch as a dictionary (empty dict if invalid or missing)
+            - List of validation errors (empty if no errors)
+    """
     patch = metadata_df.loc[row_idx, final_patch_column]
-    validation_errors = []
+    validation_errors: list[ValidationError] = []
 
     # For None/null values, return an empty dict
     if patch is None:

@@ -1173,25 +1229,26 @@ def _ensure_dict_patch(
         try:
             parsed = json.loads(patch)
             if isinstance(parsed, dict):
-                return parsed
+                return parsed, validation_errors
         except json.JSONDecodeError as e:
-            error_msg = f"…
-            logger.warning(error_msg)
-            validation_errors.append(
+            error_msg = f"Invalid JSON in patch document: {e}"
+            logger.warning(f"Row {row_idx}: {error_msg}")
+            validation_errors.append(
+                InvalidPatchDocumentFormat(row_idx, error_msg)
+            )
             return {}, validation_errors  # if not validate else None
         else:
-            error_msg = (
-                …
-                …
+            error_msg = f"JSON must be an object/dictionary, got {type(parsed).__name__}"
+            logger.warning(f"Row {row_idx}: {error_msg}")
+            validation_errors.append(
+                InvalidPatchDocumentFormat(row_idx, error_msg)
             )
-            logger.warning(error_msg)
-            validation_errors.append(error_msg)
             return {}, validation_errors  # if not validate else None
 
     # For other types, log warning
-    error_msg = f"…
-    logger.warning(error_msg)
-    validation_errors.append(error_msg)
+    error_msg = f"Unsupported patch type: {type(patch).__name__}"
+    logger.warning(f"Row {row_idx}: {error_msg}")
+    validation_errors.append(InvalidPatchDocumentFormat(row_idx, error_msg))
     return {}, validation_errors  # if not validate else None

@@ -1199,6 +1256,16 @@ def _format_note_for_storage(
     note_text: str,
     current_time_ms: int,
 ) -> list[str] | None:
+    """Format a note text into a JSON-serialized list for storage.
+
+    Args:
+        note_text: The note text content to format.
+        current_time_ms: The current timestamp in milliseconds.
+
+    Returns:
+        list[str] | None: A list containing a single JSON string with note metadata
+            (text, updated_by, updated_at), or None if note_text is NaN/missing.
+    """
     if pd.isna(note_text):
         return None
     note_obj = {

@@ -1215,6 +1282,19 @@ def _log_flight_update_summary(
     request_type: FlightRequestType,
     response: FlightPostArrowFileResponse,
 ) -> None:
+    """Log a structured summary of Flight update results including metrics and errors.
+
+    Args:
+        project_name: Name of the project being updated.
+        total_spans: Total number of spans in the update request.
+        request_type: The type of Flight request being performed.
+        response: The Flight response object containing update results and errors.
+
+    Notes:
+        Logs one summary line with aggregated metrics, plus individual error lines
+        for any failed span updates. Metrics include success rate, spans processed,
+        and failure counts.
+    """
     spans_updated = getattr(response, "spans_updated", None)
     if spans_updated is None:
         # Fallback for older response types

@@ -1278,6 +1358,18 @@ def _message_to_dict(
     preserve_names: bool = True,
     use_int_enums: bool = False,
 ) -> dict[str, object]:
+    """Convert a protobuf Message to a dictionary representation.
+
+    Args:
+        msg: The protobuf Message to convert.
+        preserve_names: If True, preserve original proto field names. If False, use
+            lowerCamelCase names. Defaults to True.
+        use_int_enums: If True, represent enum values as integers. If False, use
+            enum string names. Defaults to False.
+
+    Returns:
+        dict[str, object]: Dictionary representation of the protobuf message.
+    """
     return json_format.MessageToDict(
         msg,
         preserving_proto_field_name=preserve_names,
arize/spans/conversion.py
CHANGED
@@ -120,17 +120,19 @@ def is_missing_value(value: object) -> bool:
         np.inf,
         -np.inf,
     )
-    return value in assumed_missing_values or pd.isna(value)
+    return value in assumed_missing_values or pd.isna(value)  # type: ignore[call-overload]
 
 
 def _jsonify_list_of_dicts(
     list_of_dicts: Iterable[dict[str, object]] | None,
 ) -> list[str]:
-    if …
-        list_of_dicts…
-    ):
+    if list_of_dicts is None or is_missing_value(list_of_dicts):
         return []
-    return […
+    return [
+        result
+        for d in list_of_dicts
+        if (result := _jsonify_dict(d)) is not None
+    ]
 
 
 def _jsonify_dict(d: dict[str, object] | None) -> str | None:
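
Note: the rewritten `_jsonify_list_of_dicts` body uses an assignment expression so each element is serialized exactly once, with `None` results dropped in the same pass. A minimal standalone illustration of the pattern (`jsonify_dict` here is a hypothetical stand-in for the private helper):

```python
import json

def jsonify_dict(d: dict[str, object] | None) -> str | None:
    # Hypothetical stand-in: serialize non-empty dicts, skip the rest.
    return json.dumps(d) if d else None

dicts = [{"a": 1}, None, {}, {"b": 2}]

# The walrus operator binds the serialized value inside the filter clause,
# so the helper runs once per element instead of once to test and once to keep.
out = [result for d in dicts if (result := jsonify_dict(d)) is not None]
print(out)  # ['{"a": 1}', '{"b": 2}']
```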
arize/spans/validation/annotations/value_validation.py
CHANGED

@@ -65,7 +65,7 @@ def check_invalid_annotation_column_names(
     df: pd.DataFrame,
 ) -> list[ValidationError]:
     """Checks for columns that start with 'annotation.' but don't match the expected pattern."""
-    errors = []
+    errors: list[ValidationError] = []
 
     invalid_annotation_columns = [
         col

@@ -78,7 +78,7 @@ def check_annotation_updated_at_timestamp(
     df: pd.DataFrame,
     col_name: str,
     is_required: bool,
-) -> list[…
+) -> list[ValidationError]:
     """Validates annotation timestamp values for validity and acceptable ranges.
 
     Checks that timestamp values are positive, not in the future, and satisfy

@@ -96,7 +96,7 @@ def check_annotation_updated_at_timestamp(
     if col_name not in df.columns:
         return []
 
-    errors = []
+    errors: list[ValidationError] = []
     if is_required and df[col_name].isnull().any():
         errors.append(
             InvalidMissingValueInColumn(

@@ -131,7 +131,7 @@ def check_annotation_cols(
     dataframe: pd.DataFrame,
 ) -> list[ValidationError]:
     """Checks value length and validity for columns matching annotation patterns."""
-    checks = []
+    checks: list[list[ValidationError]] = []
     for col in dataframe.columns:
         if col.endswith(ANNOTATION_LABEL_SUFFIX):
             checks.append(

@@ -140,7 +140,8 @@ def check_annotation_cols(
                     col_name=col,
                     min_len=ANNOTATION_LABEL_MIN_STR_LENGTH,
                     max_len=ANNOTATION_LABEL_MAX_STR_LENGTH,
-                    …
+                    # Individual columns are not required
+                    is_required=False,
                 )
             )
         elif col.endswith(ANNOTATION_SCORE_SUFFIX):

@@ -231,15 +232,11 @@ def check_annotation_notes_column(
     col_name = ANNOTATION_NOTES_COLUMN_NAME
     if col_name in dataframe.columns:
         # Validate the length of the raw string
-        return …
-        …
-        …
-        …
-        …
-        …
-            max_len=ANNOTATION_NOTES_MAX_STR_LENGTH,
-            is_required=False,
-        )
-        )
+        return common_value_validation.check_string_column_value_length(
+            df=dataframe,
+            col_name=col_name,
+            min_len=0,  # Allow empty notes
+            max_len=ANNOTATION_NOTES_MAX_STR_LENGTH,
+            is_required=False,
         )
     return []
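
Note: the recurring `errors: list[ValidationError] = []` annotations are what make these checks type-clean. Without the annotation, mypy infers the element type from the first `append`, after which appending a sibling error class is rejected, and returning the narrower list where `list[ValidationError]` is declared also fails because `list` is invariant. A minimal reproduction with assumed class names:

```python
class ValidationError(Exception): ...
class InvalidMissingValueInColumn(ValidationError): ...
class InvalidTimestampValueInColumn(ValidationError): ...

def check(has_timestamp_issue: bool) -> list[ValidationError]:
    # Without the explicit annotation, mypy infers list[InvalidMissingValueInColumn]
    # from the first append and then rejects both the second append and the return.
    errors: list[ValidationError] = []
    errors.append(InvalidMissingValueInColumn("col"))
    if has_timestamp_issue:
        errors.append(InvalidTimestampValueInColumn("col"))
    return errors
```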
arize/spans/validation/…/dataframe_form_validation.py
CHANGED

@@ -67,5 +67,5 @@ def check_dataframe_for_duplicate_columns(
     # Get the duplicated column names from the dataframe
     duplicate_columns = df.columns[df.columns.duplicated()]
     if not duplicate_columns.empty:
-        return [InvalidDataFrameDuplicateColumns(duplicate_columns)]
+        return [InvalidDataFrameDuplicateColumns(duplicate_columns.tolist())]
     return []
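
Note: a small type fix. `df.columns[df.columns.duplicated()]` yields a pandas `Index`, and `.tolist()` hands the error class a plain Python list instead. Illustration:

```python
import pandas as pd

df = pd.DataFrame([[1, 2, 3]], columns=["a", "b", "a"])
dupes = df.columns[df.columns.duplicated()]
print(type(dupes).__name__)  # Index
print(dupes.tolist())        # ['a'] -- the plain list the error class now receives
```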
arize/spans/validation/common/value_validation.py
CHANGED

@@ -11,6 +11,7 @@ from arize.constants.ml import (
     MAX_FUTURE_YEARS_FROM_CURRENT_TIME,
     MAX_PAST_YEARS_FROM_CURRENT_TIME,
 )
+from arize.exceptions.base import ValidationError
 from arize.exceptions.parameters import InvalidModelVersion, InvalidProjectName
 from arize.spans.columns import (
     SPAN_END_TIME_COL,

@@ -73,7 +74,7 @@ def check_string_column_value_length(
     max_len: int,
     is_required: bool,
     must_be_json: bool = False,
-) -> list[…
+) -> list[ValidationError]:
     """Validate string column values are within length bounds and optionally valid JSON.
 
     Args:

@@ -90,7 +91,7 @@ def check_string_column_value_length(
     if col_name not in df.columns:
         return []
 
-    errors = []
+    errors: list[ValidationError] = []
     if is_required and df[col_name].isnull().any():
         errors.append(
             InvalidMissingValueInColumn(

@@ -129,7 +130,7 @@ def check_string_column_allowed_values(
     col_name: str,
     allowed_values: list[str],
     is_required: bool,
-) -> list[…
+) -> list[ValidationError]:
     """Validate that string column values are within allowed values.
 
     Args:

@@ -144,7 +145,7 @@ def check_string_column_allowed_values(
     if col_name not in df.columns:
         return []
 
-    errors = []
+    errors: list[ValidationError] = []
     if is_required and df[col_name].isnull().any():
         errors.append(
             InvalidMissingValueInColumn(

@@ -177,7 +178,7 @@ def check_string_column_allowed_values(
 def check_float_column_valid_numbers(
     df: pd.DataFrame,
     col_name: str,
-) -> list[…
+) -> list[ValidationError]:
     """Check that float column contains only finite numbers, no infinity values.
 
     Args:

@@ -201,11 +202,7 @@ def check_float_column_valid_numbers(
 
 def check_value_columns_start_end_time(
     df: pd.DataFrame,
-) -> list[
-    InvalidMissingValueInColumn
-    | InvalidTimestampValueInColumn
-    | InvalidStartAndEndTimeValuesInColumn
-]:
+) -> list[ValidationError]:
     """Validate start and end time columns for timestamps and logical ordering.
 
     Args:

@@ -214,7 +211,7 @@ def check_value_columns_start_end_time(
     Returns:
         List of validation errors for missing values, invalid timestamps, or start > end.
     """
-    errors = []
+    errors: list[ValidationError] = []
     errors += check_value_timestamp(
         df=df,
         col_name=SPAN_START_TIME_COL.name,

@@ -243,7 +240,7 @@ def check_value_timestamp(
     df: pd.DataFrame,
     col_name: str,
     is_required: bool,
-) -> list[…
+) -> list[ValidationError]:
     """Validate timestamp column values are within reasonable bounds.
 
     Args:

@@ -258,7 +255,7 @@ def check_value_timestamp(
     if col_name not in df.columns:
         return []
 
-    errors = []
+    errors: list[ValidationError] = []
     if is_required and df[col_name].isnull().any():
         errors.append(
             InvalidMissingValueInColumn(
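
Note: collapsing the three-member union return type into `list[ValidationError]` keeps the aggregate check composable: `errors += sub_check(...)` works uniformly, and adding a new error class no longer changes any signature. A simplified, runnable stand-in for the pattern (the real checks also validate timestamp bounds and ordering):

```python
import pandas as pd

class ValidationError(Exception): ...
class InvalidMissingValueInColumn(ValidationError): ...

def check_value_timestamp(df: pd.DataFrame, col_name: str) -> list[ValidationError]:
    # Simplified stand-in for the SDK check: flag missing values in one column.
    errors: list[ValidationError] = []
    if df[col_name].isnull().any():
        errors.append(InvalidMissingValueInColumn(col_name))
    return errors

def check_value_columns_start_end_time(df: pd.DataFrame) -> list[ValidationError]:
    # The broadened base-class annotation lets `+=` concatenate results from
    # heterogeneous sub-checks without widening the signature each time.
    errors: list[ValidationError] = []
    errors += check_value_timestamp(df, "start_time")
    errors += check_value_timestamp(df, "end_time")
    return errors

df = pd.DataFrame({"start_time": [1.0, None], "end_time": [2.0, 3.0]})
print(check_value_columns_start_end_time(df))  # [InvalidMissingValueInColumn('start_time')]
```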
arize/spans/validation/metadata/value_validation.py
CHANGED

@@ -34,6 +34,28 @@ class MetadataValueError(ValidationError):
         return f"{self.message} {self.resolution}"
 
 
+class InvalidPatchDocumentFormat(ValidationError):
+    """Raised when patch document format is invalid or cannot be parsed."""
+
+    def __init__(self, row_idx: int, message: str) -> None:
+        """Initialize the exception with patch document format error context.
+
+        Args:
+            row_idx: The row index where the invalid patch was found.
+            message: Detailed error message describing the format issue.
+        """
+        self.row_idx = row_idx
+        self.message = message
+
+    def __repr__(self) -> str:
+        """Return a string representation for debugging and logging."""
+        return "Invalid_Patch_Document_Format"
+
+    def error_message(self) -> str:
+        """Return the error message for this exception."""
+        return f"Row {self.row_idx}: {self.message}"
+
+
 def calculate_json_depth(obj: object, current_depth: int = 1) -> int:
     """Calculate the maximum nesting depth of a JSON object.
 

@@ -67,7 +89,7 @@ def validate_values(
     Returns:
         A list of validation errors, empty if none found
     """
-    errors = []
+    errors: list[ValidationError] = []
 
     # Skip validation if span_id column is not present
     if SPAN_SPAN_ID_COL.name not in metadata_dataframe.columns:
arize/utils/arrow.py
CHANGED
@@ -1,4 +1,3 @@
-# type: ignore[pb2]
 """Apache Arrow utilities for data serialization and file operations."""
 
 from __future__ import annotations

@@ -124,6 +123,18 @@ def post_arrow_table(
 def _append_to_pyarrow_metadata(
     pa_schema: pa.Schema, new_metadata: dict[str, Any]
 ) -> object:
+    """Append metadata to a PyArrow schema without overwriting existing keys.
+
+    Args:
+        pa_schema: The PyArrow schema to add metadata to.
+        new_metadata: Dictionary of metadata key-value pairs to append.
+
+    Returns:
+        pa.Schema: A new PyArrow schema with the merged metadata.
+
+    Raises:
+        KeyError: If any keys in new_metadata conflict with existing schema metadata.
+    """
     # Ensure metadata is handled correctly, even if initially None.
     metadata = pa_schema.metadata
     if metadata is None:

@@ -145,6 +156,14 @@ def _append_to_pyarrow_metadata(
 def _write_arrow_file(
     path: str, pa_table: pa.Table, pa_schema: pa.Schema, max_chunksize: int
 ) -> None:
+    """Write a PyArrow table to an Arrow IPC file with specified schema and chunk size.
+
+    Args:
+        path: The file path where the Arrow file will be written.
+        pa_table: The PyArrow table containing the data to write.
+        pa_schema: The PyArrow schema to use for the file.
+        max_chunksize: Maximum number of rows per record batch chunk.
+    """
     with (
         pa.OSFile(path, mode="wb") as sink,
         pa.ipc.RecordBatchStreamWriter(sink, pa_schema) as writer,

@@ -153,6 +172,15 @@ def _write_arrow_file(
 
 
 def _maybe_log_project_url(response: requests.Response) -> None:
+    """Attempt to extract and log the Arize project URL from an HTTP response.
+
+    Args:
+        response: The HTTP response object from an Arize API request.
+
+    Notes:
+        Logs success message with URL if extraction succeeds, or warning if it fails.
+        This function never raises exceptions.
+    """
     try:
         url = get_arize_project_url(response)
         if url:

@@ -176,6 +204,14 @@ def _mktemp_in(directory: str) -> str:
 
 
 def _filesize(path: str) -> int:
+    """Get the size of a file in bytes.
+
+    Args:
+        path: The file path to check.
+
+    Returns:
+        int: The file size in bytes, or -1 if the file cannot be accessed.
+    """
     try:
         return os.path.getsize(path)
     except Exception:
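
Note: the new docstring on `_append_to_pyarrow_metadata` pins down a useful contract: merge, never overwrite, raise `KeyError` on collisions. A sketch of how such a helper can be built on public PyArrow APIs (a re-creation from the docstring, not the SDK's actual body):

```python
import pyarrow as pa

def append_to_pyarrow_metadata(schema: pa.Schema, new_metadata: dict) -> pa.Schema:
    # PyArrow stores schema metadata as bytes -> bytes, or None when unset.
    existing = schema.metadata or {}
    encoded = {str(k).encode(): str(v).encode() for k, v in new_metadata.items()}
    conflicts = existing.keys() & encoded.keys()
    if conflicts:
        raise KeyError(f"Metadata keys already present: {sorted(conflicts)}")
    # with_metadata returns a new schema; the original is left untouched.
    return schema.with_metadata({**existing, **encoded})

schema = pa.schema([pa.field("x", pa.int64())]).with_metadata({"origin": "sdk"})
schema = append_to_pyarrow_metadata(schema, {"version": "8.0.1"})
print(schema.metadata)  # {b'origin': b'sdk', b'version': b'8.0.1'}
```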
arize/utils/online_tasks/dataframe_preprocessor.py
CHANGED

@@ -81,9 +81,12 @@ def extract_nested_data_to_column(
     remainder = ".".join(parts[prefix_len:])
 
     # 3) Apply introspect row-by-row
+    # Type narrowing: prefix_col is guaranteed to be str after the None check above
+    prefix_col_str: str = prefix_col
+
     def apply_introspect_arize_attribute(
-        row: pd.Series,
-        prefix_col: str = …
+        row: pd.Series,  # type: ignore[type-arg]
+        prefix_col: str = prefix_col_str,
         remainder: str = remainder,
     ) -> object:
         val = row[prefix_col]

@@ -94,8 +97,9 @@ def extract_nested_data_to_column(
         else:
             return result if result is not None else np.nan
 
-    result_df[attribute] = result_df.apply(
-        apply_introspect_arize_attribute,
+    result_df[attribute] = result_df.apply(  # type: ignore[call-overload]
+        apply_introspect_arize_attribute,
+        axis=1,
     )
 
     new_cols.append(attribute)
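
Note: binding `prefix_col_str` through a default argument does double duty: defaults are evaluated once at function-definition time, so the row-wise callable does not depend on late-bound closure state, and the parameter carries a concrete `str` type for the checker after the Optional narrowing. A standalone illustration of the idiom (toy data, not the SDK function):

```python
import pandas as pd

df = pd.DataFrame({"payload": [{"a": 1}, {"a": 2}]})

prefix_col: str | None = "payload"
if prefix_col is None:
    raise ValueError("no prefix column resolved")
prefix_col_str: str = prefix_col  # narrow str | None -> str once, up front

def extract_a(
    row: pd.Series,
    prefix_col: str = prefix_col_str,  # bound at definition time, typed as str
) -> object:
    return row[prefix_col].get("a")

print(df.apply(extract_a, axis=1).tolist())  # [1, 2]
```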
arize/utils/proto.py
CHANGED
arize/utils/types.py
CHANGED
@@ -43,7 +43,7 @@ def is_array_of(arr: Sequence[object], tp: T) -> bool:
     return isinstance(arr, np.ndarray) and all(isinstance(x, tp) for x in arr)
 
 
-def is_list_of(lst: …
+def is_list_of(lst: object, tp: T) -> bool:
     """Check if a value is a list with all elements of a specific type.
 
     Args:

@@ -70,10 +70,10 @@ def is_iterable_of(lst: Sequence[object], tp: T) -> bool:
 
 
 def is_dict_of(
-    d: …
-    key_allowed_types: …
-    value_allowed_types: …
-    value_list_allowed_types: …
+    d: object,
+    key_allowed_types: type | tuple[type, ...],
+    value_allowed_types: type | tuple[type, ...] = (),
+    value_list_allowed_types: type | tuple[type, ...] = (),
 ) -> bool:
     """Method to check types are valid for dictionary.
 

@@ -98,7 +98,7 @@ def is_dict_of(
         and all(isinstance(k, key_allowed_types) for k in d)
         and all(
             isinstance(v, value_allowed_types)
-            or any(is_list_of(v, t) for t in value_list_allowed_types)
+            or any(is_list_of(v, t) for t in value_list_allowed_types)  # type: ignore[union-attr]
             for v in d.values()
             if value_allowed_types or value_list_allowed_types
         )
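
Note: widening the first parameters to `object` makes these guards honest: they are called on values of unknown type, and `isinstance` does the narrowing inside. A simplified, runnable re-creation based on the context lines (the real `is_list_of` checks against a TypeVar, and the real module also handles numpy arrays):

```python
def is_list_of(lst: object, tp: type) -> bool:
    # Simplified: accept any object, narrow with isinstance.
    return isinstance(lst, list) and all(isinstance(x, tp) for x in lst)

def is_dict_of(
    d: object,
    key_allowed_types: type | tuple[type, ...],
    value_allowed_types: type | tuple[type, ...] = (),
    value_list_allowed_types: tuple[type, ...] = (),
) -> bool:
    if not isinstance(d, dict):
        return False
    return all(isinstance(k, key_allowed_types) for k in d) and all(
        isinstance(v, value_allowed_types)
        or any(is_list_of(v, t) for t in value_list_allowed_types)
        for v in d.values()
        if value_allowed_types or value_list_allowed_types
    )

print(is_dict_of({"tags": ["a", "b"], "count": 3}, str, int, (str,)))  # True
print(is_dict_of({1: "x"}, str))                                       # False
```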