arize 8.0.0a22__py3-none-any.whl → 8.0.0b0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the package versions as they appear in that public registry.
- arize/__init__.py +28 -19
- arize/_exporter/client.py +56 -37
- arize/_exporter/parsers/tracing_data_parser.py +41 -30
- arize/_exporter/validation.py +3 -3
- arize/_flight/client.py +207 -76
- arize/_generated/api_client/__init__.py +30 -6
- arize/_generated/api_client/api/__init__.py +1 -0
- arize/_generated/api_client/api/datasets_api.py +864 -190
- arize/_generated/api_client/api/experiments_api.py +167 -131
- arize/_generated/api_client/api/projects_api.py +1197 -0
- arize/_generated/api_client/api_client.py +2 -2
- arize/_generated/api_client/configuration.py +42 -34
- arize/_generated/api_client/exceptions.py +2 -2
- arize/_generated/api_client/models/__init__.py +15 -4
- arize/_generated/api_client/models/dataset.py +10 -10
- arize/_generated/api_client/models/dataset_example.py +111 -0
- arize/_generated/api_client/models/dataset_example_update.py +100 -0
- arize/_generated/api_client/models/dataset_version.py +13 -13
- arize/_generated/api_client/models/datasets_create_request.py +16 -8
- arize/_generated/api_client/models/datasets_examples_insert_request.py +100 -0
- arize/_generated/api_client/models/datasets_examples_list200_response.py +106 -0
- arize/_generated/api_client/models/datasets_examples_update_request.py +102 -0
- arize/_generated/api_client/models/datasets_list200_response.py +10 -4
- arize/_generated/api_client/models/experiment.py +14 -16
- arize/_generated/api_client/models/experiment_run.py +108 -0
- arize/_generated/api_client/models/experiment_run_create.py +102 -0
- arize/_generated/api_client/models/experiments_create_request.py +16 -10
- arize/_generated/api_client/models/experiments_list200_response.py +10 -4
- arize/_generated/api_client/models/experiments_runs_list200_response.py +19 -5
- arize/_generated/api_client/models/{error.py → pagination_metadata.py} +13 -11
- arize/_generated/api_client/models/primitive_value.py +172 -0
- arize/_generated/api_client/models/problem.py +100 -0
- arize/_generated/api_client/models/project.py +99 -0
- arize/_generated/api_client/models/{datasets_list_examples200_response.py → projects_create_request.py} +13 -11
- arize/_generated/api_client/models/projects_list200_response.py +106 -0
- arize/_generated/api_client/rest.py +2 -2
- arize/_generated/api_client/test/test_dataset.py +4 -2
- arize/_generated/api_client/test/test_dataset_example.py +56 -0
- arize/_generated/api_client/test/test_dataset_example_update.py +52 -0
- arize/_generated/api_client/test/test_dataset_version.py +7 -2
- arize/_generated/api_client/test/test_datasets_api.py +27 -13
- arize/_generated/api_client/test/test_datasets_create_request.py +8 -4
- arize/_generated/api_client/test/{test_datasets_list_examples200_response.py → test_datasets_examples_insert_request.py} +19 -15
- arize/_generated/api_client/test/test_datasets_examples_list200_response.py +66 -0
- arize/_generated/api_client/test/test_datasets_examples_update_request.py +61 -0
- arize/_generated/api_client/test/test_datasets_list200_response.py +9 -3
- arize/_generated/api_client/test/test_experiment.py +2 -4
- arize/_generated/api_client/test/test_experiment_run.py +56 -0
- arize/_generated/api_client/test/test_experiment_run_create.py +54 -0
- arize/_generated/api_client/test/test_experiments_api.py +6 -6
- arize/_generated/api_client/test/test_experiments_create_request.py +9 -6
- arize/_generated/api_client/test/test_experiments_list200_response.py +9 -5
- arize/_generated/api_client/test/test_experiments_runs_list200_response.py +15 -5
- arize/_generated/api_client/test/test_pagination_metadata.py +53 -0
- arize/_generated/api_client/test/{test_error.py → test_primitive_value.py} +13 -14
- arize/_generated/api_client/test/test_problem.py +57 -0
- arize/_generated/api_client/test/test_project.py +58 -0
- arize/_generated/api_client/test/test_projects_api.py +59 -0
- arize/_generated/api_client/test/test_projects_create_request.py +54 -0
- arize/_generated/api_client/test/test_projects_list200_response.py +70 -0
- arize/_generated/api_client_README.md +43 -29
- arize/_generated/protocol/flight/flight_pb2.py +400 -0
- arize/_lazy.py +27 -19
- arize/client.py +181 -58
- arize/config.py +324 -116
- arize/constants/__init__.py +1 -0
- arize/constants/config.py +11 -4
- arize/constants/ml.py +6 -4
- arize/constants/openinference.py +2 -0
- arize/constants/pyarrow.py +2 -0
- arize/constants/spans.py +3 -1
- arize/datasets/__init__.py +1 -0
- arize/datasets/client.py +304 -84
- arize/datasets/errors.py +32 -2
- arize/datasets/validation.py +18 -8
- arize/embeddings/__init__.py +2 -0
- arize/embeddings/auto_generator.py +23 -19
- arize/embeddings/base_generators.py +89 -36
- arize/embeddings/constants.py +2 -0
- arize/embeddings/cv_generators.py +26 -4
- arize/embeddings/errors.py +27 -5
- arize/embeddings/nlp_generators.py +43 -18
- arize/embeddings/tabular_generators.py +46 -31
- arize/embeddings/usecases.py +12 -2
- arize/exceptions/__init__.py +1 -0
- arize/exceptions/auth.py +11 -1
- arize/exceptions/base.py +29 -4
- arize/exceptions/models.py +21 -2
- arize/exceptions/parameters.py +31 -0
- arize/exceptions/spaces.py +12 -1
- arize/exceptions/types.py +86 -7
- arize/exceptions/values.py +220 -20
- arize/experiments/__init__.py +13 -0
- arize/experiments/client.py +394 -285
- arize/experiments/evaluators/__init__.py +1 -0
- arize/experiments/evaluators/base.py +74 -41
- arize/experiments/evaluators/exceptions.py +6 -3
- arize/experiments/evaluators/executors.py +121 -73
- arize/experiments/evaluators/rate_limiters.py +106 -57
- arize/experiments/evaluators/types.py +34 -7
- arize/experiments/evaluators/utils.py +65 -27
- arize/experiments/functions.py +103 -101
- arize/experiments/tracing.py +52 -44
- arize/experiments/types.py +56 -31
- arize/logging.py +54 -22
- arize/ml/__init__.py +1 -0
- arize/ml/batch_validation/__init__.py +1 -0
- arize/{models → ml}/batch_validation/errors.py +545 -67
- arize/{models → ml}/batch_validation/validator.py +344 -303
- arize/ml/bounded_executor.py +47 -0
- arize/{models → ml}/casting.py +118 -108
- arize/{models → ml}/client.py +339 -118
- arize/{models → ml}/proto.py +97 -42
- arize/{models → ml}/stream_validation.py +43 -15
- arize/ml/surrogate_explainer/__init__.py +1 -0
- arize/{models → ml}/surrogate_explainer/mimic.py +25 -10
- arize/{types.py → ml/types.py} +355 -354
- arize/pre_releases.py +44 -0
- arize/projects/__init__.py +1 -0
- arize/projects/client.py +134 -0
- arize/regions.py +40 -0
- arize/spans/__init__.py +1 -0
- arize/spans/client.py +204 -175
- arize/spans/columns.py +13 -0
- arize/spans/conversion.py +60 -37
- arize/spans/validation/__init__.py +1 -0
- arize/spans/validation/annotations/__init__.py +1 -0
- arize/spans/validation/annotations/annotations_validation.py +6 -4
- arize/spans/validation/annotations/dataframe_form_validation.py +13 -11
- arize/spans/validation/annotations/value_validation.py +35 -11
- arize/spans/validation/common/__init__.py +1 -0
- arize/spans/validation/common/argument_validation.py +33 -8
- arize/spans/validation/common/dataframe_form_validation.py +35 -9
- arize/spans/validation/common/errors.py +211 -11
- arize/spans/validation/common/value_validation.py +81 -14
- arize/spans/validation/evals/__init__.py +1 -0
- arize/spans/validation/evals/dataframe_form_validation.py +28 -8
- arize/spans/validation/evals/evals_validation.py +34 -4
- arize/spans/validation/evals/value_validation.py +26 -3
- arize/spans/validation/metadata/__init__.py +1 -1
- arize/spans/validation/metadata/argument_validation.py +14 -5
- arize/spans/validation/metadata/dataframe_form_validation.py +26 -10
- arize/spans/validation/metadata/value_validation.py +24 -10
- arize/spans/validation/spans/__init__.py +1 -0
- arize/spans/validation/spans/dataframe_form_validation.py +35 -14
- arize/spans/validation/spans/spans_validation.py +35 -4
- arize/spans/validation/spans/value_validation.py +78 -8
- arize/utils/__init__.py +1 -0
- arize/utils/arrow.py +31 -15
- arize/utils/cache.py +34 -6
- arize/utils/dataframe.py +20 -3
- arize/utils/online_tasks/__init__.py +2 -0
- arize/utils/online_tasks/dataframe_preprocessor.py +58 -47
- arize/utils/openinference_conversion.py +44 -5
- arize/utils/proto.py +10 -0
- arize/utils/size.py +5 -3
- arize/utils/types.py +105 -0
- arize/version.py +3 -1
- {arize-8.0.0a22.dist-info → arize-8.0.0b0.dist-info}/METADATA +13 -6
- arize-8.0.0b0.dist-info/RECORD +175 -0
- {arize-8.0.0a22.dist-info → arize-8.0.0b0.dist-info}/WHEEL +1 -1
- arize-8.0.0b0.dist-info/licenses/LICENSE +176 -0
- arize-8.0.0b0.dist-info/licenses/NOTICE +13 -0
- arize/_generated/protocol/flight/export_pb2.py +0 -61
- arize/_generated/protocol/flight/ingest_pb2.py +0 -365
- arize/models/__init__.py +0 -0
- arize/models/batch_validation/__init__.py +0 -0
- arize/models/bounded_executor.py +0 -34
- arize/models/surrogate_explainer/__init__.py +0 -0
- arize-8.0.0a22.dist-info/RECORD +0 -146
- arize-8.0.0a22.dist-info/licenses/LICENSE.md +0 -12
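
The bulk of this release is a module reorganization: the `arize/models/` package moves to `arize/ml/`, the top-level `arize/types.py` moves to `arize/ml/types.py`, and shared helpers such as `is_list_of` move to `arize/utils/types.py`. The sketch below is illustrative only and assumes the public import paths simply track the file moves listed above (the diff confirms these specific imports inside the package, but not every downstream path):

# Illustrative only; import paths are assumed to mirror the file moves above.
# Before (8.0.0a22):
#   from arize.types import Embedding, ModelTypes, is_list_of

# After (8.0.0b0):
from arize.ml.types import Embedding, ModelTypes  # formerly arize.types
from arize.utils.types import is_list_of          # helper split out of arize.types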
arize/{models → ml}/proto.py
RENAMED
@@ -1,14 +1,14 @@
+"""Protocol buffer utilities for ML model data serialization."""
+
 # type: ignore[pb2]
 from __future__ import annotations

-from typing import Tuple
-
 from google.protobuf.timestamp_pb2 import Timestamp
 from google.protobuf.wrappers_pb2 import DoubleValue, StringValue

 from arize._generated.protocol.rec import public_pb2 as pb2
 from arize.exceptions.parameters import InvalidValueType
-from arize.types import (
+from arize.ml.types import (
     CATEGORICAL_MODEL_TYPES,
     NUMERIC_MODEL_TYPES,
     Embedding,
@@ -22,11 +22,19 @@ from arize.types import (
     RankingPredictionLabel,
     SemanticSegmentationLabel,
     convert_element,
-    is_list_of,
 )
+from arize.utils.types import is_list_of
+
+
+def get_pb_dictionary(d: dict[object, object] | None) -> dict[str, object]:
+    """Convert a dictionary to protobuf format with string keys and pb2.Value values.

+    Args:
+        d: Dictionary to convert, or None.

-
+    Returns:
+        Dictionary with string keys and protobuf Value objects, or empty dict if input is None.
+    """
     if d is None:
         return {}
     # Takes a dictionary and
@@ -41,6 +49,18 @@ def get_pb_dictionary(d):


 def get_pb_value(name: str | int | float, value: pb2.Value) -> pb2.Value:
+    """Convert a Python value to a protobuf Value object.
+
+    Args:
+        name: The name/key associated with this value.
+        value: The value to convert to protobuf format.
+
+    Returns:
+        A pb2.Value protobuf object, or None if value cannot be converted.
+
+    Raises:
+        TypeError: If value type is not supported.
+    """
     if isinstance(value, pb2.Value):
         return value
     if value is not None and is_list_of(value, str):
@@ -50,19 +70,18 @@ def get_pb_value(name: str | int | float, value: pb2.Value) -> pb2.Value:
     val = convert_element(value)
     if val is None:
         return None
-
+    if isinstance(val, (str, bool)):
         return pb2.Value(string=str(val))
-
+    if isinstance(val, int):
         return pb2.Value(int=val)
-
+    if isinstance(val, float):
         return pb2.Value(double=val)
-
+    if isinstance(val, Embedding):
         return pb2.Value(embedding=get_pb_embedding(val))
-
-
-
-
-    )
+    raise TypeError(
+        f"dimension '{name}' = {value} is type {type(value)}, but must be "
+        "one of: bool, str, float, int, embedding"
+    )


 def get_pb_label(
@@ -71,7 +90,7 @@ def get_pb_label(
     | bool
     | int
     | float
-
+    | tuple[str, float]
     | ObjectDetectionLabel
     | SemanticSegmentationLabel
     | InstanceSegmentationPredictionLabel
@@ -82,19 +101,32 @@
     | MultiClassActualLabel,
     model_type: ModelTypes,
 ) -> pb2.PredictionLabel | pb2.ActualLabel:
+    """Convert a label value to the appropriate protobuf label type.
+
+    Args:
+        prediction_or_actual: Whether this is a "prediction" or "actual" label.
+        value: The label value to convert.
+        model_type: The type of model (numeric, categorical, etc.).
+
+    Returns:
+        A protobuf PredictionLabel or ActualLabel object.
+
+    Raises:
+        ValueError: If model_type is not supported.
+    """
     value = convert_element(value)
     if model_type in NUMERIC_MODEL_TYPES:
         return _get_numeric_pb_label(prediction_or_actual, value)
-
+    if (
         model_type in CATEGORICAL_MODEL_TYPES
         or model_type == ModelTypes.GENERATIVE_LLM
     ):
         return _get_score_categorical_pb_label(prediction_or_actual, value)
-
+    if model_type == ModelTypes.OBJECT_DETECTION:
         return _get_cv_pb_label(prediction_or_actual, value)
-
+    if model_type == ModelTypes.RANKING:
         return _get_ranking_pb_label(value)
-
+    if model_type == ModelTypes.MULTI_CLASS:
         return _get_multi_class_pb_label(value)
     raise ValueError(
         f"model_type must be one of: {[mt.prediction_or_actual for mt in ModelTypes]} "
@@ -103,7 +135,18 @@ def get_pb_label(
     )


-def get_pb_timestamp(time_overwrite):
+def get_pb_timestamp(time_overwrite: int | None) -> object | None:
+    """Convert a Unix timestamp to a protobuf Timestamp object.
+
+    Args:
+        time_overwrite: Unix epoch time in seconds, or None.
+
+    Returns:
+        A protobuf Timestamp object, or None if input is None.
+
+    Raises:
+        TypeError: If time_overwrite is not an integer.
+    """
     if time_overwrite is None:
         return None
     time = convert_element(time_overwrite)
@@ -118,6 +161,14 @@ def get_pb_timestamp(time_overwrite):


 def get_pb_embedding(val: Embedding) -> pb2.Embedding:
+    """Convert an Embedding object to a protobuf Embedding.
+
+    Args:
+        val: The Embedding object containing vector, data, and link_to_data.
+
+    Returns:
+        A protobuf Embedding object with the vector and optional raw data.
+    """
     if Embedding._is_valid_iterable(val.data):
         return pb2.Embedding(
             vector=val.vector,
@@ -126,7 +177,7 @@ def get_pb_embedding(val: Embedding) -> pb2.Embedding:
             ),
             link_to_data=StringValue(value=val.link_to_data),
         )
-
+    if isinstance(val.data, str):
         return pb2.Embedding(
             vector=val.vector,
             raw_data=pb2.Embedding.RawData(
@@ -135,7 +186,7 @@ def get_pb_embedding(val: Embedding) -> pb2.Embedding:
             ),
             link_to_data=StringValue(value=val.link_to_data),
         )
-
+    if val.data is None:
         return pb2.Embedding(
             vector=val.vector,
             link_to_data=StringValue(value=val.link_to_data),
@@ -156,13 +207,14 @@ def _get_numeric_pb_label(
     )
     if prediction_or_actual == "prediction":
         return pb2.PredictionLabel(numeric=value)
-
+    if prediction_or_actual == "actual":
         return pb2.ActualLabel(numeric=value)
+    return None


 def _get_score_categorical_pb_label(
     prediction_or_actual: str,
-    value: bool | str |
+    value: bool | str | tuple[str, float],
 ) -> pb2.PredictionLabel | pb2.ActualLabel:
     sc = pb2.ScoreCategorical()
     if isinstance(value, bool):
@@ -202,8 +254,9 @@ def _get_score_categorical_pb_label(
     )
     if prediction_or_actual == "prediction":
         return pb2.PredictionLabel(score_categorical=sc)
-
+    if prediction_or_actual == "actual":
         return pb2.ActualLabel(score_categorical=sc)
+    return None


 def _get_cv_pb_label(
@@ -215,20 +268,19 @@ def _get_cv_pb_label(
 ) -> pb2.PredictionLabel | pb2.ActualLabel:
     if isinstance(value, ObjectDetectionLabel):
         return _get_object_detection_pb_label(prediction_or_actual, value)
-
+    if isinstance(value, SemanticSegmentationLabel):
         return _get_semantic_segmentation_pb_label(prediction_or_actual, value)
-
+    if isinstance(value, InstanceSegmentationPredictionLabel):
         return _get_instance_segmentation_prediction_pb_label(value)
-
+    if isinstance(value, InstanceSegmentationActualLabel):
         return _get_instance_segmentation_actual_pb_label(value)
-
-
-
-
-
-
-
-    )
+    raise InvalidValueType(
+        "cv label",
+        value,
+        "ObjectDetectionLabel, SemanticSegmentationLabel, or "
+        "InstanceSegmentationPredictionLabel for model type "
+        f"{ModelTypes.OBJECT_DETECTION}",
+    )


 def _get_object_detection_pb_label(
@@ -265,8 +317,9 @@ def _get_object_detection_pb_label(
     od.bounding_boxes.extend(bounding_boxes)
     if prediction_or_actual == "prediction":
         return pb2.PredictionLabel(object_detection=od)
-
+    if prediction_or_actual == "actual":
         return pb2.ActualLabel(object_detection=od)
+    return None


 def _get_semantic_segmentation_pb_label(
@@ -292,10 +345,11 @@ def _get_semantic_segmentation_pb_label(
         cv_label = pb2.CVPredictionLabel()
         cv_label.semantic_segmentation_label.polygons.extend(polygons)
         return pb2.PredictionLabel(cv_label=cv_label)
-
+    if prediction_or_actual == "actual":
         cv_label = pb2.CVActualLabel()
         cv_label.semantic_segmentation_label.polygons.extend(polygons)
         return pb2.ActualLabel(cv_label=cv_label)
+    return None


 def _get_instance_segmentation_prediction_pb_label(
@@ -394,7 +448,7 @@ def _get_ranking_pb_label(
         if value.label is not None:
             rp.label = value.label
         return pb2.PredictionLabel(ranking=rp)
-
+    if isinstance(value, RankingActualLabel):
         ra = pb2.RankingActual()
         # relevance_labels and relevance_score are optional
         if value.relevance_labels is not None:
@@ -402,6 +456,7 @@
         if value.relevance_score is not None:
             ra.relevance_score.value = value.relevance_score
         return pb2.ActualLabel(ranking=ra)
+    return None


 def _get_multi_class_pb_label(
@@ -447,9 +502,8 @@
             prediction_scores=prediction_scores_double_values,
         )
         mc_pred = pb2.MultiClassPrediction(single_label=single_label)
-
-
-    elif isinstance(value, MultiClassActualLabel):
+        return pb2.PredictionLabel(multi_class=mc_pred)
+    if isinstance(value, MultiClassActualLabel):
         # Validations checked actual score map is not None
         actual_labels = []  # list of class names with actual score of 1
         for class_name, score in value.actual_scores.items():
@@ -459,3 +513,4 @@
             actual_labels=actual_labels,
         )
         return pb2.ActualLabel(multi_class=mc_act)
+    return None
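
The proto.py changes above replace the old elif chains with early returns, add Google-style docstrings, and raise explicit errors for unsupported dimension values. Below is a minimal, hedged sketch of how the documented helpers behave, assuming they remain importable from arize.ml.proto as the module-level functions shown in the diff (this is not an official usage example):

# Hedged sketch based on the docstrings and branches in the diff above.
from arize.ml.proto import get_pb_dictionary, get_pb_value

tags = get_pb_dictionary({"env": "prod", "score": 0.92})  # -> {str: pb2.Value}; None -> {}
latency = get_pb_value("latency_ms", 125)       # int -> pb2.Value(int=125)
region = get_pb_value("region", "us-east-1")    # str/bool -> pb2.Value(string=...)
# Unsupported dimension types now raise TypeError ("must be one of: bool, str,
# float, int, embedding") instead of falling off the end of an elif chain.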

arize/{models → ml}/stream_validation.py
RENAMED
@@ -1,11 +1,12 @@
+"""Stream validation logic for ML model predictions."""
+
 # type: ignore[pb2]
-from typing import Dict, Tuple

 from arize.constants.ml import MAX_PREDICTION_ID_LEN, MIN_PREDICTION_ID_LEN
 from arize.exceptions.parameters import (
     InvalidValueType,
 )
-from arize.types import (
+from arize.ml.types import (
     CATEGORICAL_MODEL_TYPES,
     NUMERIC_MODEL_TYPES,
     ActualLabelTypes,
@@ -32,7 +33,7 @@ def validate_label(
     | bool
     | int
     | float
-
+    | tuple[str | bool, float]
     | ObjectDetectionLabel
     | RankingPredictionLabel
     | RankingActualLabel
@@ -41,8 +42,20 @@
     | InstanceSegmentationActualLabel
     | MultiClassPredictionLabel
     | MultiClassActualLabel,
-    embedding_features:
-):
+    embedding_features: dict[str, Embedding],
+) -> None:
+    """Validate a label value against the specified model type.
+
+    Args:
+        prediction_or_actual: Whether this is a "prediction" or "actual" label.
+        model_type: The type of model (numeric, categorical, etc.).
+        label: The label value to validate.
+        embedding_features: Dictionary of embedding features for validation.
+
+    Raises:
+        ValueError: If label is invalid for the given model type.
+        TypeError: If label type is incorrect.
+    """
     if model_type in NUMERIC_MODEL_TYPES:
         _validate_numeric_label(model_type, label)
     elif model_type in CATEGORICAL_MODEL_TYPES:
@@ -63,8 +76,8 @@ def validate_label(

 def _validate_numeric_label(
     model_type: ModelTypes,
-    label: str | bool | int | float |
-):
+    label: str | bool | int | float | tuple[str | bool, float],
+) -> None:
     if not isinstance(label, (float, int)):
         raise InvalidValueType(
             f"label {label}",
@@ -75,8 +88,8 @@ def _validate_numeric_label(

 def _validate_categorical_label(
     model_type: ModelTypes,
-    label: str | bool | int | float |
-):
+    label: str | bool | int | float | tuple[str | bool, float],
+) -> None:
     is_valid = isinstance(label, (str, bool, int, float)) or (
         isinstance(label, tuple)
         and isinstance(label[0], (str, bool))
@@ -96,8 +109,8 @@ def _validate_cv_label(
     | SemanticSegmentationLabel
     | InstanceSegmentationPredictionLabel
     | InstanceSegmentationActualLabel,
-    embedding_features:
-):
+    embedding_features: dict[str, Embedding],
+) -> None:
     if (
         not isinstance(label, ObjectDetectionLabel)
         and not isinstance(label, SemanticSegmentationLabel)
@@ -126,7 +139,7 @@ def _validate_cv_label(

 def _validate_ranking_label(
     label: RankingPredictionLabel | RankingActualLabel,
-):
+) -> None:
     if not isinstance(label, (RankingPredictionLabel, RankingActualLabel)):
         raise InvalidValueType(
             f"label {label}",
@@ -138,7 +151,7 @@ def _validate_ranking_label(

 def _validate_generative_llm_label(
     label: str | bool | int | float,
-):
+) -> None:
     is_valid = isinstance(label, (str, bool, int, float))
     if not is_valid:
         raise InvalidValueType(
@@ -150,7 +163,7 @@ def _validate_generative_llm_label(

 def _validate_multi_class_label(
     label: MultiClassPredictionLabel | MultiClassActualLabel,
-):
+) -> None:
     if not isinstance(
         label, (MultiClassPredictionLabel, MultiClassActualLabel)
     ):
@@ -167,8 +180,23 @@ def validate_and_convert_prediction_id(
     environment: Environments,
     prediction_label: PredictionLabelTypes | None = None,
     actual_label: ActualLabelTypes | None = None,
-    shap_values:
+    shap_values: dict[str, float] | None = None,
 ) -> str:
+    """Validate and convert a prediction ID to string format, or generate one if absent.
+
+    Args:
+        prediction_id: The prediction ID to validate/convert, or None.
+        environment: The environment context (training, validation, production).
+        prediction_label: Optional prediction label for delayed record detection.
+        actual_label: Optional actual label for delayed record detection.
+        shap_values: Optional SHAP values for delayed record detection.
+
+    Returns:
+        A validated prediction ID string.
+
+    Raises:
+        ValueError: If prediction ID is invalid.
+    """
     # If the user does not provide prediction id
     if prediction_id:
         # If prediction id is given by user, convert it to string and validate length
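
The widened annotations in stream_validation.py make explicit that categorical labels may be passed either as a bare value or as a (class, score) pair. A small illustration of the label shapes those signatures now document (the values here are made up):

# Label shapes per the tuple[str | bool, float] annotations above; values are illustrative.
plain_label = "fraud"            # bare categorical label
scored_label = ("fraud", 0.87)   # (class, score) pair for score-categorical labels
numeric_label = 0.42             # numeric models still take int/float labels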

arize/ml/surrogate_explainer/__init__.py
ADDED
@@ -0,0 +1 @@
+"""Surrogate explainer implementations for model interpretability."""

arize/{models → ml}/surrogate_explainer/mimic.py
RENAMED
@@ -1,9 +1,11 @@
+"""Mimic explainer implementation for surrogate model explanations."""
+
 from __future__ import annotations

 import random
 import string
 from dataclasses import replace
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING

 import numpy as np
 import pandas as pd
@@ -13,20 +15,30 @@ from interpret_community.mimic.mimic_explainer import (
 )
 from sklearn.preprocessing import LabelEncoder

-from arize.types import (
+from arize.ml.types import (
     CATEGORICAL_MODEL_TYPES,
     NUMERIC_MODEL_TYPES,
     ModelTypes,
 )

 if TYPE_CHECKING:
-    from
+    from collections.abc import Callable
+
+    from arize.ml.types import Schema


 class Mimic:
+    """Mimic explainer wrapper for generating surrogate model explanations."""
+
     _testing = False

-    def __init__(self, X: pd.DataFrame, model_func: Callable):
+    def __init__(self, X: pd.DataFrame, model_func: Callable) -> None:
+        """Initialize the Mimic explainer with training data and model.
+
+        Args:
+            X: Training data DataFrame for the surrogate model.
+            model_func: Model function to explain.
+        """
         self.explainer = MimicExplainer(
             model_func,
             X,
@@ -36,6 +48,7 @@ class Mimic:
         )

     def explain(self, X: pd.DataFrame) -> pd.DataFrame:
+        """Explain feature importance for the given input DataFrame."""
         return pd.DataFrame(
             self.explainer.explain_local(X).local_importance_values,
             columns=X.columns,
@@ -45,7 +58,8 @@ class Mimic:
     @staticmethod
     def augment(
         df: pd.DataFrame, schema: Schema, model_type: ModelTypes
-    ) ->
+    ) -> tuple[pd.DataFrame, Schema]:
+        """Augment the DataFrame and schema with SHAP values for explainability."""
         features = schema.feature_column_names
         X = df[features]

@@ -71,7 +85,7 @@ class Mimic:
             )

             # model func requires 1 positional argument
-            def model_func(_):  # type: ignore
+            def model_func(_: object) -> object:  # type: ignore
                 return np.column_stack((1 - y, y))

         elif model_type in NUMERIC_MODEL_TYPES:
@@ -89,7 +103,7 @@ class Mimic:
             )

             # model func requires 1 positional argument
-            def model_func(_):  # type: ignore
+            def model_func(_: object) -> object:  # type: ignore
                 return y

         else:
@@ -100,8 +114,9 @@ class Mimic:

         # Column name mapping between features and feature importance values.
         # This is used to augment the schema.
+        # Generate unique column names to avoid collisions (not security-sensitive)
         col_map = {
-            ft: f"{''.join(random.choices(string.ascii_letters, k=8))}"
+            ft: f"{''.join(random.choices(string.ascii_letters, k=8))}"  # noqa: S311
             for ft in features
         }
         aug_schema = replace(schema, shap_values_column_names=col_map)
@@ -127,7 +142,7 @@ class Mimic:
             X[col] = X[col].astype(object).where(~X[col].isna(), np.nan)

         # Apply integer encoding to non-numeric columns.
-        # Currently training and explaining
+        # Currently training and explaining datasets are the same, but
         # this can be changed in the future. The student model can be
         # fitted on a much larger dataset since it takes a lot less time.
         X = pd.concat(
@@ -156,7 +171,7 @@ class Mimic:

         # Fill null with zero so they're not counted as missing records by server
         if not Mimic._testing:
-            aug_df.fillna(
+            aug_df.fillna(dict.fromkeys(col_map.values(), 0), inplace=True)

         return (
             aug_df,
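
One behavioral detail worth noting in mimic.py: the fillna call that zero-fills the surrogate SHAP columns is now written with dict.fromkeys(col_map.values(), 0). The following is a self-contained pandas illustration of that pattern; the column names and values are invented, not Arize's:

# Standalone pandas example of the fillna pattern used in Mimic.augment above.
import pandas as pd

col_map = {"feature_a": "XqWpLmZt", "feature_b": "KdRyNvBc"}  # feature -> SHAP column
aug_df = pd.DataFrame({"XqWpLmZt": [0.1, None], "KdRyNvBc": [None, 0.3]})

# Zero-fill only the SHAP columns so they are not counted as missing records.
aug_df.fillna(dict.fromkeys(col_map.values(), 0), inplace=True)
print(aug_df)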