PyPI - arize - Versions diffs - 8.0.0a21__py3-none-any.whl → 8.0.0a23__py3-none-any.whl - Mend

arize 8.0.0a21__py3-none-any.whl → 8.0.0a23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (166)

arize/__init__.py +17 -9
arize/_exporter/client.py +55 -36
arize/_exporter/parsers/tracing_data_parser.py +41 -30
arize/_exporter/validation.py +3 -3
arize/_flight/client.py +208 -77
arize/_generated/api_client/__init__.py +30 -6
arize/_generated/api_client/api/__init__.py +1 -0
arize/_generated/api_client/api/datasets_api.py +864 -190
arize/_generated/api_client/api/experiments_api.py +167 -131
arize/_generated/api_client/api/projects_api.py +1197 -0
arize/_generated/api_client/api_client.py +2 -2
arize/_generated/api_client/configuration.py +42 -34
arize/_generated/api_client/exceptions.py +2 -2
arize/_generated/api_client/models/__init__.py +15 -4
arize/_generated/api_client/models/dataset.py +10 -10
arize/_generated/api_client/models/dataset_example.py +111 -0
arize/_generated/api_client/models/dataset_example_update.py +100 -0
arize/_generated/api_client/models/dataset_version.py +13 -13
arize/_generated/api_client/models/datasets_create_request.py +16 -8
arize/_generated/api_client/models/datasets_examples_insert_request.py +100 -0
arize/_generated/api_client/models/datasets_examples_list200_response.py +106 -0
arize/_generated/api_client/models/datasets_examples_update_request.py +102 -0
arize/_generated/api_client/models/datasets_list200_response.py +10 -4
arize/_generated/api_client/models/experiment.py +14 -16
arize/_generated/api_client/models/experiment_run.py +108 -0
arize/_generated/api_client/models/experiment_run_create.py +102 -0
arize/_generated/api_client/models/experiments_create_request.py +16 -10
arize/_generated/api_client/models/experiments_list200_response.py +10 -4
arize/_generated/api_client/models/experiments_runs_list200_response.py +19 -5
arize/_generated/api_client/models/{error.py → pagination_metadata.py} +13 -11
arize/_generated/api_client/models/primitive_value.py +172 -0
arize/_generated/api_client/models/problem.py +100 -0
arize/_generated/api_client/models/project.py +99 -0
arize/_generated/api_client/models/{datasets_list_examples200_response.py → projects_create_request.py} +13 -11
arize/_generated/api_client/models/projects_list200_response.py +106 -0
arize/_generated/api_client/rest.py +2 -2
arize/_generated/api_client/test/test_dataset.py +4 -2
arize/_generated/api_client/test/test_dataset_example.py +56 -0
arize/_generated/api_client/test/test_dataset_example_update.py +52 -0
arize/_generated/api_client/test/test_dataset_version.py +7 -2
arize/_generated/api_client/test/test_datasets_api.py +27 -13
arize/_generated/api_client/test/test_datasets_create_request.py +8 -4
arize/_generated/api_client/test/{test_datasets_list_examples200_response.py → test_datasets_examples_insert_request.py} +19 -15
arize/_generated/api_client/test/test_datasets_examples_list200_response.py +66 -0
arize/_generated/api_client/test/test_datasets_examples_update_request.py +61 -0
arize/_generated/api_client/test/test_datasets_list200_response.py +9 -3
arize/_generated/api_client/test/test_experiment.py +2 -4
arize/_generated/api_client/test/test_experiment_run.py +56 -0
arize/_generated/api_client/test/test_experiment_run_create.py +54 -0
arize/_generated/api_client/test/test_experiments_api.py +6 -6
arize/_generated/api_client/test/test_experiments_create_request.py +9 -6
arize/_generated/api_client/test/test_experiments_list200_response.py +9 -5
arize/_generated/api_client/test/test_experiments_runs_list200_response.py +15 -5
arize/_generated/api_client/test/test_pagination_metadata.py +53 -0
arize/_generated/api_client/test/{test_error.py → test_primitive_value.py} +13 -14
arize/_generated/api_client/test/test_problem.py +57 -0
arize/_generated/api_client/test/test_project.py +58 -0
arize/_generated/api_client/test/test_projects_api.py +59 -0
arize/_generated/api_client/test/test_projects_create_request.py +54 -0
arize/_generated/api_client/test/test_projects_list200_response.py +70 -0
arize/_generated/api_client_README.md +43 -29
arize/_generated/protocol/flight/flight_pb2.py +400 -0
arize/_lazy.py +27 -19
arize/client.py +269 -55
arize/config.py +365 -116
arize/constants/__init__.py +1 -0
arize/constants/config.py +11 -4
arize/constants/ml.py +6 -4
arize/constants/openinference.py +2 -0
arize/constants/pyarrow.py +2 -0
arize/constants/spans.py +3 -1
arize/datasets/__init__.py +1 -0
arize/datasets/client.py +299 -84
arize/datasets/errors.py +32 -2
arize/datasets/validation.py +18 -8
arize/embeddings/__init__.py +2 -0
arize/embeddings/auto_generator.py +23 -19
arize/embeddings/base_generators.py +89 -36
arize/embeddings/constants.py +2 -0
arize/embeddings/cv_generators.py +26 -4
arize/embeddings/errors.py +27 -5
arize/embeddings/nlp_generators.py +31 -12
arize/embeddings/tabular_generators.py +32 -20
arize/embeddings/usecases.py +12 -2
arize/exceptions/__init__.py +1 -0
arize/exceptions/auth.py +11 -1
arize/exceptions/base.py +29 -4
arize/exceptions/models.py +21 -2
arize/exceptions/parameters.py +31 -0
arize/exceptions/spaces.py +12 -1
arize/exceptions/types.py +86 -7
arize/exceptions/values.py +220 -20
arize/experiments/__init__.py +1 -0
arize/experiments/client.py +390 -286
arize/experiments/evaluators/__init__.py +1 -0
arize/experiments/evaluators/base.py +74 -41
arize/experiments/evaluators/exceptions.py +6 -3
arize/experiments/evaluators/executors.py +121 -73
arize/experiments/evaluators/rate_limiters.py +106 -57
arize/experiments/evaluators/types.py +34 -7
arize/experiments/evaluators/utils.py +65 -27
arize/experiments/functions.py +103 -101
arize/experiments/tracing.py +52 -44
arize/experiments/types.py +56 -31
arize/logging.py +54 -22
arize/models/__init__.py +1 -0
arize/models/batch_validation/__init__.py +1 -0
arize/models/batch_validation/errors.py +543 -65
arize/models/batch_validation/validator.py +339 -300
arize/models/bounded_executor.py +20 -7
arize/models/casting.py +75 -29
arize/models/client.py +326 -107
arize/models/proto.py +95 -40
arize/models/stream_validation.py +42 -14
arize/models/surrogate_explainer/__init__.py +1 -0
arize/models/surrogate_explainer/mimic.py +24 -13
arize/pre_releases.py +43 -0
arize/projects/__init__.py +1 -0
arize/projects/client.py +129 -0
arize/regions.py +40 -0
arize/spans/__init__.py +1 -0
arize/spans/client.py +130 -106
arize/spans/columns.py +13 -0
arize/spans/conversion.py +54 -38
arize/spans/validation/__init__.py +1 -0
arize/spans/validation/annotations/__init__.py +1 -0
arize/spans/validation/annotations/annotations_validation.py +6 -4
arize/spans/validation/annotations/dataframe_form_validation.py +13 -11
arize/spans/validation/annotations/value_validation.py +35 -11
arize/spans/validation/common/__init__.py +1 -0
arize/spans/validation/common/argument_validation.py +33 -8
arize/spans/validation/common/dataframe_form_validation.py +35 -9
arize/spans/validation/common/errors.py +211 -11
arize/spans/validation/common/value_validation.py +80 -13
arize/spans/validation/evals/__init__.py +1 -0
arize/spans/validation/evals/dataframe_form_validation.py +28 -8
arize/spans/validation/evals/evals_validation.py +34 -4
arize/spans/validation/evals/value_validation.py +26 -3
arize/spans/validation/metadata/__init__.py +1 -1
arize/spans/validation/metadata/argument_validation.py +14 -5
arize/spans/validation/metadata/dataframe_form_validation.py +26 -10
arize/spans/validation/metadata/value_validation.py +24 -10
arize/spans/validation/spans/__init__.py +1 -0
arize/spans/validation/spans/dataframe_form_validation.py +34 -13
arize/spans/validation/spans/spans_validation.py +35 -4
arize/spans/validation/spans/value_validation.py +76 -7
arize/types.py +293 -157
arize/utils/__init__.py +1 -0
arize/utils/arrow.py +31 -15
arize/utils/cache.py +34 -6
arize/utils/dataframe.py +19 -2
arize/utils/online_tasks/__init__.py +2 -0
arize/utils/online_tasks/dataframe_preprocessor.py +53 -41
arize/utils/openinference_conversion.py +44 -5
arize/utils/proto.py +10 -0
arize/utils/size.py +5 -3
arize/version.py +3 -1
{arize-8.0.0a21.dist-info → arize-8.0.0a23.dist-info}/METADATA +4 -3
arize-8.0.0a23.dist-info/RECORD +174 -0
{arize-8.0.0a21.dist-info → arize-8.0.0a23.dist-info}/WHEEL +1 -1
arize-8.0.0a23.dist-info/licenses/LICENSE +176 -0
arize-8.0.0a23.dist-info/licenses/NOTICE +13 -0
arize/_generated/protocol/flight/export_pb2.py +0 -61
arize/_generated/protocol/flight/ingest_pb2.py +0 -365
arize-8.0.0a21.dist-info/RECORD +0 -146
arize-8.0.0a21.dist-info/licenses/LICENSE.md +0 -12

arize/spans/validation/common/errors.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import Any, List
+"""Common validation error classes for spans."""
 from arize.constants.ml import (
     MAX_EMBEDDING_DIMENSIONALITY,
@@ -23,26 +23,46 @@ from arize.logging import log_a_list
 class InvalidTypeArgument(ValidationError):
+    """Raised when an argument has an invalid type."""
     def __repr__(self) -> str:
+        """Return a string representation for debugging and logging."""
         return "Invalid_Type_Argument"
-    def __init__(self, arg_name: str, arg_type: str, wrong_arg: Any) -> None:
+    def __init__(self, arg_name: str, arg_type: str, wrong_arg: object) -> None:
+        """Initialize the exception with argument type validation context.
+        Args:
+            arg_name: Name of the argument with invalid type.
+            arg_type: Expected type for the argument.
+            wrong_arg: Actual argument value that was invalid.
+        """
         self.arg_name = arg_name
         self.arg_type = arg_type
         self.wrong_arg = wrong_arg
     def error_message(self) -> str:
+        """Return the error message for this exception."""
         return f"The {self.arg_name} must be a {self.arg_type}. Found {type(self.wrong_arg)}"
 class InvalidDateTimeFormatType(ValidationError):
+    """Raised when datetime format type is invalid or not supported."""
     def __repr__(self) -> str:
+        """Return a string representation for debugging and logging."""
         return "Invalid_DateTime_Format_Type"
-    def __init__(self, wrong_input: Any) -> None:
+    def __init__(self, wrong_input: object) -> None:
+        """Initialize the exception with datetime format validation context.
+        Args:
+            wrong_input: Invalid input that was provided for datetime format.
+        """
         self.wrong_input = wrong_input
     def error_message(self) -> str:
+        """Return the error message for this exception."""
         return f"The date time format must be a string. Found {type(self.wrong_input)}"
@@ -52,13 +72,22 @@ class InvalidDateTimeFormatType(ValidationError):
 class InvalidDataFrameDuplicateColumns(ValidationError):
+    """Raised when dataframe contains duplicate column names."""
     def __repr__(self) -> str:
+        """Return a string representation for debugging and logging."""
         return "Invalid_DataFrame_Duplicate_Columns"
-    def __init__(self, duplicate_cols: List[str]) -> None:
+    def __init__(self, duplicate_cols: list[str]) -> None:
+        """Initialize the exception with duplicate columns context.
+        Args:
+            duplicate_cols: List of column names that have duplicates in the dataframe.
+        """
         self.duplicate_cols = duplicate_cols
     def error_message(self) -> str:
+        """Return the error message for this exception."""
         return (
             f"The following columns have duplicates in the dataframe: "
             f"{log_a_list(self.duplicate_cols, 'and')}"
@@ -66,13 +95,22 @@ class InvalidDataFrameDuplicateColumns(ValidationError):
 class InvalidDataFrameMissingColumns(ValidationError):
+    """Raised when required columns are missing from dataframe."""
     def __repr__(self) -> str:
+        """Return a string representation for debugging and logging."""
         return "Invalid_DataFrame_Missing_Columns"
-    def __init__(self, missing_cols: List[str]) -> None:
+    def __init__(self, missing_cols: list[str]) -> None:
+        """Initialize the exception with missing columns context.
+        Args:
+            missing_cols: List of required columns that are missing from the dataframe.
+        """
         self.missing_cols = missing_cols
     def error_message(self) -> str:
+        """Return the error message for this exception."""
         return (
             f"The following columns are missing in the dataframe and are required: "
             f"{log_a_list(self.missing_cols, 'and')}"
@@ -80,16 +118,26 @@ class InvalidDataFrameMissingColumns(ValidationError):
 class InvalidDataFrameColumnContentTypes(ValidationError):
+    """Raised when dataframe column content types are invalid."""
     def __repr__(self) -> str:
+        """Return a string representation for debugging and logging."""
         return "Invalid_DataFrame_Column_Content_Types"
     def __init__(
-        self, invalid_type_cols: List[str], expected_type: str
+        self, invalid_type_cols: list[str], expected_type: str
     ) -> None:
+        """Initialize the exception with column content type validation context.
+        Args:
+            invalid_type_cols: List of columns with incorrect content types.
+            expected_type: Expected content type for the columns.
+        """
         self.invalid_type_cols = invalid_type_cols
         self.expected_type = expected_type
     def error_message(self) -> str:
+        """Return the error message for this exception."""
         return (
             "Found dataframe columns containing the wrong data type. "
             f"The following columns should contain {self.expected_type}: "
@@ -103,13 +151,22 @@ class InvalidDataFrameColumnContentTypes(ValidationError):
 class InvalidMissingValueInColumn(ValidationError):
+    """Raised when column contains null or missing values."""
     def __repr__(self) -> str:
+        """Return a string representation for debugging and logging."""
         return "Invalid_Missin_Value_In_Column"
     def __init__(self, col_name: str) -> None:
+        """Initialize the exception with missing value context.
+        Args:
+            col_name: Name of the column containing missing values.
+        """
         self.col_name = col_name
     def error_message(self) -> str:
+        """Return the error message for this exception."""
         return (
             f"The column '{self.col_name}' has at least one missing value. "
             "This column must not have missing values"
@@ -117,15 +174,26 @@ class InvalidMissingValueInColumn(ValidationError):
 class InvalidStringLengthInColumn(ValidationError):
+    """Raised when string values in column exceed length limits."""
     def __repr__(self) -> str:
+        """Return a string representation for debugging and logging."""
         return "Invalid_String_Length_In_Column"
     def __init__(self, col_name: str, min_length: int, max_length: int) -> None:
+        """Initialize the exception with string length validation context.
+        Args:
+            col_name: Name of the column with invalid string lengths.
+            min_length: Minimum acceptable string length.
+            max_length: Maximum acceptable string length.
+        """
         self.col_name = col_name
         self.min_length = min_length
         self.max_length = max_length
     def error_message(self) -> str:
+        """Return the error message for this exception."""
         return (
             f"The column '{self.col_name}' contains invalid string values, "
             f"their length must be between {self.min_length} and {self.max_length}."
@@ -133,27 +201,46 @@ class InvalidStringLengthInColumn(ValidationError):
 class InvalidJsonStringInColumn(ValidationError):
+    """Raised when JSON string in column is invalid or malformed."""
     def __repr__(self) -> str:
+        """Return a string representation for debugging and logging."""
         return "Invalid_Json_String_In_Column"
     def __init__(self, col_name: str) -> None:
+        """Initialize the exception with JSON string validation context.
+        Args:
+            col_name: Name of the column containing invalid JSON strings.
+        """
         self.col_name = col_name
     def error_message(self) -> str:
+        """Return the error message for this exception."""
         return (
             f"The column '{self.col_name}' contains invalid JSON string values."
         )
 class InvalidStringValueNotAllowedInColumn(ValidationError):
+    """Raised when column contains disallowed string values."""
     def __repr__(self) -> str:
+        """Return a string representation for debugging and logging."""
         return "Invalid_String_Value_Not_Allowed_In_Column"
-    def __init__(self, col_name: str, allowed_values: List[str]) -> None:
+    def __init__(self, col_name: str, allowed_values: list[str]) -> None:
+        """Initialize the exception with allowed string values validation context.
+        Args:
+            col_name: Name of the column containing disallowed values.
+            allowed_values: List of values that are allowed in the column.
+        """
         self.col_name = col_name
         self.allowed_values = allowed_values
     def error_message(self) -> str:
+        """Return the error message for this exception."""
         return (
             f"The column '{self.col_name}' contains invalid string values. "
             f"Allowed values are {log_a_list(self.allowed_values, 'and')}"
@@ -161,13 +248,22 @@ class InvalidStringValueNotAllowedInColumn(ValidationError):
 class InvalidTimestampValueInColumn(ValidationError):
+    """Raised when timestamp values in column are outside acceptable range."""
     def __repr__(self) -> str:
+        """Return a string representation for debugging and logging."""
         return "Invalid_Timestamp_Value_In_Column"
     def __init__(self, timestamp_col_name: str) -> None:
+        """Initialize the exception with timestamp validation context.
+        Args:
+            timestamp_col_name: Name of the column containing invalid timestamp values.
+        """
         self.timestamp_col_name = timestamp_col_name
     def error_message(self) -> str:
+        """Return the error message for this exception."""
         return (
             f"At least one timestamp in the column '{self.timestamp_col_name}' is out of range. "
             f"Timestamps must be within {MAX_FUTURE_YEARS_FROM_CURRENT_TIME} year "
@@ -177,14 +273,24 @@ class InvalidTimestampValueInColumn(ValidationError):
 class InvalidStartAndEndTimeValuesInColumn(ValidationError):
+    """Raised when start time is not before end time in span records."""
     def __repr__(self) -> str:
+        """Return a string representation for debugging and logging."""
         return "Invalid_Start_And_End_Time_Values_In_Column"
     def __init__(self, greater_col_name: str, less_col_name: str) -> None:
+        """Initialize the exception with span time validation context.
+        Args:
+            greater_col_name: Name of the column that should have greater values (end time).
+            less_col_name: Name of the column that should have lesser values (start time).
+        """
         self.greater_col_name = greater_col_name
         self.less_col_name = less_col_name
     def error_message(self) -> str:
+        """Return the error message for this exception."""
         return (
             f"Invalid span times. Values in column '{self.greater_col_name}' "
             f"should be greater than values in column '{self.less_col_name}'"
@@ -192,7 +298,10 @@ class InvalidStartAndEndTimeValuesInColumn(ValidationError):
 class InvalidEventValueInColumn(ValidationError):
+    """Raised when event values in column are invalid or malformed."""
     def __repr__(self) -> str:
+        """Return a string representation for debugging and logging."""
         return "Invalid_Event_Value_In_Column"
     def __init__(
@@ -202,12 +311,21 @@ class InvalidEventValueInColumn(ValidationError):
         wrong_time: bool,
         wrong_attrs: bool,
     ) -> None:
+        """Initialize the exception with event value validation context.
+        Args:
+            col_name: Name of the column containing invalid events.
+            wrong_name: Whether event names are invalid.
+            wrong_time: Whether event timestamps are invalid.
+            wrong_attrs: Whether event attributes are invalid.
+        """
         self.col_name = col_name
         self.wrong_name = wrong_name
         self.wrong_time = wrong_time
         self.wrong_attrs = wrong_attrs
     def error_message(self) -> str:
+        """Return the error message for this exception."""
         msg = f"Found at least one invalid event in column '{self.col_name}'. "
         if self.wrong_name:
             msg += (
@@ -228,7 +346,10 @@ class InvalidEventValueInColumn(ValidationError):
 class InvalidLLMMessageValueInColumn(ValidationError):
+    """Raised when LLM message values in column are invalid or malformed."""
     def __repr__(self) -> str:
+        """Return a string representation for debugging and logging."""
         return "Invalid_LLM_Message_Value_In_Column"
     def __init__(
@@ -238,12 +359,21 @@ class InvalidLLMMessageValueInColumn(ValidationError):
         wrong_content: bool,
         wrong_tool_calls: bool,
     ) -> None:
+        """Initialize the exception with LLM message validation context.
+        Args:
+            col_name: Name of the column containing invalid LLM messages.
+            wrong_role: Whether message roles are invalid.
+            wrong_content: Whether message contents are invalid.
+            wrong_tool_calls: Whether tool calls are invalid.
+        """
         self.col_name = col_name
         self.wrong_role = wrong_role
         self.wrong_content = wrong_content
         self.wrong_tool_calls = wrong_tool_calls
     def error_message(self) -> str:
+        """Return the error message for this exception."""
         msg = f"Found at least one invalid LLM message in column '{self.col_name}'. "
         if self.wrong_role:
             msg += (
@@ -265,17 +395,28 @@ class InvalidLLMMessageValueInColumn(ValidationError):
 class InvalidEmbeddingValueInColumn(ValidationError):
+    """Raised when embedding values in column are invalid or malformed."""
     def __repr__(self) -> str:
+        """Return a string representation for debugging and logging."""
         return "Invalid_Embedding_Value_In_Column"
     def __init__(
         self, col_name: str, wrong_vector: bool, wrong_text: bool
     ) -> None:
+        """Initialize the exception with embedding value validation context.
+        Args:
+            col_name: Name of the column containing invalid embeddings.
+            wrong_vector: Whether embedding vectors are invalid.
+            wrong_text: Whether embedding texts are invalid.
+        """
         self.col_name = col_name
         self.wrong_vector = wrong_vector
         self.wrong_text = wrong_text
     def error_message(self) -> str:
+        """Return the error message for this exception."""
         msg = f"Found at least one invalid embedding object in column '{self.col_name}'. "
         if self.wrong_vector:
             msg += (
@@ -292,7 +433,10 @@ class InvalidEmbeddingValueInColumn(ValidationError):
 class InvalidDocumentValueInColumn(ValidationError):
+    """Raised when document values in column are invalid or malformed."""
     def __repr__(self) -> str:
+        """Return a string representation for debugging and logging."""
         return "Invalid_Document_Value_In_Column"
     def __init__(
@@ -302,12 +446,21 @@ class InvalidDocumentValueInColumn(ValidationError):
         wrong_content: bool,
         wrong_metadata: bool,
     ) -> None:
+        """Initialize the exception with document value validation context.
+        Args:
+            col_name: Name of the column containing invalid documents.
+            wrong_id: Whether document IDs are invalid.
+            wrong_content: Whether document contents are invalid.
+            wrong_metadata: Whether document metadata is invalid.
+        """
         self.col_name = col_name
         self.wrong_id = wrong_id
         self.wrong_content = wrong_content
         self.wrong_metadata = wrong_metadata
     def error_message(self) -> str:
+        """Return the error message for this exception."""
         msg = (
             f"Found at least one invalid document in column '{self.col_name}'. "
         )
@@ -331,13 +484,22 @@ class InvalidDocumentValueInColumn(ValidationError):
 class InvalidFloatValueInColumn(ValidationError):
+    """Raised when float values in column are invalid or out of range."""
     def __repr__(self) -> str:
+        """Return a string representation for debugging and logging."""
         return "Invalid_Float_Value_In_Column"
     def __init__(self, col_name: str) -> None:
+        """Initialize the exception with float value validation context.
+        Args:
+            col_name: Name of the column containing invalid float values.
+        """
         self.col_name = col_name
     def error_message(self) -> str:
+        """Return the error message for this exception."""
         return (
             f"The column '{self.col_name}' contains invalid float values. "
             f"Invalid values are +/- infinite values."
@@ -345,13 +507,22 @@ class InvalidFloatValueInColumn(ValidationError):
 class InvalidNullEvalLabelAndScore(ValidationError):
+    """Raised when both eval label and score are null in a record."""
     def __repr__(self) -> str:
+        """Return a string representation for debugging and logging."""
         return "Invalid_Null_Eval_Label_And_Score"
-    def __init__(self, eval_names: List[str]) -> None:
+    def __init__(self, eval_names: list[str]) -> None:
+        """Initialize the exception with eval label and score validation context.
+        Args:
+            eval_names: List of eval names missing both label and score.
+        """
         self.eval_names = eval_names
     def error_message(self) -> str:
+        """Return the error message for this exception."""
         return (
             f"There is at least one row without a label and score for the following evals: "
             f"{log_a_list(self.eval_names, 'and')}"
@@ -359,14 +530,24 @@ class InvalidNullEvalLabelAndScore(ValidationError):
 class DuplicateAnnotationNameInSpan(ValidationError):
+    """Raised when a span contains duplicate annotation names."""
     def __repr__(self) -> str:
+        """Return a string representation for debugging and logging."""
         return "Duplicate_Annotation_Name_In_Span"
-    def __init__(self, span_id: str, duplicate_names: List[str]) -> None:
+    def __init__(self, span_id: str, duplicate_names: list[str]) -> None:
+        """Initialize the exception with duplicate annotation names context.
+        Args:
+            span_id: ID of the span containing duplicate annotations.
+            duplicate_names: List of annotation names that are duplicated.
+        """
         self.span_id = span_id
         self.duplicate_names = duplicate_names
     def error_message(self) -> str:
+        """Return the error message for this exception."""
         return (
             f"Found duplicate annotation/eval names within the same span_id '{self.span_id}'. "
             f"Duplicate names: {log_a_list(self.duplicate_names, 'and')}. "
@@ -376,13 +557,22 @@ class DuplicateAnnotationNameInSpan(ValidationError):
 class InvalidNullAnnotationLabelAndScore(ValidationError):
+    """Raised when both annotation label and score are null in a record."""
     def __repr__(self) -> str:
+        """Return a string representation for debugging and logging."""
         return "Invalid_Null_Annotation_Label_And_Score"
-    def __init__(self, annotation_names: List[str]) -> None:
+    def __init__(self, annotation_names: list[str]) -> None:
+        """Initialize the exception with annotation validation context.
+        Args:
+            annotation_names: List of annotation names missing both label and score.
+        """
         self.annotation_names = annotation_names
     def error_message(self) -> str:
+        """Return the error message for this exception."""
         return (
             "There is at least one row where both label and score are missing for the "
             f"following annotations: {log_a_list(self.annotation_names, 'and')}. "
@@ -391,16 +581,26 @@ class InvalidNullAnnotationLabelAndScore(ValidationError):
 class InvalidAnnotationColumnFormat(ValidationError):
+    """Raised when annotation column format is invalid or malformed."""
     def __repr__(self) -> str:
+        """Return a string representation for debugging and logging."""
         return "Invalid_Annotation_Column_Format"
     def __init__(
-        self, invalid_format_cols: List[str], expected_format: str
+        self, invalid_format_cols: list[str], expected_format: str
     ) -> None:
+        """Initialize the exception with annotation column format validation context.
+        Args:
+            invalid_format_cols: List of columns with invalid annotation format.
+            expected_format: Expected format for annotation columns.
+        """
         self.invalid_format_cols = invalid_format_cols
         self.expected_format = expected_format
     def error_message(self) -> str:
+        """Return the error message for this exception."""
         return (
             f"The following columns have an invalid annotation column format: "
             f"{log_a_list(self.invalid_format_cols, 'and')}. "

arize/spans/validation/common/value_validation.py CHANGED Viewed

@@ -1,6 +1,7 @@
+"""Common value validation logic for span data."""
 import logging
-from datetime import datetime, timedelta
-from typing import List
+from datetime import datetime, timedelta, timezone
 import numpy as np
 import pandas as pd
@@ -31,7 +32,15 @@ logger = logging.getLogger(__name__)
 def check_invalid_project_name(
     project_name: str | None,
-) -> List[InvalidProjectName]:
+) -> list[InvalidProjectName]:
+    """Validates that the project name is a non-empty string.
+    Args:
+        project_name: The project name to validate.
+    Returns:
+        List of validation errors if project name is invalid (empty if valid).
+    """
     # assume it's been coerced to string beforehand
     if (not isinstance(project_name, str)) or len(project_name.strip()) == 0:
         return [InvalidProjectName()]
@@ -40,7 +49,15 @@ def check_invalid_project_name(
 def check_invalid_model_version(
     model_version: str | None = None,
-) -> List[InvalidModelVersion]:
+) -> list[InvalidModelVersion]:
+    """Validates that the model version, if provided, is a non-empty string.
+    Args:
+        model_version: The optional model version to validate.
+    Returns:
+        List of validation errors if model version is invalid (empty if valid or None).
+    """
     if model_version is None:
         return []
     if not isinstance(model_version, str) or len(model_version.strip()) == 0:
@@ -56,7 +73,20 @@ def check_string_column_value_length(
     max_len: int,
     is_required: bool,
     must_be_json: bool = False,
-) -> List[InvalidMissingValueInColumn | InvalidStringLengthInColumn]:
+) -> list[InvalidMissingValueInColumn | InvalidStringLengthInColumn]:
+    """Validate string column values are within length bounds and optionally valid JSON.
+    Args:
+        df: The DataFrame to validate.
+        col_name: Name of the column to check.
+        min_len: Minimum allowed string length.
+        max_len: Maximum allowed string length.
+        is_required: Whether the column must have non-null values.
+        must_be_json: Whether values must be valid JSON strings. Defaults to False.
+    Returns:
+        List of validation errors for missing values, invalid lengths, or invalid JSON.
+    """
     if col_name not in df.columns:
         return []
@@ -97,9 +127,20 @@ def check_string_column_value_length(
 def check_string_column_allowed_values(
     df: pd.DataFrame,
     col_name: str,
-    allowed_values: List[str],
+    allowed_values: list[str],
     is_required: bool,
-) -> List[InvalidMissingValueInColumn | InvalidStringValueNotAllowedInColumn]:
+) -> list[InvalidMissingValueInColumn | InvalidStringValueNotAllowedInColumn]:
+    """Validate that string column values are within allowed values.
+    Args:
+        df: The DataFrame to validate.
+        col_name: The column name to check.
+        allowed_values: List of allowed string values (case-insensitive).
+        is_required: Whether the column must not have missing values.
+    Returns:
+        List of validation errors found.
+    """
     if col_name not in df.columns:
         return []
@@ -136,7 +177,16 @@ def check_string_column_allowed_values(
 def check_float_column_valid_numbers(
     df: pd.DataFrame,
     col_name: str,
-) -> List[InvalidFloatValueInColumn]:
+) -> list[InvalidFloatValueInColumn]:
+    """Check that float column contains only finite numbers, no infinity values.
+    Args:
+        df: The DataFrame to validate.
+        col_name: The column name to check.
+    Returns:
+        List containing InvalidFloatValueInColumn error if infinite values found.
+    """
     if col_name not in df.columns:
         return []
     # np.isinf will fail on None values, change Nones to np.nan and check on that
@@ -145,18 +195,25 @@ def check_float_column_valid_numbers(
     invalid_exists = invalid_mask.any()
     if invalid_exists:
-        error = [InvalidFloatValueInColumn(col_name=col_name)]
-        return error
+        return [InvalidFloatValueInColumn(col_name=col_name)]
     return []
 def check_value_columns_start_end_time(
     df: pd.DataFrame,
-) -> List[
+) -> list[
     InvalidMissingValueInColumn
     | InvalidTimestampValueInColumn
     | InvalidStartAndEndTimeValuesInColumn
 ]:
+    """Validate start and end time columns for timestamps and logical ordering.
+    Args:
+        df: The DataFrame containing start and end time columns.
+    Returns:
+        List of validation errors for missing values, invalid timestamps, or start > end.
+    """
     errors = []
     errors += check_value_timestamp(
         df=df,
@@ -186,7 +243,17 @@ def check_value_timestamp(
     df: pd.DataFrame,
     col_name: str,
     is_required: bool,
-) -> List[InvalidMissingValueInColumn | InvalidTimestampValueInColumn]:
+) -> list[InvalidMissingValueInColumn | InvalidTimestampValueInColumn]:
+    """Validate timestamp column values are within reasonable bounds.
+    Args:
+        df: The DataFrame to validate.
+        col_name: The column name containing timestamps in nanoseconds.
+        is_required: Whether missing values should be flagged as errors.
+    Returns:
+        List of validation errors for missing or out-of-bounds timestamps.
+    """
     # This check expects that timestamps have previously been converted to nanoseconds
     if col_name not in df.columns:
         return []
@@ -199,7 +266,7 @@ def check_value_timestamp(
             )
         )
-    now_t = datetime.now()
+    now_t = datetime.now(tz=timezone.utc)
     lbound, ubound = (
         (
             now_t - timedelta(days=MAX_PAST_YEARS_FROM_CURRENT_TIME * 365)

arize/spans/validation/evals/__init__.py CHANGED Viewed

@@ -0,0 +1 @@
+"""Evaluation data validation for LLM tracing spans."""

arize 8.0.0a21__py3-none-any.whl → 8.0.0a23__py3-none-any.whl

arize 8.0.0a21__py3-none-any.whl → 8.0.0a23__py3-none-any.whl