arize 8.0.0a22__py3-none-any.whl → 8.0.0b0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arize/__init__.py +28 -19
- arize/_exporter/client.py +56 -37
- arize/_exporter/parsers/tracing_data_parser.py +41 -30
- arize/_exporter/validation.py +3 -3
- arize/_flight/client.py +207 -76
- arize/_generated/api_client/__init__.py +30 -6
- arize/_generated/api_client/api/__init__.py +1 -0
- arize/_generated/api_client/api/datasets_api.py +864 -190
- arize/_generated/api_client/api/experiments_api.py +167 -131
- arize/_generated/api_client/api/projects_api.py +1197 -0
- arize/_generated/api_client/api_client.py +2 -2
- arize/_generated/api_client/configuration.py +42 -34
- arize/_generated/api_client/exceptions.py +2 -2
- arize/_generated/api_client/models/__init__.py +15 -4
- arize/_generated/api_client/models/dataset.py +10 -10
- arize/_generated/api_client/models/dataset_example.py +111 -0
- arize/_generated/api_client/models/dataset_example_update.py +100 -0
- arize/_generated/api_client/models/dataset_version.py +13 -13
- arize/_generated/api_client/models/datasets_create_request.py +16 -8
- arize/_generated/api_client/models/datasets_examples_insert_request.py +100 -0
- arize/_generated/api_client/models/datasets_examples_list200_response.py +106 -0
- arize/_generated/api_client/models/datasets_examples_update_request.py +102 -0
- arize/_generated/api_client/models/datasets_list200_response.py +10 -4
- arize/_generated/api_client/models/experiment.py +14 -16
- arize/_generated/api_client/models/experiment_run.py +108 -0
- arize/_generated/api_client/models/experiment_run_create.py +102 -0
- arize/_generated/api_client/models/experiments_create_request.py +16 -10
- arize/_generated/api_client/models/experiments_list200_response.py +10 -4
- arize/_generated/api_client/models/experiments_runs_list200_response.py +19 -5
- arize/_generated/api_client/models/{error.py → pagination_metadata.py} +13 -11
- arize/_generated/api_client/models/primitive_value.py +172 -0
- arize/_generated/api_client/models/problem.py +100 -0
- arize/_generated/api_client/models/project.py +99 -0
- arize/_generated/api_client/models/{datasets_list_examples200_response.py → projects_create_request.py} +13 -11
- arize/_generated/api_client/models/projects_list200_response.py +106 -0
- arize/_generated/api_client/rest.py +2 -2
- arize/_generated/api_client/test/test_dataset.py +4 -2
- arize/_generated/api_client/test/test_dataset_example.py +56 -0
- arize/_generated/api_client/test/test_dataset_example_update.py +52 -0
- arize/_generated/api_client/test/test_dataset_version.py +7 -2
- arize/_generated/api_client/test/test_datasets_api.py +27 -13
- arize/_generated/api_client/test/test_datasets_create_request.py +8 -4
- arize/_generated/api_client/test/{test_datasets_list_examples200_response.py → test_datasets_examples_insert_request.py} +19 -15
- arize/_generated/api_client/test/test_datasets_examples_list200_response.py +66 -0
- arize/_generated/api_client/test/test_datasets_examples_update_request.py +61 -0
- arize/_generated/api_client/test/test_datasets_list200_response.py +9 -3
- arize/_generated/api_client/test/test_experiment.py +2 -4
- arize/_generated/api_client/test/test_experiment_run.py +56 -0
- arize/_generated/api_client/test/test_experiment_run_create.py +54 -0
- arize/_generated/api_client/test/test_experiments_api.py +6 -6
- arize/_generated/api_client/test/test_experiments_create_request.py +9 -6
- arize/_generated/api_client/test/test_experiments_list200_response.py +9 -5
- arize/_generated/api_client/test/test_experiments_runs_list200_response.py +15 -5
- arize/_generated/api_client/test/test_pagination_metadata.py +53 -0
- arize/_generated/api_client/test/{test_error.py → test_primitive_value.py} +13 -14
- arize/_generated/api_client/test/test_problem.py +57 -0
- arize/_generated/api_client/test/test_project.py +58 -0
- arize/_generated/api_client/test/test_projects_api.py +59 -0
- arize/_generated/api_client/test/test_projects_create_request.py +54 -0
- arize/_generated/api_client/test/test_projects_list200_response.py +70 -0
- arize/_generated/api_client_README.md +43 -29
- arize/_generated/protocol/flight/flight_pb2.py +400 -0
- arize/_lazy.py +27 -19
- arize/client.py +181 -58
- arize/config.py +324 -116
- arize/constants/__init__.py +1 -0
- arize/constants/config.py +11 -4
- arize/constants/ml.py +6 -4
- arize/constants/openinference.py +2 -0
- arize/constants/pyarrow.py +2 -0
- arize/constants/spans.py +3 -1
- arize/datasets/__init__.py +1 -0
- arize/datasets/client.py +304 -84
- arize/datasets/errors.py +32 -2
- arize/datasets/validation.py +18 -8
- arize/embeddings/__init__.py +2 -0
- arize/embeddings/auto_generator.py +23 -19
- arize/embeddings/base_generators.py +89 -36
- arize/embeddings/constants.py +2 -0
- arize/embeddings/cv_generators.py +26 -4
- arize/embeddings/errors.py +27 -5
- arize/embeddings/nlp_generators.py +43 -18
- arize/embeddings/tabular_generators.py +46 -31
- arize/embeddings/usecases.py +12 -2
- arize/exceptions/__init__.py +1 -0
- arize/exceptions/auth.py +11 -1
- arize/exceptions/base.py +29 -4
- arize/exceptions/models.py +21 -2
- arize/exceptions/parameters.py +31 -0
- arize/exceptions/spaces.py +12 -1
- arize/exceptions/types.py +86 -7
- arize/exceptions/values.py +220 -20
- arize/experiments/__init__.py +13 -0
- arize/experiments/client.py +394 -285
- arize/experiments/evaluators/__init__.py +1 -0
- arize/experiments/evaluators/base.py +74 -41
- arize/experiments/evaluators/exceptions.py +6 -3
- arize/experiments/evaluators/executors.py +121 -73
- arize/experiments/evaluators/rate_limiters.py +106 -57
- arize/experiments/evaluators/types.py +34 -7
- arize/experiments/evaluators/utils.py +65 -27
- arize/experiments/functions.py +103 -101
- arize/experiments/tracing.py +52 -44
- arize/experiments/types.py +56 -31
- arize/logging.py +54 -22
- arize/ml/__init__.py +1 -0
- arize/ml/batch_validation/__init__.py +1 -0
- arize/{models → ml}/batch_validation/errors.py +545 -67
- arize/{models → ml}/batch_validation/validator.py +344 -303
- arize/ml/bounded_executor.py +47 -0
- arize/{models → ml}/casting.py +118 -108
- arize/{models → ml}/client.py +339 -118
- arize/{models → ml}/proto.py +97 -42
- arize/{models → ml}/stream_validation.py +43 -15
- arize/ml/surrogate_explainer/__init__.py +1 -0
- arize/{models → ml}/surrogate_explainer/mimic.py +25 -10
- arize/{types.py → ml/types.py} +355 -354
- arize/pre_releases.py +44 -0
- arize/projects/__init__.py +1 -0
- arize/projects/client.py +134 -0
- arize/regions.py +40 -0
- arize/spans/__init__.py +1 -0
- arize/spans/client.py +204 -175
- arize/spans/columns.py +13 -0
- arize/spans/conversion.py +60 -37
- arize/spans/validation/__init__.py +1 -0
- arize/spans/validation/annotations/__init__.py +1 -0
- arize/spans/validation/annotations/annotations_validation.py +6 -4
- arize/spans/validation/annotations/dataframe_form_validation.py +13 -11
- arize/spans/validation/annotations/value_validation.py +35 -11
- arize/spans/validation/common/__init__.py +1 -0
- arize/spans/validation/common/argument_validation.py +33 -8
- arize/spans/validation/common/dataframe_form_validation.py +35 -9
- arize/spans/validation/common/errors.py +211 -11
- arize/spans/validation/common/value_validation.py +81 -14
- arize/spans/validation/evals/__init__.py +1 -0
- arize/spans/validation/evals/dataframe_form_validation.py +28 -8
- arize/spans/validation/evals/evals_validation.py +34 -4
- arize/spans/validation/evals/value_validation.py +26 -3
- arize/spans/validation/metadata/__init__.py +1 -1
- arize/spans/validation/metadata/argument_validation.py +14 -5
- arize/spans/validation/metadata/dataframe_form_validation.py +26 -10
- arize/spans/validation/metadata/value_validation.py +24 -10
- arize/spans/validation/spans/__init__.py +1 -0
- arize/spans/validation/spans/dataframe_form_validation.py +35 -14
- arize/spans/validation/spans/spans_validation.py +35 -4
- arize/spans/validation/spans/value_validation.py +78 -8
- arize/utils/__init__.py +1 -0
- arize/utils/arrow.py +31 -15
- arize/utils/cache.py +34 -6
- arize/utils/dataframe.py +20 -3
- arize/utils/online_tasks/__init__.py +2 -0
- arize/utils/online_tasks/dataframe_preprocessor.py +58 -47
- arize/utils/openinference_conversion.py +44 -5
- arize/utils/proto.py +10 -0
- arize/utils/size.py +5 -3
- arize/utils/types.py +105 -0
- arize/version.py +3 -1
- {arize-8.0.0a22.dist-info → arize-8.0.0b0.dist-info}/METADATA +13 -6
- arize-8.0.0b0.dist-info/RECORD +175 -0
- {arize-8.0.0a22.dist-info → arize-8.0.0b0.dist-info}/WHEEL +1 -1
- arize-8.0.0b0.dist-info/licenses/LICENSE +176 -0
- arize-8.0.0b0.dist-info/licenses/NOTICE +13 -0
- arize/_generated/protocol/flight/export_pb2.py +0 -61
- arize/_generated/protocol/flight/ingest_pb2.py +0 -365
- arize/models/__init__.py +0 -0
- arize/models/batch_validation/__init__.py +0 -0
- arize/models/bounded_executor.py +0 -34
- arize/models/surrogate_explainer/__init__.py +0 -0
- arize-8.0.0a22.dist-info/RECORD +0 -146
- arize-8.0.0a22.dist-info/licenses/LICENSE.md +0 -12
|
@@ -1,6 +1,7 @@
|
|
|
1
|
+
"""Tabular data embedding generators for structured feature embeddings."""
|
|
2
|
+
|
|
1
3
|
import logging
|
|
2
4
|
from functools import partial
|
|
3
|
-
from typing import Dict, List, Optional, Tuple, Union, cast
|
|
4
5
|
|
|
5
6
|
import pandas as pd
|
|
6
7
|
|
|
@@ -10,7 +11,7 @@ from arize.embeddings.constants import (
|
|
|
10
11
|
IMPORT_ERROR_MESSAGE,
|
|
11
12
|
)
|
|
12
13
|
from arize.embeddings.usecases import UseCases
|
|
13
|
-
from arize.types import is_list_of
|
|
14
|
+
from arize.utils.types import is_list_of
|
|
14
15
|
|
|
15
16
|
try:
|
|
16
17
|
from datasets import Dataset
|
|
@@ -27,7 +28,10 @@ TABULAR_PRETRAINED_MODELS = [
|
|
|
27
28
|
|
|
28
29
|
|
|
29
30
|
class EmbeddingGeneratorForTabularFeatures(NLPEmbeddingGenerator):
|
|
31
|
+
"""Embedding generator for tabular feature data using prompt-based LLM encoding."""
|
|
32
|
+
|
|
30
33
|
def __repr__(self) -> str:
|
|
34
|
+
"""Return a string representation of the tabular embedding generator."""
|
|
31
35
|
return (
|
|
32
36
|
f"{self.__class__.__name__}(\n"
|
|
33
37
|
f" use_case={self.use_case},\n"
|
|
@@ -41,8 +45,17 @@ class EmbeddingGeneratorForTabularFeatures(NLPEmbeddingGenerator):
|
|
|
41
45
|
def __init__(
|
|
42
46
|
self,
|
|
43
47
|
model_name: str = DEFAULT_TABULAR_MODEL,
|
|
44
|
-
**kwargs,
|
|
45
|
-
):
|
|
48
|
+
**kwargs: object,
|
|
49
|
+
) -> None:
|
|
50
|
+
"""Initialize the tabular features embedding generator.
|
|
51
|
+
|
|
52
|
+
Args:
|
|
53
|
+
model_name: Name of the pre-trained NLP model for tabular data.
|
|
54
|
+
**kwargs: Additional arguments for model initialization.
|
|
55
|
+
|
|
56
|
+
Raises:
|
|
57
|
+
ValueError: If model_name is not in supported models list.
|
|
58
|
+
"""
|
|
46
59
|
if model_name not in TABULAR_PRETRAINED_MODELS:
|
|
47
60
|
raise ValueError(
|
|
48
61
|
"model_name not supported. Check supported models with "
|
|
@@ -57,25 +70,28 @@ class EmbeddingGeneratorForTabularFeatures(NLPEmbeddingGenerator):
|
|
|
57
70
|
def generate_embeddings(
|
|
58
71
|
self,
|
|
59
72
|
df: pd.DataFrame,
|
|
60
|
-
selected_columns:
|
|
61
|
-
col_name_map:
|
|
73
|
+
selected_columns: list[str],
|
|
74
|
+
col_name_map: dict[str, str] | None = None,
|
|
62
75
|
return_prompt_col: bool = False,
|
|
63
|
-
) ->
|
|
64
|
-
"""
|
|
65
|
-
|
|
66
|
-
`selected_columns` and passed to a pre-trained
|
|
67
|
-
computation.
|
|
68
|
-
|
|
69
|
-
:
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
76
|
+
) -> pd.Series | tuple[pd.Series, pd.Series]:
|
|
77
|
+
"""Obtain embedding vectors from your tabular data.
|
|
78
|
+
|
|
79
|
+
Prompts are generated from your `selected_columns` and passed to a pre-trained
|
|
80
|
+
large language model for embedding vector computation.
|
|
81
|
+
|
|
82
|
+
Args:
|
|
83
|
+
df: Pandas DataFrame containing the tabular data. Not all columns will be
|
|
84
|
+
considered, see `selected_columns`.
|
|
85
|
+
selected_columns: Columns to be considered to construct the prompt to be passed to
|
|
86
|
+
the LLM.
|
|
87
|
+
col_name_map: Mapping between selected column names and a more verbose description of
|
|
88
|
+
the name. This helps the LLM understand the features better.
|
|
89
|
+
return_prompt_col: If set to True, an extra pandas Series will be returned
|
|
90
|
+
containing the constructed prompts. Defaults to False.
|
|
91
|
+
|
|
92
|
+
Returns:
|
|
93
|
+
A pandas Series containing the embedding vectors and, if `return_prompt_col` is
|
|
94
|
+
set to True, a pandas Series containing the prompts created from tabular features.
|
|
79
95
|
"""
|
|
80
96
|
if col_name_map is None:
|
|
81
97
|
col_name_map = {}
|
|
@@ -99,7 +115,7 @@ class EmbeddingGeneratorForTabularFeatures(NLPEmbeddingGenerator):
|
|
|
99
115
|
)
|
|
100
116
|
for k, v in col_name_map.items():
|
|
101
117
|
if not isinstance(k, str) or not isinstance(v, str):
|
|
102
|
-
raise
|
|
118
|
+
raise TypeError(
|
|
103
119
|
"col_name_map dictionary keys and values should be strings"
|
|
104
120
|
)
|
|
105
121
|
missing_cols = set(col_name_map.keys()).difference(df.columns)
|
|
@@ -109,7 +125,7 @@ class EmbeddingGeneratorForTabularFeatures(NLPEmbeddingGenerator):
|
|
|
109
125
|
f"The following columns are not found {missing_cols}"
|
|
110
126
|
)
|
|
111
127
|
|
|
112
|
-
prompts = df.rename(columns=col_name_map).apply(
|
|
128
|
+
prompts: pd.Series = df.rename(columns=col_name_map).apply(
|
|
113
129
|
partial(
|
|
114
130
|
self.__prompt_fn,
|
|
115
131
|
columns=[
|
|
@@ -129,22 +145,20 @@ class EmbeddingGeneratorForTabularFeatures(NLPEmbeddingGenerator):
|
|
|
129
145
|
batch_size=self.batch_size,
|
|
130
146
|
)
|
|
131
147
|
|
|
148
|
+
df: pd.DataFrame = ds.to_pandas()
|
|
132
149
|
if return_prompt_col:
|
|
133
|
-
return
|
|
134
|
-
cast(pd.DataFrame, ds.to_pandas())["embedding_vector"],
|
|
135
|
-
cast(pd.Series, prompts),
|
|
136
|
-
)
|
|
150
|
+
return df["embedding_vector"], prompts
|
|
137
151
|
|
|
138
|
-
return
|
|
152
|
+
return df["embedding_vector"]
|
|
139
153
|
|
|
140
154
|
@staticmethod
|
|
141
|
-
def __prompt_fn(row: pd.DataFrame, columns:
|
|
155
|
+
def __prompt_fn(row: pd.DataFrame, columns: list[str]) -> str:
|
|
142
156
|
return " ".join(
|
|
143
157
|
f"The {col.replace('_', ' ')} is {str(row[col]).strip()}."
|
|
144
158
|
for col in columns
|
|
145
159
|
)
|
|
146
160
|
|
|
147
|
-
def __get_method_for_embedding_calculation(self):
|
|
161
|
+
def __get_method_for_embedding_calculation(self) -> str:
|
|
148
162
|
try:
|
|
149
163
|
return {
|
|
150
164
|
"bert-base-uncased": "avg_token",
|
|
@@ -158,4 +172,5 @@ class EmbeddingGeneratorForTabularFeatures(NLPEmbeddingGenerator):
|
|
|
158
172
|
|
|
159
173
|
@staticmethod
|
|
160
174
|
def list_pretrained_models() -> pd.DataFrame:
|
|
175
|
+
"""Return a DataFrame of available pretrained tabular models."""
|
|
161
176
|
return pd.DataFrame({"Model Name": sorted(TABULAR_PRETRAINED_MODELS)})
|
arize/embeddings/usecases.py
CHANGED
|
@@ -1,26 +1,36 @@
|
|
|
1
|
+
"""Use case definitions and enums for embedding generation."""
|
|
2
|
+
|
|
1
3
|
from dataclasses import dataclass
|
|
2
4
|
from enum import Enum, auto, unique
|
|
3
5
|
|
|
4
6
|
|
|
5
7
|
@unique
|
|
6
8
|
class NLPUseCases(Enum):
|
|
9
|
+
"""Enum representing supported NLP use cases for embedding generation."""
|
|
10
|
+
|
|
7
11
|
SEQUENCE_CLASSIFICATION = auto()
|
|
8
12
|
SUMMARIZATION = auto()
|
|
9
13
|
|
|
10
14
|
|
|
11
15
|
@unique
|
|
12
16
|
class CVUseCases(Enum):
|
|
17
|
+
"""Enum representing supported computer vision use cases for embedding generation."""
|
|
18
|
+
|
|
13
19
|
IMAGE_CLASSIFICATION = auto()
|
|
14
20
|
OBJECT_DETECTION = auto()
|
|
15
21
|
|
|
16
22
|
|
|
17
23
|
@unique
|
|
18
|
-
class
|
|
24
|
+
class TabularUseCases(Enum):
|
|
25
|
+
"""Enum representing supported tabular/structured data use cases for embedding generation."""
|
|
26
|
+
|
|
19
27
|
TABULAR_EMBEDDINGS = auto()
|
|
20
28
|
|
|
21
29
|
|
|
22
30
|
@dataclass
|
|
23
31
|
class UseCases:
|
|
32
|
+
"""Container grouping all use case enums for embedding generators."""
|
|
33
|
+
|
|
24
34
|
NLP = NLPUseCases
|
|
25
35
|
CV = CVUseCases
|
|
26
|
-
STRUCTURED =
|
|
36
|
+
STRUCTURED = TabularUseCases
|
arize/exceptions/__init__.py
CHANGED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Custom exceptions and error types for the Arize SDK."""
|
arize/exceptions/auth.py
CHANGED
|
@@ -1,11 +1,21 @@
|
|
|
1
|
+
"""Authentication-related exception classes."""
|
|
2
|
+
|
|
1
3
|
from arize.constants.config import ENV_API_KEY
|
|
2
4
|
|
|
3
5
|
|
|
4
6
|
class MissingAPIKeyError(Exception):
|
|
5
|
-
|
|
7
|
+
"""Raised when API key is not provided via environment or configuration."""
|
|
8
|
+
|
|
9
|
+
def __init__(self, message: str = "") -> None:
|
|
10
|
+
"""Initialize the exception with an optional custom message.
|
|
11
|
+
|
|
12
|
+
Args:
|
|
13
|
+
message: Custom error message, or empty string for default.
|
|
14
|
+
"""
|
|
6
15
|
self.message = message
|
|
7
16
|
|
|
8
17
|
def __str__(self) -> str:
|
|
18
|
+
"""Return the error message."""
|
|
9
19
|
return self.message or self._default_message()
|
|
10
20
|
|
|
11
21
|
@staticmethod
|
arize/exceptions/base.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
+
"""Base exception classes and common error messages."""
|
|
2
|
+
|
|
1
3
|
from abc import ABC, abstractmethod
|
|
2
4
|
from collections.abc import Iterable
|
|
3
|
-
from typing import List
|
|
4
5
|
|
|
5
6
|
INVALID_ARROW_CONVERSION_MSG = (
|
|
6
7
|
"The dataframe needs to convert to pyarrow but has failed to do so. "
|
|
@@ -11,20 +12,30 @@ INVALID_ARROW_CONVERSION_MSG = (
|
|
|
11
12
|
|
|
12
13
|
|
|
13
14
|
class ValidationError(Exception, ABC):
|
|
15
|
+
"""Base exception for validation errors in data and schema validation."""
|
|
16
|
+
|
|
14
17
|
def __str__(self) -> str:
|
|
18
|
+
"""Return a human-readable error message."""
|
|
15
19
|
return self.error_message()
|
|
16
20
|
|
|
17
21
|
@abstractmethod
|
|
18
22
|
def __repr__(self) -> str:
|
|
19
|
-
|
|
23
|
+
"""Return a string representation for debugging and logging."""
|
|
20
24
|
|
|
21
25
|
@abstractmethod
|
|
22
26
|
def error_message(self) -> str:
|
|
23
|
-
|
|
27
|
+
"""Return the error message for this exception."""
|
|
24
28
|
|
|
25
29
|
|
|
26
30
|
class ValidationFailure(Exception):
|
|
27
|
-
|
|
31
|
+
"""Raised when one or more validation errors occur during validation."""
|
|
32
|
+
|
|
33
|
+
def __init__(self, errors: list[ValidationError]) -> None:
|
|
34
|
+
"""Initialize the exception with a list of validation errors.
|
|
35
|
+
|
|
36
|
+
Args:
|
|
37
|
+
errors: List of ValidationError instances that occurred.
|
|
38
|
+
"""
|
|
28
39
|
self.errors = errors
|
|
29
40
|
|
|
30
41
|
|
|
@@ -44,14 +55,24 @@ class ValidationFailure(Exception):
|
|
|
44
55
|
|
|
45
56
|
|
|
46
57
|
class InvalidFieldTypeConversion(ValidationError):
|
|
58
|
+
"""Raised when fields cannot be converted to required type."""
|
|
59
|
+
|
|
47
60
|
def __repr__(self) -> str:
|
|
61
|
+
"""Return a string representation for debugging and logging."""
|
|
48
62
|
return "Invalid_Input_Type_Conversion"
|
|
49
63
|
|
|
50
64
|
def __init__(self, fields: Iterable, type: str) -> None:
|
|
65
|
+
"""Initialize the exception with type conversion context.
|
|
66
|
+
|
|
67
|
+
Args:
|
|
68
|
+
fields: Fields that failed type conversion.
|
|
69
|
+
type: Target type that fields should be convertible to.
|
|
70
|
+
"""
|
|
51
71
|
self.fields = fields
|
|
52
72
|
self.type = type
|
|
53
73
|
|
|
54
74
|
def error_message(self) -> str:
|
|
75
|
+
"""Return the error message for this exception."""
|
|
55
76
|
return (
|
|
56
77
|
f"The following fields must be convertible to {self.type}: "
|
|
57
78
|
f"{', '.join(map(str, self.fields))}."
|
|
@@ -84,10 +105,14 @@ class InvalidFieldTypeConversion(ValidationError):
|
|
|
84
105
|
|
|
85
106
|
|
|
86
107
|
class InvalidDataFrameIndex(ValidationError):
|
|
108
|
+
"""Raised when DataFrame has an invalid index that needs to be reset."""
|
|
109
|
+
|
|
87
110
|
def __repr__(self) -> str:
|
|
111
|
+
"""Return a string representation for debugging and logging."""
|
|
88
112
|
return "Invalid_Index"
|
|
89
113
|
|
|
90
114
|
def error_message(self) -> str:
|
|
115
|
+
"""Return the error message for this exception."""
|
|
91
116
|
return (
|
|
92
117
|
"The index of the dataframe is invalid; "
|
|
93
118
|
"reset the index by using df.reset_index(drop=True, inplace=True)"
|
arize/exceptions/models.py
CHANGED
|
@@ -1,8 +1,19 @@
|
|
|
1
|
+
"""Model and project-related exception classes."""
|
|
2
|
+
|
|
3
|
+
|
|
1
4
|
class MissingProjectNameError(Exception):
|
|
2
|
-
|
|
5
|
+
"""Raised when project name is required but not provided."""
|
|
6
|
+
|
|
7
|
+
def __init__(self, message: str = "") -> None:
|
|
8
|
+
"""Initialize the exception with an optional custom message.
|
|
9
|
+
|
|
10
|
+
Args:
|
|
11
|
+
message: Custom error message, or empty string for default.
|
|
12
|
+
"""
|
|
3
13
|
self.message = message
|
|
4
14
|
|
|
5
15
|
def __str__(self) -> str:
|
|
16
|
+
"""Return the error message."""
|
|
6
17
|
return self.message or self._default_message()
|
|
7
18
|
|
|
8
19
|
@staticmethod
|
|
@@ -11,10 +22,18 @@ class MissingProjectNameError(Exception):
|
|
|
11
22
|
|
|
12
23
|
|
|
13
24
|
class MissingModelNameError(Exception):
|
|
14
|
-
|
|
25
|
+
"""Raised when model name is required but not provided."""
|
|
26
|
+
|
|
27
|
+
def __init__(self, message: str = "") -> None:
|
|
28
|
+
"""Initialize the exception with an optional custom message.
|
|
29
|
+
|
|
30
|
+
Args:
|
|
31
|
+
message: Custom error message, or empty string for default.
|
|
32
|
+
"""
|
|
15
33
|
self.message = message
|
|
16
34
|
|
|
17
35
|
def __str__(self) -> str:
|
|
36
|
+
"""Return the error message."""
|
|
18
37
|
return self.message or self._default_message()
|
|
19
38
|
|
|
20
39
|
@staticmethod
|
arize/exceptions/parameters.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
"""Parameter validation exception classes."""
|
|
2
|
+
|
|
1
3
|
from arize.constants.ml import MAX_NUMBER_OF_EMBEDDINGS
|
|
2
4
|
from arize.exceptions.base import ValidationError
|
|
3
5
|
|
|
@@ -156,10 +158,14 @@ from arize.exceptions.base import ValidationError
|
|
|
156
158
|
|
|
157
159
|
|
|
158
160
|
class InvalidModelVersion(ValidationError):
|
|
161
|
+
"""Raised when model version is empty or invalid."""
|
|
162
|
+
|
|
159
163
|
def __repr__(self) -> str:
|
|
164
|
+
"""Return a string representation for debugging and logging."""
|
|
160
165
|
return "Invalid_Model_Version"
|
|
161
166
|
|
|
162
167
|
def error_message(self) -> str:
|
|
168
|
+
"""Return the error message for this exception."""
|
|
163
169
|
return "Model version must be a nonempty string."
|
|
164
170
|
|
|
165
171
|
|
|
@@ -172,10 +178,14 @@ class InvalidModelVersion(ValidationError):
|
|
|
172
178
|
|
|
173
179
|
|
|
174
180
|
class InvalidProjectName(ValidationError):
|
|
181
|
+
"""Raised when project name is empty or invalid."""
|
|
182
|
+
|
|
175
183
|
def __repr__(self) -> str:
|
|
184
|
+
"""Return a string representation for debugging and logging."""
|
|
176
185
|
return "Invalid_Project_Name"
|
|
177
186
|
|
|
178
187
|
def error_message(self) -> str:
|
|
188
|
+
"""Return the error message for this exception."""
|
|
179
189
|
return (
|
|
180
190
|
"Project Name must be a nonempty string. "
|
|
181
191
|
"If Model ID was used instead of Project Name, "
|
|
@@ -352,13 +362,22 @@ class InvalidProjectName(ValidationError):
|
|
|
352
362
|
|
|
353
363
|
|
|
354
364
|
class InvalidNumberOfEmbeddings(ValidationError):
|
|
365
|
+
"""Raised when number of embedding features exceeds the maximum allowed."""
|
|
366
|
+
|
|
355
367
|
def __repr__(self) -> str:
|
|
368
|
+
"""Return a string representation for debugging and logging."""
|
|
356
369
|
return "Invalid_Number_Of_Embeddings"
|
|
357
370
|
|
|
358
371
|
def __init__(self, number_of_embeddings: int) -> None:
|
|
372
|
+
"""Initialize the exception with embedding count context.
|
|
373
|
+
|
|
374
|
+
Args:
|
|
375
|
+
number_of_embeddings: The number of embeddings found in the schema.
|
|
376
|
+
"""
|
|
359
377
|
self.number_of_embeddings = number_of_embeddings
|
|
360
378
|
|
|
361
379
|
def error_message(self) -> str:
|
|
380
|
+
"""Return the error message for this exception."""
|
|
362
381
|
return (
|
|
363
382
|
f"The schema contains {self.number_of_embeddings} different embeddings when a maximum of "
|
|
364
383
|
f"{MAX_NUMBER_OF_EMBEDDINGS} is allowed."
|
|
@@ -366,23 +385,35 @@ class InvalidNumberOfEmbeddings(ValidationError):
|
|
|
366
385
|
|
|
367
386
|
|
|
368
387
|
class InvalidValueType(Exception):
|
|
388
|
+
"""Raised when a value has an invalid or unexpected type."""
|
|
389
|
+
|
|
369
390
|
def __init__(
|
|
370
391
|
self,
|
|
371
392
|
value_name: str,
|
|
372
393
|
value: bool | int | float | str,
|
|
373
394
|
correct_type: str,
|
|
374
395
|
) -> None:
|
|
396
|
+
"""Initialize the exception with value type validation context.
|
|
397
|
+
|
|
398
|
+
Args:
|
|
399
|
+
value_name: Name of the value with invalid type.
|
|
400
|
+
value: The actual value that has the wrong type.
|
|
401
|
+
correct_type: Description of the expected type.
|
|
402
|
+
"""
|
|
375
403
|
self.value_name = value_name
|
|
376
404
|
self.value = value
|
|
377
405
|
self.correct_type = correct_type
|
|
378
406
|
|
|
379
407
|
def __repr__(self) -> str:
|
|
408
|
+
"""Return a string representation for debugging and logging."""
|
|
380
409
|
return "Invalid_Value_Type"
|
|
381
410
|
|
|
382
411
|
def __str__(self) -> str:
|
|
412
|
+
"""Return a human-readable error message."""
|
|
383
413
|
return self.error_message()
|
|
384
414
|
|
|
385
415
|
def error_message(self) -> str:
|
|
416
|
+
"""Return the error message for this exception."""
|
|
386
417
|
return (
|
|
387
418
|
f"{self.value_name} with value {self.value} is of type {type(self.value).__name__}, "
|
|
388
419
|
f"but expected {self.correct_type}"
|
arize/exceptions/spaces.py
CHANGED
|
@@ -1,8 +1,19 @@
|
|
|
1
|
+
"""Space-related exception classes."""
|
|
2
|
+
|
|
3
|
+
|
|
1
4
|
class MissingSpaceIDError(Exception):
|
|
2
|
-
|
|
5
|
+
"""Raised when space ID is required but not provided."""
|
|
6
|
+
|
|
7
|
+
def __init__(self, message: str = "") -> None:
|
|
8
|
+
"""Initialize the exception with an optional custom message.
|
|
9
|
+
|
|
10
|
+
Args:
|
|
11
|
+
message: Custom error message, or empty string for default.
|
|
12
|
+
"""
|
|
3
13
|
self.message = message
|
|
4
14
|
|
|
5
15
|
def __str__(self) -> str:
|
|
16
|
+
"""Return the error message."""
|
|
6
17
|
return self.message or self._default_message()
|
|
7
18
|
|
|
8
19
|
@staticmethod
|
arize/exceptions/types.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
|
+
"""Type validation exception classes."""
|
|
2
|
+
|
|
1
3
|
from collections.abc import Iterable
|
|
2
|
-
from typing import List
|
|
3
4
|
|
|
4
5
|
from arize.constants.ml import (
|
|
5
6
|
MAX_EMBEDDING_DIMENSIONALITY,
|
|
@@ -9,17 +10,28 @@ from arize.exceptions.base import ValidationError
|
|
|
9
10
|
|
|
10
11
|
|
|
11
12
|
class InvalidType(ValidationError):
|
|
13
|
+
"""Raised when a field has an invalid type compared to expected types."""
|
|
14
|
+
|
|
12
15
|
def __repr__(self) -> str:
|
|
16
|
+
"""Return a string representation for debugging and logging."""
|
|
13
17
|
return "Invalid_Type"
|
|
14
18
|
|
|
15
19
|
def __init__(
|
|
16
|
-
self, name: str, expected_types:
|
|
20
|
+
self, name: str, expected_types: list[str], found_data_type: str
|
|
17
21
|
) -> None:
|
|
22
|
+
"""Initialize the exception with type validation context.
|
|
23
|
+
|
|
24
|
+
Args:
|
|
25
|
+
name: Name of the field with invalid type.
|
|
26
|
+
expected_types: List of expected data types.
|
|
27
|
+
found_data_type: Actual data type found.
|
|
28
|
+
"""
|
|
18
29
|
self.name = name
|
|
19
30
|
self.expected_types = expected_types
|
|
20
31
|
self.found_data_type = found_data_type
|
|
21
32
|
|
|
22
33
|
def error_message(self) -> str:
|
|
34
|
+
"""Return the error message for this exception."""
|
|
23
35
|
type_list = (
|
|
24
36
|
self.expected_types[0]
|
|
25
37
|
if len(self.expected_types) == 1
|
|
@@ -33,16 +45,26 @@ class InvalidType(ValidationError):
|
|
|
33
45
|
|
|
34
46
|
|
|
35
47
|
class InvalidTypeColumns(ValidationError):
|
|
48
|
+
"""Raised when columns have invalid types compared to expected types."""
|
|
49
|
+
|
|
36
50
|
def __repr__(self) -> str:
|
|
51
|
+
"""Return a string representation for debugging and logging."""
|
|
37
52
|
return "Invalid_Type_Columns"
|
|
38
53
|
|
|
39
54
|
def __init__(
|
|
40
|
-
self, wrong_type_columns:
|
|
55
|
+
self, wrong_type_columns: list[str], expected_types: list[str]
|
|
41
56
|
) -> None:
|
|
57
|
+
"""Initialize the exception with column type validation context.
|
|
58
|
+
|
|
59
|
+
Args:
|
|
60
|
+
wrong_type_columns: Columns with incorrect data types.
|
|
61
|
+
expected_types: List of expected data types for the columns.
|
|
62
|
+
"""
|
|
42
63
|
self.wrong_type_columns = wrong_type_columns
|
|
43
64
|
self.expected_types = expected_types
|
|
44
65
|
|
|
45
66
|
def error_message(self) -> str:
|
|
67
|
+
"""Return the error message for this exception."""
|
|
46
68
|
col_list = (
|
|
47
69
|
self.wrong_type_columns[0]
|
|
48
70
|
if len(self.wrong_type_columns) == 1
|
|
@@ -57,14 +79,24 @@ class InvalidTypeColumns(ValidationError):
|
|
|
57
79
|
|
|
58
80
|
|
|
59
81
|
class InvalidTypeFeatures(ValidationError):
|
|
82
|
+
"""Raised when feature columns have invalid types."""
|
|
83
|
+
|
|
60
84
|
def __repr__(self) -> str:
|
|
85
|
+
"""Return a string representation for debugging and logging."""
|
|
61
86
|
return "Invalid_Type_Features"
|
|
62
87
|
|
|
63
|
-
def __init__(self, cols: Iterable, expected_types:
|
|
88
|
+
def __init__(self, cols: Iterable, expected_types: list[str]) -> None:
|
|
89
|
+
"""Initialize the exception with feature type validation context.
|
|
90
|
+
|
|
91
|
+
Args:
|
|
92
|
+
cols: Feature columns with unrecognized data types.
|
|
93
|
+
expected_types: List of expected data types for features.
|
|
94
|
+
"""
|
|
64
95
|
self.wrong_type_columns = cols
|
|
65
96
|
self.expected_types = expected_types
|
|
66
97
|
|
|
67
98
|
def error_message(self) -> str:
|
|
99
|
+
"""Return the error message for this exception."""
|
|
68
100
|
type_list = (
|
|
69
101
|
self.expected_types[0]
|
|
70
102
|
if len(self.expected_types) == 1
|
|
@@ -78,30 +110,48 @@ class InvalidTypeFeatures(ValidationError):
|
|
|
78
110
|
|
|
79
111
|
|
|
80
112
|
class InvalidFieldTypePromptTemplates(ValidationError):
|
|
113
|
+
"""Raised when prompt template field has invalid type."""
|
|
114
|
+
|
|
81
115
|
def __repr__(self) -> str:
|
|
116
|
+
"""Return a string representation for debugging and logging."""
|
|
82
117
|
return "Invalid_Input_Type_Prompt_Templates"
|
|
83
118
|
|
|
84
119
|
def error_message(self) -> str:
|
|
120
|
+
"""Return the error message for this exception."""
|
|
85
121
|
return "prompt_template_column_names must be of type PromptTemplateColumnNames"
|
|
86
122
|
|
|
87
123
|
|
|
88
124
|
class InvalidFieldTypeLlmConfig(ValidationError):
|
|
125
|
+
"""Raised when LLM config field has invalid type."""
|
|
126
|
+
|
|
89
127
|
def __repr__(self) -> str:
|
|
128
|
+
"""Return a string representation for debugging and logging."""
|
|
90
129
|
return "Invalid_Input_Type_LLM_Config"
|
|
91
130
|
|
|
92
131
|
def error_message(self) -> str:
|
|
132
|
+
"""Return the error message for this exception."""
|
|
93
133
|
return "llm_config_column_names must be of type LLMConfigColumnNames"
|
|
94
134
|
|
|
95
135
|
|
|
96
136
|
class InvalidTypeTags(ValidationError):
|
|
137
|
+
"""Raised when tag columns have invalid types."""
|
|
138
|
+
|
|
97
139
|
def __repr__(self) -> str:
|
|
140
|
+
"""Return a string representation for debugging and logging."""
|
|
98
141
|
return "Invalid_Type_Tags"
|
|
99
142
|
|
|
100
|
-
def __init__(self, cols: Iterable, expected_types:
|
|
143
|
+
def __init__(self, cols: Iterable, expected_types: list[str]) -> None:
|
|
144
|
+
"""Initialize the exception with tag type validation context.
|
|
145
|
+
|
|
146
|
+
Args:
|
|
147
|
+
cols: Tag columns with unrecognized data types.
|
|
148
|
+
expected_types: List of expected data types for tags.
|
|
149
|
+
"""
|
|
101
150
|
self.wrong_type_columns = cols
|
|
102
151
|
self.expected_types = expected_types
|
|
103
152
|
|
|
104
153
|
def error_message(self) -> str:
|
|
154
|
+
"""Return the error message for this exception."""
|
|
105
155
|
type_list = (
|
|
106
156
|
self.expected_types[0]
|
|
107
157
|
if len(self.expected_types) == 1
|
|
@@ -115,14 +165,24 @@ class InvalidTypeTags(ValidationError):
|
|
|
115
165
|
|
|
116
166
|
|
|
117
167
|
class InvalidValueEmbeddingVectorDimensionality(ValidationError):
|
|
168
|
+
"""Raised when embedding vectors have invalid dimensionality."""
|
|
169
|
+
|
|
118
170
|
def __repr__(self) -> str:
|
|
171
|
+
"""Return a string representation for debugging and logging."""
|
|
119
172
|
return "Invalid_Value_Embedding_Vector_Dimensionality"
|
|
120
173
|
|
|
121
|
-
def __init__(self, dim_1_cols:
|
|
174
|
+
def __init__(self, dim_1_cols: list[str], high_dim_cols: list[str]) -> None:
|
|
175
|
+
"""Initialize the exception with embedding dimensionality context.
|
|
176
|
+
|
|
177
|
+
Args:
|
|
178
|
+
dim_1_cols: Columns with dimensionality of 1.
|
|
179
|
+
high_dim_cols: Columns with dimensionality exceeding the maximum.
|
|
180
|
+
"""
|
|
122
181
|
self.dim_1_cols = dim_1_cols
|
|
123
182
|
self.high_dim_cols = high_dim_cols
|
|
124
183
|
|
|
125
184
|
def error_message(self) -> str:
|
|
185
|
+
"""Return the error message for this exception."""
|
|
126
186
|
msg = (
|
|
127
187
|
"Embedding vectors cannot have length (dimensionality) of 1 or higher "
|
|
128
188
|
f"than {MAX_EMBEDDING_DIMENSIONALITY}. "
|
|
@@ -139,13 +199,22 @@ class InvalidValueEmbeddingVectorDimensionality(ValidationError):
|
|
|
139
199
|
|
|
140
200
|
|
|
141
201
|
class InvalidValueEmbeddingRawDataTooLong(ValidationError):
|
|
202
|
+
"""Raised when embedding raw data exceeds maximum character limit."""
|
|
203
|
+
|
|
142
204
|
def __repr__(self) -> str:
|
|
205
|
+
"""Return a string representation for debugging and logging."""
|
|
143
206
|
return "Invalid_Value_Embedding_Raw_Data_Too_Long"
|
|
144
207
|
|
|
145
208
|
def __init__(self, cols: Iterable) -> None:
|
|
209
|
+
"""Initialize the exception with raw data length validation context.
|
|
210
|
+
|
|
211
|
+
Args:
|
|
212
|
+
cols: Columns with embedding raw data exceeding maximum characters.
|
|
213
|
+
"""
|
|
146
214
|
self.invalid_cols = cols
|
|
147
215
|
|
|
148
216
|
def error_message(self) -> str:
|
|
217
|
+
"""Return the error message for this exception."""
|
|
149
218
|
return (
|
|
150
219
|
f"Embedding raw data cannot have more than {MAX_RAW_DATA_CHARACTERS} characters. "
|
|
151
220
|
"The following columns do not satisfy this condition: "
|
|
@@ -154,14 +223,24 @@ class InvalidValueEmbeddingRawDataTooLong(ValidationError):
|
|
|
154
223
|
|
|
155
224
|
|
|
156
225
|
class InvalidTypeShapValues(ValidationError):
|
|
226
|
+
"""Raised when SHAP value columns have invalid types."""
|
|
227
|
+
|
|
157
228
|
def __repr__(self) -> str:
|
|
229
|
+
"""Return a string representation for debugging and logging."""
|
|
158
230
|
return "Invalid_Type_SHAP_Values"
|
|
159
231
|
|
|
160
|
-
def __init__(self, cols: Iterable, expected_types:
|
|
232
|
+
def __init__(self, cols: Iterable, expected_types: list[str]) -> None:
|
|
233
|
+
"""Initialize the exception with SHAP value type validation context.
|
|
234
|
+
|
|
235
|
+
Args:
|
|
236
|
+
cols: SHAP value columns with unrecognized data types.
|
|
237
|
+
expected_types: List of expected data types for SHAP values.
|
|
238
|
+
"""
|
|
161
239
|
self.wrong_type_columns = cols
|
|
162
240
|
self.expected_types = expected_types
|
|
163
241
|
|
|
164
242
|
def error_message(self) -> str:
|
|
243
|
+
"""Return the error message for this exception."""
|
|
165
244
|
type_list = (
|
|
166
245
|
self.expected_types[0]
|
|
167
246
|
if len(self.expected_types) == 1
|