arize 8.0.0b0__py3-none-any.whl → 8.0.0b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arize/__init__.py +1 -1
- arize/_client_factory.py +50 -0
- arize/_flight/client.py +4 -4
- arize/_generated/api_client/__init__.py +0 -2
- arize/_generated/api_client/api/datasets_api.py +6 -6
- arize/_generated/api_client/api/experiments_api.py +6 -6
- arize/_generated/api_client/api/projects_api.py +3 -3
- arize/_generated/api_client/models/__init__.py +0 -1
- arize/_generated/api_client/models/datasets_create_request.py +2 -10
- arize/_generated/api_client/models/datasets_examples_insert_request.py +2 -10
- arize/_generated/api_client/test/test_datasets_create_request.py +2 -6
- arize/_generated/api_client/test/test_datasets_examples_insert_request.py +2 -6
- arize/_generated/api_client/test/test_datasets_examples_list200_response.py +2 -6
- arize/_generated/api_client/test/test_datasets_examples_update_request.py +2 -6
- arize/_generated/api_client/test/test_experiments_create_request.py +2 -6
- arize/_generated/api_client/test/test_experiments_runs_list200_response.py +2 -6
- arize/_generated/api_client_README.md +0 -1
- arize/_lazy.py +25 -9
- arize/client.py +16 -52
- arize/config.py +9 -36
- arize/constants/ml.py +9 -16
- arize/constants/spans.py +5 -10
- arize/datasets/client.py +13 -9
- arize/datasets/errors.py +1 -1
- arize/datasets/validation.py +2 -2
- arize/embeddings/auto_generator.py +2 -2
- arize/embeddings/errors.py +2 -2
- arize/embeddings/tabular_generators.py +1 -1
- arize/exceptions/base.py +0 -52
- arize/exceptions/parameters.py +0 -329
- arize/experiments/__init__.py +2 -2
- arize/experiments/client.py +16 -10
- arize/experiments/evaluators/base.py +6 -6
- arize/experiments/evaluators/executors.py +10 -3
- arize/experiments/evaluators/types.py +2 -2
- arize/experiments/functions.py +24 -17
- arize/experiments/types.py +6 -8
- arize/logging.py +1 -1
- arize/ml/batch_validation/errors.py +10 -1004
- arize/ml/batch_validation/validator.py +273 -225
- arize/ml/casting.py +7 -7
- arize/ml/client.py +12 -11
- arize/ml/proto.py +6 -6
- arize/ml/stream_validation.py +2 -3
- arize/ml/surrogate_explainer/mimic.py +3 -3
- arize/ml/types.py +1 -55
- arize/pre_releases.py +6 -3
- arize/projects/client.py +9 -4
- arize/regions.py +2 -2
- arize/spans/client.py +14 -12
- arize/spans/columns.py +32 -36
- arize/spans/conversion.py +5 -6
- arize/spans/validation/common/argument_validation.py +3 -3
- arize/spans/validation/common/dataframe_form_validation.py +6 -6
- arize/spans/validation/common/value_validation.py +1 -1
- arize/spans/validation/evals/dataframe_form_validation.py +4 -4
- arize/spans/validation/evals/evals_validation.py +6 -6
- arize/spans/validation/metadata/dataframe_form_validation.py +1 -1
- arize/spans/validation/spans/dataframe_form_validation.py +2 -2
- arize/spans/validation/spans/spans_validation.py +6 -6
- arize/utils/arrow.py +2 -2
- arize/utils/cache.py +2 -2
- arize/utils/dataframe.py +4 -4
- arize/utils/online_tasks/dataframe_preprocessor.py +7 -7
- arize/utils/openinference_conversion.py +10 -10
- arize/utils/proto.py +1 -1
- arize/version.py +1 -1
- {arize-8.0.0b0.dist-info → arize-8.0.0b2.dist-info}/METADATA +71 -63
- {arize-8.0.0b0.dist-info → arize-8.0.0b2.dist-info}/RECORD +72 -73
- arize/_generated/api_client/models/primitive_value.py +0 -172
- arize/_generated/api_client/test/test_primitive_value.py +0 -50
- {arize-8.0.0b0.dist-info → arize-8.0.0b2.dist-info}/WHEEL +0 -0
- {arize-8.0.0b0.dist-info → arize-8.0.0b2.dist-info}/licenses/LICENSE +0 -0
- {arize-8.0.0b0.dist-info → arize-8.0.0b2.dist-info}/licenses/NOTICE +0 -0
arize/spans/validation/common/dataframe_form_validation.py
CHANGED

@@ -17,10 +17,10 @@ if TYPE_CHECKING:
 def check_dataframe_index(
     dataframe: pd.DataFrame,
 ) -> list[InvalidDataFrameIndex]:
-    """Validates that the DataFrame has a default integer index.
+    """Validates that the :class:`pandas.DataFrame` has a default integer index.

     Args:
-        dataframe: The DataFrame to validate.
+        dataframe: The :class:`pandas.DataFrame` to validate.

     Returns:
         List of validation errors if index is not default (empty if valid).
@@ -34,10 +34,10 @@ def check_dataframe_required_column_set(
     df: pd.DataFrame,
     required_columns: list[str],
 ) -> list[InvalidDataFrameMissingColumns]:
-    """Validates that the DataFrame contains all required columns.
+    """Validates that the :class:`pandas.DataFrame` contains all required columns.

     Args:
-        df: The DataFrame to validate.
+        df: The :class:`pandas.DataFrame` to validate.
         required_columns: List of column names that must be present.

     Returns:
@@ -56,10 +56,10 @@ def check_dataframe_required_column_set(
 def check_dataframe_for_duplicate_columns(
     df: pd.DataFrame,
 ) -> list[InvalidDataFrameDuplicateColumns]:
-    """Validates that the DataFrame has no duplicate column names.
+    """Validates that the :class:`pandas.DataFrame` has no duplicate column names.

     Args:
-        df: The DataFrame to validate.
+        df: The :class:`pandas.DataFrame` to validate.

     Returns:
         List of validation errors if duplicate columns exist (empty if valid).
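Each check returns a list of error objects, so an empty list means the frame passed. A minimal usage sketch, assuming the module path from the file list above is importable as shown:

```python
import pandas as pd

# Module path taken from the file list above; treat the import as an assumption.
from arize.spans.validation.common import dataframe_form_validation as form

df = pd.DataFrame({"trace_id": ["a", "b"]}, index=[5, 9])  # non-default index

errors = [
    *form.check_dataframe_index(df),  # flags the non-default index
    *form.check_dataframe_required_column_set(df, ["trace_id", "span_id"]),
    *form.check_dataframe_for_duplicate_columns(df),
]
print(f"{len(errors)} validation error(s) found")
```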
arize/spans/validation/common/value_validation.py
CHANGED

@@ -56,7 +56,7 @@ def check_invalid_model_version(
         model_version: The optional model version to validate.

     Returns:
-        List of validation errors if model version is invalid (empty if valid or None).
+        List of validation errors if model version is invalid (empty if valid or :obj:`None`).
     """
     if model_version is None:
         return []
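As the hunk shows, a missing version short-circuits to an empty result; only a present but invalid value can produce errors. A quick sketch, with the import path inferred from the file list above:

```python
# Import path inferred from the file list above.
from arize.spans.validation.common.value_validation import check_invalid_model_version

assert check_invalid_model_version(None) == []  # None is explicitly allowed
errors = check_invalid_model_version("1.0")  # a well-formed version is expected to yield []
```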
arize/spans/validation/evals/dataframe_form_validation.py
CHANGED

@@ -27,10 +27,10 @@ def log_info_dataframe_extra_column_names(
     """Logs informational message about columns that don't follow evaluation naming conventions.

     Args:
-        df: DataFrame to check for extra column names, or None
+        df: DataFrame to check for extra column names, or :obj:`None`.

     Returns:
-        None
+        :obj:`None`.
     """
     if df is None:
         return
@@ -57,13 +57,13 @@ def log_info_dataframe_extra_column_names(
 def check_dataframe_column_content_type(
     df: pd.DataFrame,
 ) -> list[InvalidDataFrameColumnContentTypes]:
-    """Validates that evaluation DataFrame columns contain expected data types.
+    """Validates that evaluation :class:`pandas.DataFrame` columns contain expected data types.

     Checks that label columns contain strings, score columns contain numbers,
     and explanation columns contain strings.

     Args:
-        df: The DataFrame to validate.
+        df: The :class:`pandas.DataFrame` to validate.

     Returns:
         List of validation errors for columns with incorrect types.
arize/spans/validation/evals/evals_validation.py
CHANGED

@@ -55,13 +55,13 @@ def validate_argument_types(
 def validate_dataframe_form(
     evals_dataframe: pd.DataFrame,
 ) -> list[ValidationError]:
-    """Validate the structure and form of an evaluations DataFrame
+    """Validate the structure and form of an evaluations :class:`pandas.DataFrame`.

     Args:
-        evals_dataframe: The DataFrame containing evaluation data to validate.
+        evals_dataframe: The :class:`pandas.DataFrame` containing evaluation data to validate.

     Returns:
-        List of validation errors found in the DataFrame structure.
+        List of validation errors found in the :class:`pandas.DataFrame` structure.
     """
     df_validation.log_info_dataframe_extra_column_names(evals_dataframe)
     checks = chain(
@@ -84,15 +84,15 @@ def validate_values(
     project_name: str,
     model_version: str | None = None,
 ) -> list[ValidationError]:
-    """Validate the values within an evaluations DataFrame
+    """Validate the values within an evaluations :class:`pandas.DataFrame`.

     Args:
-        evals_dataframe: The DataFrame containing evaluation data to validate.
+        evals_dataframe: The :class:`pandas.DataFrame` containing evaluation data to validate.
         project_name: The project name associated with the evaluations.
         model_version: Optional model version. Defaults to None.

     Returns:
-        List of validation errors found in DataFrame values.
+        List of validation errors found in :class:`pandas.DataFrame` values.
     """
     checks = chain(
         # Common
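Together these give a two-stage pipeline: structural checks first, value-level checks second. A hedged sketch of how a caller might chain them (import path inferred from the file list; the eval column names are illustrative):

```python
import pandas as pd

# Import path inferred from the file list above.
from arize.spans.validation.evals import evals_validation

evals_df = pd.DataFrame({
    "context.span_id": ["abc123"],  # column names assumed for illustration
    "eval.quality.label": ["good"],
    "eval.quality.score": [0.9],
})

errors = evals_validation.validate_dataframe_form(evals_df)
if not errors:  # only inspect values once the frame's shape is sound
    errors = evals_validation.validate_values(
        evals_df, project_name="my-project", model_version=None
    )
for err in errors:
    print(err)
```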
arize/spans/validation/metadata/dataframe_form_validation.py
CHANGED

@@ -7,7 +7,7 @@ from arize.spans.columns import SPAN_SPAN_ID_COL


 class MetadataFormError(ValidationError):
-    """Raised when metadata DataFrame structure or format is invalid."""
+    """Raised when metadata :class:`pandas.DataFrame` structure or format is invalid."""

     def __init__(self, message: str, resolution: str) -> None:
         """Initialize the exception with metadata form error context.
arize/spans/validation/spans/dataframe_form_validation.py
CHANGED

@@ -50,13 +50,13 @@ def log_info_dataframe_extra_column_names(
 def check_dataframe_column_content_type(
     df: pd.DataFrame,
 ) -> list[InvalidDataFrameColumnContentTypes]:
-    """Validates span DataFrame columns match OpenInference types.
+    """Validates span :class:`pandas.DataFrame` columns match OpenInference types.

     Checks that columns have appropriate data types: lists of dicts, dicts, numeric,
     boolean, timestamp, JSON strings, or plain strings based on column specifications.

     Args:
-        df: The DataFrame to validate.
+        df: The :class:`pandas.DataFrame` to validate.

     Returns:
         List of validation errors for columns with incorrect types.
arize/spans/validation/spans/spans_validation.py
CHANGED

@@ -56,13 +56,13 @@ def validate_argument_types(
 def validate_dataframe_form(
     spans_dataframe: pd.DataFrame,
 ) -> list[ValidationError]:
-    """Validate the structure and form of a spans DataFrame
+    """Validate the structure and form of a spans :class:`pandas.DataFrame`.

     Args:
-        spans_dataframe: The DataFrame containing spans data to validate.
+        spans_dataframe: The :class:`pandas.DataFrame` containing spans data to validate.

     Returns:
-        List of validation errors found in the DataFrame structure.
+        List of validation errors found in the :class:`pandas.DataFrame` structure.
     """
     df_validation.log_info_dataframe_extra_column_names(spans_dataframe)
     checks = chain(
@@ -88,15 +88,15 @@ def validate_values(
     project_name: str,
     model_version: str | None = None,
 ) -> list[ValidationError]:
-    """Validate the values within a spans DataFrame
+    """Validate the values within a spans :class:`pandas.DataFrame`.

     Args:
-        spans_dataframe: The DataFrame containing spans data to validate.
+        spans_dataframe: The :class:`pandas.DataFrame` containing spans data to validate.
         project_name: The project name associated with the spans.
         model_version: Optional model version. Defaults to None.

     Returns:
-        List of validation errors found in DataFrame values.
+        List of validation errors found in :class:`pandas.DataFrame` values.
     """
     checks = chain(
         # Common
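The spans-side validators expose the same two-stage interface as the evals module above; a compact sketch under the same import-path assumption:

```python
import pandas as pd

# Import path inferred from the file list above.
from arize.spans.validation.spans import spans_validation

spans_df = pd.DataFrame({"context.span_id": ["abc123"]})  # column name assumed

errors = spans_validation.validate_dataframe_form(spans_df)
errors += spans_validation.validate_values(spans_df, project_name="my-project")
```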
arize/utils/arrow.py
CHANGED
@@ -1,6 +1,6 @@
+# type: ignore[pb2]
 """Apache Arrow utilities for data serialization and file operations."""

-# type: ignore[pb2]
 from __future__ import annotations

 import base64
@@ -38,7 +38,7 @@ def post_arrow_table(
         pa_table: The PyArrow table containing the data.
         proto_schema: The protobuf schema for the data.
         headers: HTTP headers for the request.
-        timeout: Request timeout in seconds, or None for no timeout.
+        timeout: Request timeout in seconds, or :obj:`None` for no timeout.
         verify: Whether to verify SSL certificates.
         max_chunksize: Maximum chunk size for splitting large tables.
         tmp_dir: Temporary directory for serialization. Defaults to "".
arize/utils/cache.py
CHANGED
@@ -31,7 +31,7 @@ def load_cached_resource(
         format: File format for cached data. Defaults to "parquet".

     Returns:
-        The cached DataFrame if found and valid, None otherwise.
+        The cached :class:`pandas.DataFrame` if found and valid, :obj:`None` otherwise.
     """
     key = _get_cache_key(resource, resource_id, resource_updated_at)
     filepath = _get_abs_file_path(cache_dir, f"{key}.{format}", resource)
@@ -59,7 +59,7 @@ def cache_resource(
         resource: Resource type name (e.g., "dataset", "experiment").
         resource_id: Unique identifier for the resource.
         resource_updated_at: Optional timestamp of last resource update.
-        resource_data: DataFrame containing the resource data.
+        resource_data: :class:`pandas.DataFrame` containing the resource data.
         format: File format for cached data. Defaults to "parquet".
     """
     key = _get_cache_key(resource, resource_id, resource_updated_at)
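These helpers write an exported resource to disk and read it back, keyed by resource type, id, and update timestamp. The hunks only show part of each signature, so the round-trip sketch below is an assumption; parameters not shown in the diff (for example a cache directory) may also be required:

```python
import pandas as pd

# Import path from the file list above; keyword names come from the docstrings,
# but the full signatures are not visible in this diff.
from arize.utils.cache import cache_resource, load_cached_resource

df = pd.DataFrame({"id": [1, 2]})

cache_resource(
    resource="dataset",
    resource_id="ds-123",
    resource_updated_at=None,  # optional last-update timestamp
    resource_data=df,
)

cached = load_cached_resource(
    resource="dataset",
    resource_id="ds-123",
    resource_updated_at=None,
)
# `cached` is the stored DataFrame if found and still valid, None otherwise.
```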
arize/utils/dataframe.py
CHANGED
@@ -9,10 +9,10 @@ from arize.ml.types import BaseSchema

 # Resets the dataframe index if it is not a RangeIndex
 def reset_dataframe_index(dataframe: pd.DataFrame) -> None:
-    """Reset the DataFrame index in-place if it is not a RangeIndex.
+    """Reset the :class:`pandas.DataFrame` index in-place if it is not a RangeIndex.

     Args:
-        dataframe: The pandas
+        dataframe: The :class:`pandas.DataFrame` to reset.
     """
     if not isinstance(dataframe.index, pd.RangeIndex):
         drop = dataframe.index.name in dataframe.columns
@@ -25,10 +25,10 @@ def remove_extraneous_columns(
     column_list: list[str] | None = None,
     regex: str | None = None,
 ) -> pd.DataFrame:
-    """Filter DataFrame to keep only relevant columns based on schema, list, or regex.
+    """Filter :class:`pandas.DataFrame` to keep only relevant columns based on schema, list, or regex.

     Args:
-        df: The pandas
+        df: The :class:`pandas.DataFrame` to filter.
         schema: Optional schema defining used columns. Defaults to None.
         column_list: Optional explicit list of columns to keep. Defaults to None.
         regex: Optional regex pattern to match column names. Defaults to None.
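A quick sketch of the two helpers on a small frame. The keyword parameters come from the hunk above; that `df` is the first positional argument of `remove_extraneous_columns` is an assumption, since the start of its signature falls outside the hunk:

```python
import pandas as pd

# Import path from the file list above.
from arize.utils.dataframe import remove_extraneous_columns, reset_dataframe_index

df = pd.DataFrame({"keep_a": [1], "keep_b": [2], "tmp": [3]}, index=[10])

reset_dataframe_index(df)  # in-place: replaces the non-RangeIndex with 0..n-1

filtered = remove_extraneous_columns(df, regex=r"^keep_")
print(list(filtered.columns))  # expected: ['keep_a', 'keep_b']
```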
arize/utils/online_tasks/dataframe_preprocessor.py
CHANGED

@@ -10,7 +10,7 @@ logger = logging.getLogger(__name__)


 class ColumnNotFoundError(Exception):
-    """Raised when a specified column is not found in the DataFrame
+    """Raised when a specified column is not found in the :class:`pandas.DataFrame`."""

     def __init__(self, attribute: str) -> None:
         """Initialize with the attribute that couldn't be mapped to a column.
@@ -27,13 +27,13 @@ class ColumnNotFoundError(Exception):
 def extract_nested_data_to_column(
     attributes: list[str], df: pd.DataFrame
 ) -> pd.DataFrame:
-    """Extract nested attributes from complex data structures into new DataFrame columns.
+    """Extract nested attributes from complex data structures into new :class:`pandas.DataFrame` columns.

     This function, used in Online Tasks, is typically run on data exported from Arize.
-    It prepares the DataFrame by extracting relevant attributes from complex, deeply
+    It prepares the :class:`pandas.DataFrame` by extracting relevant attributes from complex, deeply
     nested data structures, such as those found in LLM outputs or JSON-like records.
     It helps extract specific values from these nested structures by identifying the
-    longest matching column name in the DataFrame and recursively accessing the desired
+    longest matching column name in the :class:`pandas.DataFrame` and recursively accessing the desired
     attribute path within each row. This preprocessing step ensures that the extracted
     values are available as new columns, allowing evaluators to process and assess
     these values effectively.
@@ -127,7 +127,7 @@ def _introspect_arize_attribute(value: object, attribute: str) -> object:
         attribute: "0.message.content"
     Returns: 'The capital of China is Beijing.'

-    - Returns None immediately when a key or index is not found
+    - Returns :obj:`None` immediately when a key or index is not found
     - Handles integer parts for lists
     - Parses JSON strings
     - Converts NumPy arrays to lists
@@ -174,10 +174,10 @@ def _parse_value(
     2) Else if `current_value` is a dict, check if `attribute_parts_unprocessed[0]` is a key.
        If not found, try combining `attribute_parts_unprocessed[0] + '.' + attribute_parts_unprocessed[1]`...
        to handle dotted keys in the dict.
-    3) If none match, return (None, 1) to signal "not found, consume 1 part."
+    3) If none match, return (:obj:`None`, 1) to signal "not found, consume 1 part."

     Returns (parsed_value, num_parts_processed):
-    - parsed_value: the found value or None if not found
+    - parsed_value: the found value or :obj:`None` if not found
     - num_parts_processed: how many parts were processed (1 or more)
     """
     if not attribute_parts_unprocessed:
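The docstring's own example translates directly into code. A hedged sketch: only the signature and the dotted-path behavior come from the hunks above, while the exported column name is hypothetical:

```python
import pandas as pd

# Import path from the file list above.
from arize.utils.online_tasks.dataframe_preprocessor import (
    extract_nested_data_to_column,
)

# Hypothetical exported column holding nested LLM output messages.
df = pd.DataFrame({
    "attributes.llm.output_messages": [
        [{"message": {"content": "The capital of China is Beijing."}}],
    ]
})

# Resolve index 0 -> key "message" -> key "content" within that column,
# producing a new column with the extracted string.
out = extract_nested_data_to_column(
    ["attributes.llm.output_messages.0.message.content"], df
)
```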
arize/utils/openinference_conversion.py
CHANGED

@@ -11,13 +11,13 @@ logger = logging.getLogger(__name__)


 def convert_datetime_columns_to_int(df: pd.DataFrame) -> pd.DataFrame:
-    """Convert datetime columns in a DataFrame to milliseconds since epoch.
+    """Convert datetime columns in a :class:`pandas.DataFrame` to milliseconds since epoch.

     Args:
-        df: The pandas
+        df: The :class:`pandas.DataFrame` to convert.

     Returns:
-        The DataFrame with datetime columns converted to integers.
+        The :class:`pandas.DataFrame` with datetime columns converted to integers.
     """
     for col in df.select_dtypes(
         include=["datetime64[ns]", "datetime64[ns, UTC]"]
@@ -27,13 +27,13 @@ def convert_datetime_columns_to_int(df: pd.DataFrame) -> pd.DataFrame:


 def convert_boolean_columns_to_str(df: pd.DataFrame) -> pd.DataFrame:
-    """Convert boolean columns in a DataFrame to string type.
+    """Convert boolean columns in a :class:`pandas.DataFrame` to string type.

     Args:
-        df: The pandas
+        df: The :class:`pandas.DataFrame` to convert.

     Returns:
-        The DataFrame with boolean columns converted to strings.
+        The :class:`pandas.DataFrame` with boolean columns converted to strings.
     """
     for col in df.columns:
         if df[col].dtype == "bool":
@@ -45,10 +45,10 @@ def convert_default_columns_to_json_str(df: pd.DataFrame) -> pd.DataFrame:
     """Convert dictionary values in specific columns to JSON strings.

     Args:
-        df: The pandas
+        df: The :class:`pandas.DataFrame` to convert.

     Returns:
-        The DataFrame with dictionaries in eligible columns converted to JSON strings.
+        The :class:`pandas.DataFrame` with dictionaries in eligible columns converted to JSON strings.
     """
     for col in df.columns:
         if _should_convert_json(col):
@@ -68,10 +68,10 @@ def convert_json_str_to_dict(df: pd.DataFrame) -> pd.DataFrame:
     """Convert JSON string values in specific columns to Python dictionaries.

     Args:
-        df: The pandas
+        df: The :class:`pandas.DataFrame` to convert.

     Returns:
-        The DataFrame with JSON strings in eligible columns converted to dictionaries.
+        The :class:`pandas.DataFrame` with JSON strings in eligible columns converted to dictionaries.
     """
     for col in df.columns:
         if _should_convert_json(col):
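These converters normalize span frames between pandas-native types and wire-friendly representations. A small sketch; the import path comes from the file list, while the exact string rendering of booleans is assumed:

```python
import pandas as pd

# Import path from the file list above.
from arize.utils.openinference_conversion import (
    convert_boolean_columns_to_str,
    convert_datetime_columns_to_int,
)

df = pd.DataFrame({
    "start_time": pd.to_datetime(["2024-01-01T00:00:00Z"]),  # tz-aware datetime64
    "is_root": [True],
})

df = convert_datetime_columns_to_int(df)  # datetime columns -> ms since epoch
df = convert_boolean_columns_to_str(df)   # bool columns -> strings (exact format assumed)
print(df.dtypes)
```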
arize/utils/proto.py
CHANGED
arize/version.py
CHANGED
{arize-8.0.0b0.dist-info → arize-8.0.0b2.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: arize
-Version: 8.0.0b0
+Version: 8.0.0b2
 Summary: A helper library to interact with Arize AI APIs
 Project-URL: Homepage, https://arize.com
 Project-URL: Documentation, https://docs.arize.com/arize
@@ -26,47 +26,36 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Classifier: Topic :: System :: Logging
 Classifier: Topic :: System :: Monitoring
 Requires-Python: >=3.10
-Requires-Dist: lazy-imports
 Requires-Dist: numpy>=2.0.0
-
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-Requires-Dist: wrapt<2.0.0,>=1.0.0; extra == 'datasets-experiments'
+Requires-Dist: openinference-semantic-conventions<1,>=0.1.25
+Requires-Dist: opentelemetry-exporter-otlp-proto-common>=1.38.0
+Requires-Dist: opentelemetry-exporter-otlp-proto-grpc>=1.38.0
+Requires-Dist: opentelemetry-sdk>=1.38.0
+Requires-Dist: opentelemetry-semantic-conventions<1,>=0.43b0
+Requires-Dist: pandas<3,>=2.0.0
+Requires-Dist: protobuf<6,>=4.21.0
+Requires-Dist: pyarrow>=0.15.0
+Requires-Dist: pydantic<3,>=2
+Requires-Dist: python-dateutil<3,>=2.8.2
+Requires-Dist: requests-futures<2,>=1.0.0
+Requires-Dist: requests<3,>=2.0.0
+Requires-Dist: tqdm<5,>4
+Requires-Dist: typing-extensions<5,>=4.7.1
+Requires-Dist: urllib3<3,>=2.1.0
+Requires-Dist: wrapt<2.0.0,>=1.0.0
 Provides-Extra: dev
 Requires-Dist: pytest==8.4.2; extra == 'dev'
 Requires-Dist: ruff==0.13.2; extra == 'dev'
-Provides-Extra:
-Requires-Dist:
-
-Requires-Dist:
-Requires-Dist:
-Requires-Dist:
-
-Requires-Dist:
-Provides-Extra:
-Requires-Dist:
-Requires-Dist: requests-futures<2,>=1.0.0; extra == 'ml-stream'
-Provides-Extra: spans
-Requires-Dist: openinference-semantic-conventions<1,>=0.1.21; extra == 'spans'
-Requires-Dist: opentelemetry-semantic-conventions<1,>=0.43b0; extra == 'spans'
-Requires-Dist: pandas<3,>=1.0.0; extra == 'spans'
-Requires-Dist: protobuf<6,>=4.21.0; extra == 'spans'
-Requires-Dist: pyarrow>=0.15.0; extra == 'spans'
-Requires-Dist: requests<3,>=2.0.0; extra == 'spans'
-Requires-Dist: tqdm; extra == 'spans'
+Provides-Extra: embeddings
+Requires-Dist: datasets!=2.14.*,<3,>=2.8; extra == 'embeddings'
+Requires-Dist: pillow<11,>=8.4.0; extra == 'embeddings'
+Requires-Dist: tokenizers<1,>=0.13; extra == 'embeddings'
+Requires-Dist: torch<3,>=1.13; extra == 'embeddings'
+Requires-Dist: transformers<5,>=4.25; extra == 'embeddings'
+Provides-Extra: mimic
+Requires-Dist: interpret-community[mimic]<1,>=0.22.0; extra == 'mimic'
+Provides-Extra: otel
+Requires-Dist: arize-otel<1,>=0.11.0; extra == 'otel'
 Description-Content-Type: text/markdown

 <p align="center">
@@ -92,6 +81,8 @@ Description-Content-Type: text/markdown
 - [Overview](#overview)
 - [Key Features](#key-features)
 - [Installation](#installation)
+  - [Optional Dependencies](#optional-dependencies)
+  - [Migrating from Version 7](#migrating-from-version-7)
 - [Usage](#usage)
   - [Instrumentation](#instrumentation)
   - [Operations on Spans](#operations-on-spans)

@@ -130,16 +121,18 @@ Description-Content-Type: text/markdown

 A helper package to interact with Arize AI APIs.

-Arize is an AI engineering platform. It helps engineers develop, evaluate, and observe AI applications and agents.
+Arize is an AI engineering platform. It helps engineers develop, evaluate, and observe AI applications and agents.
+
+Arize has both Enterprise and OSS products to support this goal:

-Arize has both Enterprise and OSS products to support this goal:
 - [Arize AX](https://arize.com/) — an enterprise AI engineering platform from development to production, with an embedded AI Copilot
 - [Phoenix](https://github.com/Arize-ai/phoenix) — a lightweight, open-source project for tracing, prompt engineering, and evaluation
 - [OpenInference](https://github.com/Arize-ai/openinference) — an open-source instrumentation package to trace LLM applications across models and frameworks

-We log over 1 trillion inferences and spans, 10 million evaluation runs, and 2 million OSS downloads every month.
+We log over 1 trillion inferences and spans, 10 million evaluation runs, and 2 million OSS downloads every month.

 # Key Features
+
 - [**_Tracing_**](https://docs.arize.com/arize/observe/tracing) - Trace your LLM application's runtime using OpenTelemetry-based instrumentation.
 - [**_Evaluation_**](https://docs.arize.com/arize/evaluate/online-evals) - Leverage LLMs to benchmark your application's performance using response and retrieval evals.
 - [**_Datasets_**](https://docs.arize.com/arize/develop/datasets) - Create versioned datasets of examples for experimentation, evaluation, and fine-tuning.

@@ -149,19 +142,36 @@ We log over 1 trillion inferences and spans, 10 million evaluation runs, and 2 m

 # Installation

-Install
+Install the base package:

 ```bash
-pip install arize
+pip install arize
 ```
-
+
+## Optional Dependencies
+
+The following optional extras provide specialized functionality:
+
+> **Note:** The `otel` extra installs the `arize-otel` package, which is also available as a standalone package. If you only need auto-instrumentation without the full SDK, install `arize-otel` directly.
+
+| Extra | Install Command | What It Provides |
+|-------|----------------|------------------|
+| **otel** | `pip install arize[otel]` | OpenTelemetry auto-instrumentation package (arize-otel) for automatic tracing |
+| **embeddings** | `pip install arize[embeddings]` | Automatic embedding generation for NLP, CV, and structured data (Pillow, datasets, tokenizers, torch, transformers) |
+| **mimic** | `pip install arize[mimic]` | MIMIC explainer for model interpretability |
+
+Install multiple extras:

 ```bash
-pip install arize
+pip install arize[otel,embeddings,mimic]
 ```

+## Migrating from Version 7
+
+If you're upgrading from version 7, please refer to the [Migration Guide](https://arize.com/docs/api-clients/python/version-8/migration) for detailed migration steps and breaking changes.
+
 # Usage
-
+
 ## Instrumentation

 See [arize-otel in PyPI](https://pypi.org/project/arize-otel/):

@@ -183,9 +193,8 @@ OpenAIInstrumentor().instrument(tracer_provider=tracer_provider)

 ## Operations on Spans

-Use `arize.spans` to interact with spans: log spans into Arize, update the span's
-
-> **WARNING**: This is currently under an alpha release. Install with `pip install arize==8.0.0ax` where the `x` denotes the specific alpha version. Check the [pre-releases](https://pypi.org/project/arize/#history) page in PyPI.
+Use `arize.spans` to interact with spans: log spans into Arize, update the span's
+evaluations, annotations and metadata in bulk.

 ### Logging spans

@@ -259,9 +268,8 @@ df = client.spans.export_to_df(

 ## Operations on ML Models

-Use `arize.
-
-> **WARNING**: This is currently under an alpha release. Install with `pip install arize==8.0.0ax` where the `x` denotes the specific alpha version. Check the [pre-releases](https://pypi.org/project/arize/#history) page in PyPI.
+Use `arize.ml` to interact with ML models: log ML data (training, validation, production)
+into Arize, either streaming or in batches.

 ### Stream log ML Data for a Classification use-case

@@ -276,7 +284,7 @@ MODEL_NAME = "<your-model-name>"
 features=...
 embedding_features=...

-response = client.
+response = client.ml.log_stream(
     space_id=SPACE_ID,
     model_name=MODEL_NAME,
     model_type=ModelTypes.SCORE_CATEGORICAL,

@@ -328,7 +336,7 @@ schema = Schema(
 )

 # Logging Production DataFrame
-response = client.
+response = client.ml.log_batch(
     space_id=SPACE_ID,
     model_name=MODEL_NAME,
     model_type=ModelTypes.OBJECT_DETECTION,

@@ -356,7 +364,7 @@ SPACE_ID = "<your-space-id>"
 MODEL_NAME = "<your-model-name>"
 MODEL_VERSION = "1.0"

-df = client.
+df = client.ml.export_to_df(
     space_id=SPACE_ID,
     model_name=MODEL_NAME,
     environment=Environments.TRAINING,

@@ -414,7 +422,7 @@ The response is an object of type `DatasetsList200Response`, and you can access

 ```python
 # Get the list of datasets from the response
-dataset_list = resp.datasets
+dataset_list = resp.datasets
 # Get the response as a dictionary
 resp_dict = resp.to_dict()
 # Get the response in JSON format

@@ -530,7 +538,7 @@ The response is an object of type `ExperimentsList200Response`, and you can acce

 ```python
 # Get the list of datasets from the response
-experiment_list = resp.experiments
+experiment_list = resp.experiments
 # Get the response as a dictionary
 resp_dict = resp.to_dict()
 # Get the response in JSON format

@@ -554,7 +562,7 @@ experiment, experiment_df = client.run_experiment(
     concurrency=..., # The number of concurrent tasks to run. Defaults to 3.
     set_global_tracer_provider=..., # If True, sets the global tracer provider for the experiment. Defaults to False
     exit_on_error=..., # If True, the experiment will stop running on first occurrence of an error. Defaults to False
-)
+)
 ```

 The `Experiment` object also counts with convenience method similar to `List***` objects:

@@ -649,7 +657,7 @@ resp_df = resp.to_df()

 ### In Code

-You can use `configure_logging` to set up the logging behavior of the Arize package to your needs.
+You can use `configure_logging` to set up the logging behavior of the Arize package to your needs.

 ```python
 from arize.logging import configure_logging

@@ -657,7 +665,7 @@ from arize.logging import configure_logging
 configure_logging(
     level=..., # Defaults to logging.INFO
     structured=..., # if True, emit JSON logs. Defaults to False
-)
+)
 ```

 ### Via Environment Variables

@@ -668,11 +676,11 @@ Configure the same options as the section above, via:
 import os

 # Whether or not you want to disable logging altogether
-os.environ["ARIZE_LOG_ENABLE"] = "true"
+os.environ["ARIZE_LOG_ENABLE"] = "true"
 # Set up the logging level
-os.environ["ARIZE_LOG_LEVEL"] = "debug"
+os.environ["ARIZE_LOG_LEVEL"] = "debug"
 # Whether or not you want structured JSON logs
-os.environ["ARIZE_LOG_STRUCTURED"] = "false"
+os.environ["ARIZE_LOG_STRUCTURED"] = "false"
 ```

 The default behavior of Arize's logs is: enabled, `INFO` level, and not structured.