arize 8.0.0b1__py3-none-any.whl → 8.0.0b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arize/__init__.py +1 -1
- arize/_client_factory.py +50 -0
- arize/_flight/client.py +4 -4
- arize/_generated/api_client/api/datasets_api.py +6 -6
- arize/_generated/api_client/api/experiments_api.py +6 -6
- arize/_generated/api_client/api/projects_api.py +3 -3
- arize/_lazy.py +25 -9
- arize/client.py +6 -16
- arize/config.py +9 -36
- arize/constants/ml.py +9 -16
- arize/constants/spans.py +5 -10
- arize/datasets/client.py +13 -9
- arize/datasets/errors.py +1 -1
- arize/datasets/validation.py +2 -2
- arize/embeddings/auto_generator.py +2 -2
- arize/embeddings/errors.py +2 -2
- arize/embeddings/tabular_generators.py +1 -1
- arize/exceptions/base.py +0 -52
- arize/exceptions/parameters.py +0 -329
- arize/experiments/client.py +14 -7
- arize/experiments/evaluators/base.py +6 -6
- arize/experiments/evaluators/executors.py +10 -3
- arize/experiments/evaluators/types.py +2 -2
- arize/experiments/functions.py +18 -11
- arize/experiments/types.py +3 -5
- arize/logging.py +1 -1
- arize/ml/batch_validation/errors.py +10 -1004
- arize/ml/batch_validation/validator.py +273 -225
- arize/ml/casting.py +7 -7
- arize/ml/client.py +12 -11
- arize/ml/proto.py +6 -6
- arize/ml/stream_validation.py +2 -3
- arize/ml/surrogate_explainer/mimic.py +3 -3
- arize/ml/types.py +1 -55
- arize/pre_releases.py +6 -3
- arize/projects/client.py +9 -4
- arize/regions.py +2 -2
- arize/spans/client.py +13 -11
- arize/spans/columns.py +32 -36
- arize/spans/conversion.py +5 -6
- arize/spans/validation/common/argument_validation.py +3 -3
- arize/spans/validation/common/dataframe_form_validation.py +6 -6
- arize/spans/validation/common/value_validation.py +1 -1
- arize/spans/validation/evals/dataframe_form_validation.py +4 -4
- arize/spans/validation/evals/evals_validation.py +6 -6
- arize/spans/validation/metadata/dataframe_form_validation.py +1 -1
- arize/spans/validation/spans/dataframe_form_validation.py +2 -2
- arize/spans/validation/spans/spans_validation.py +6 -6
- arize/utils/arrow.py +2 -2
- arize/utils/cache.py +2 -2
- arize/utils/dataframe.py +4 -4
- arize/utils/online_tasks/dataframe_preprocessor.py +7 -7
- arize/utils/openinference_conversion.py +10 -10
- arize/utils/proto.py +1 -1
- arize/version.py +1 -1
- {arize-8.0.0b1.dist-info → arize-8.0.0b2.dist-info}/METADATA +23 -6
- {arize-8.0.0b1.dist-info → arize-8.0.0b2.dist-info}/RECORD +60 -59
- {arize-8.0.0b1.dist-info → arize-8.0.0b2.dist-info}/WHEEL +0 -0
- {arize-8.0.0b1.dist-info → arize-8.0.0b2.dist-info}/licenses/LICENSE +0 -0
- {arize-8.0.0b1.dist-info → arize-8.0.0b2.dist-info}/licenses/NOTICE +0 -0
arize/ml/casting.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
+
# type: ignore[pb2]
|
|
1
2
|
"""Type casting utilities for ML model data conversion."""
|
|
2
3
|
|
|
3
|
-
# type: ignore[pb2]
|
|
4
4
|
from __future__ import annotations
|
|
5
5
|
|
|
6
6
|
import math
|
|
@@ -132,12 +132,12 @@ def cast_typed_columns(
|
|
|
132
132
|
a column across many SDK uploads.
|
|
133
133
|
|
|
134
134
|
Args:
|
|
135
|
-
dataframe (
|
|
135
|
+
dataframe (:class:`pandas.DataFrame`): A deepcopy of the user's dataframe.
|
|
136
136
|
schema (Schema): The schema, which may include feature and tag column names
|
|
137
137
|
in a TypedColumns object or a List[string].
|
|
138
138
|
|
|
139
139
|
Returns:
|
|
140
|
-
tuple[
|
|
140
|
+
tuple[:class:`pandas.DataFrame`, Schema]: A tuple containing:
|
|
141
141
|
- dataframe: The dataframe, with columns cast to the specified types.
|
|
142
142
|
- schema: A new Schema object, with feature and tag column names converted
|
|
143
143
|
to the List[string] format expected in downstream validation.
|
|
@@ -290,12 +290,12 @@ def _cast_columns(
|
|
|
290
290
|
(feature_column_names or tag_column_names)
|
|
291
291
|
|
|
292
292
|
Args:
|
|
293
|
-
dataframe (
|
|
293
|
+
dataframe (:class:`pandas.DataFrame`): A deepcopy of the user's dataframe.
|
|
294
294
|
columns (TypedColumns): The TypedColumns object, which specifies the columns
|
|
295
295
|
to cast (and/or to not cast) and their target types.
|
|
296
296
|
|
|
297
297
|
Returns:
|
|
298
|
-
|
|
298
|
+
:class:`pandas.DataFrame`: The dataframe with columns cast to the specified types.
|
|
299
299
|
|
|
300
300
|
Raises:
|
|
301
301
|
ColumnCastingError: If casting fails.
|
|
@@ -350,12 +350,12 @@ def _cast_df(
|
|
|
350
350
|
"""Cast columns in a dataframe to the specified type.
|
|
351
351
|
|
|
352
352
|
Args:
|
|
353
|
-
df (
|
|
353
|
+
df (:class:`pandas.DataFrame`): A deepcopy of the user's dataframe.
|
|
354
354
|
cols (list[str]): The list of column names to cast.
|
|
355
355
|
target_type_str (str): The target type to cast to.
|
|
356
356
|
|
|
357
357
|
Returns:
|
|
358
|
-
|
|
358
|
+
:class:`pandas.DataFrame`: The dataframe with columns cast to the specified types.
|
|
359
359
|
|
|
360
360
|
Raises:
|
|
361
361
|
Exception: If casting fails. Common exceptions raised by astype() are
|
arize/ml/client.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
+
# type: ignore[pb2]
|
|
1
2
|
"""Client implementation for managing ML models in the Arize platform."""
|
|
2
3
|
|
|
3
|
-
# type: ignore[pb2]
|
|
4
4
|
from __future__ import annotations
|
|
5
5
|
|
|
6
6
|
import copy
|
|
@@ -542,7 +542,7 @@ class MLModelsClient:
|
|
|
542
542
|
timeout: float | None = None,
|
|
543
543
|
tmp_dir: str = "",
|
|
544
544
|
) -> requests.Response:
|
|
545
|
-
"""Log a batch of model predictions and actuals to Arize from a pandas
|
|
545
|
+
"""Log a batch of model predictions and actuals to Arize from a :class:`pandas.DataFrame`.
|
|
546
546
|
|
|
547
547
|
This method uploads multiple records to Arize in a single batch operation using
|
|
548
548
|
Apache Arrow format for efficient transfer. The dataframe structure is defined
|
|
@@ -554,8 +554,8 @@ class MLModelsClient:
|
|
|
554
554
|
model_type: The type of model. Supported types: BINARY, MULTI_CLASS, REGRESSION,
|
|
555
555
|
RANKING, OBJECT_DETECTION. Note: GENERATIVE_LLM is not supported; use the
|
|
556
556
|
spans module instead.
|
|
557
|
-
dataframe: Pandas DataFrame containing the data to
|
|
558
|
-
correspond to the schema field mappings.
|
|
557
|
+
dataframe (:class:`pandas.DataFrame`): Pandas DataFrame containing the data to
|
|
558
|
+
upload. Columns should correspond to the schema field mappings.
|
|
559
559
|
schema: Schema object (Schema or CorpusSchema) that defines the mapping between
|
|
560
560
|
dataframe columns and Arize data fields (e.g., prediction_label_column_name,
|
|
561
561
|
feature_column_names, etc.).
|
|
@@ -811,10 +811,10 @@ class MLModelsClient:
|
|
|
811
811
|
similarity_search_params: SimilaritySearchParams | None = None,
|
|
812
812
|
stream_chunk_size: int | None = None,
|
|
813
813
|
) -> pd.DataFrame:
|
|
814
|
-
"""Export model data from Arize to a pandas
|
|
814
|
+
"""Export model data from Arize to a :class:`pandas.DataFrame`.
|
|
815
815
|
|
|
816
816
|
Retrieves prediction and optional actual data for a model within a specified time
|
|
817
|
-
range and returns it as a pandas
|
|
817
|
+
range and returns it as a :class:`pandas.DataFrame` for analysis.
|
|
818
818
|
|
|
819
819
|
Args:
|
|
820
820
|
space_id: The space ID where the model resides.
|
|
@@ -835,8 +835,9 @@ class MLModelsClient:
|
|
|
835
835
|
stream_chunk_size: Optional chunk size for streaming large result sets.
|
|
836
836
|
|
|
837
837
|
Returns:
|
|
838
|
-
A pandas DataFrame containing the exported data
|
|
839
|
-
|
|
838
|
+
:class:`pandas.DataFrame`: A pandas DataFrame containing the exported data
|
|
839
|
+
with columns for predictions, actuals (if requested), features, tags,
|
|
840
|
+
timestamps, and other model metadata.
|
|
840
841
|
|
|
841
842
|
Raises:
|
|
842
843
|
RuntimeError: If the Flight client request fails or returns no response.
|
|
@@ -895,7 +896,7 @@ class MLModelsClient:
|
|
|
895
896
|
"""Export model data from Arize to a Parquet file and return as DataFrame.
|
|
896
897
|
|
|
897
898
|
Retrieves prediction and optional actual data for a model within a specified time
|
|
898
|
-
range, saves it as a Parquet file, and returns it as a pandas
|
|
899
|
+
range, saves it as a Parquet file, and returns it as a :class:`pandas.DataFrame`.
|
|
899
900
|
|
|
900
901
|
Args:
|
|
901
902
|
space_id: The space ID where the model resides.
|
|
@@ -916,8 +917,8 @@ class MLModelsClient:
|
|
|
916
917
|
stream_chunk_size: Optional chunk size for streaming large result sets.
|
|
917
918
|
|
|
918
919
|
Returns:
|
|
919
|
-
A pandas DataFrame containing the exported data.
|
|
920
|
-
|
|
920
|
+
:class:`pandas.DataFrame`: A pandas DataFrame containing the exported data.
|
|
921
|
+
The data is also saved to a Parquet file by the underlying export client.
|
|
921
922
|
|
|
922
923
|
Raises:
|
|
923
924
|
RuntimeError: If the Flight client request fails or returns no response.
|
arize/ml/proto.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
+
# type: ignore[pb2]
|
|
1
2
|
"""Protocol buffer utilities for ML model data serialization."""
|
|
2
3
|
|
|
3
|
-
# type: ignore[pb2]
|
|
4
4
|
from __future__ import annotations
|
|
5
5
|
|
|
6
6
|
from google.protobuf.timestamp_pb2 import Timestamp
|
|
@@ -30,10 +30,10 @@ def get_pb_dictionary(d: dict[object, object] | None) -> dict[str, object]:
|
|
|
30
30
|
"""Convert a dictionary to protobuf format with string keys and pb2.Value values.
|
|
31
31
|
|
|
32
32
|
Args:
|
|
33
|
-
d: Dictionary to convert, or None
|
|
33
|
+
d: Dictionary to convert, or :obj:`None`.
|
|
34
34
|
|
|
35
35
|
Returns:
|
|
36
|
-
Dictionary with string keys and protobuf Value objects, or empty dict if input is None
|
|
36
|
+
Dictionary with string keys and protobuf Value objects, or empty dict if input is :obj:`None`.
|
|
37
37
|
"""
|
|
38
38
|
if d is None:
|
|
39
39
|
return {}
|
|
@@ -56,7 +56,7 @@ def get_pb_value(name: str | int | float, value: pb2.Value) -> pb2.Value:
|
|
|
56
56
|
value: The value to convert to protobuf format.
|
|
57
57
|
|
|
58
58
|
Returns:
|
|
59
|
-
A pb2.Value protobuf object, or None if value cannot be converted.
|
|
59
|
+
A pb2.Value protobuf object, or :obj:`None` if value cannot be converted.
|
|
60
60
|
|
|
61
61
|
Raises:
|
|
62
62
|
TypeError: If value type is not supported.
|
|
@@ -139,10 +139,10 @@ def get_pb_timestamp(time_overwrite: int | None) -> object | None:
|
|
|
139
139
|
"""Convert a Unix timestamp to a protobuf Timestamp object.
|
|
140
140
|
|
|
141
141
|
Args:
|
|
142
|
-
time_overwrite: Unix epoch time in seconds, or None
|
|
142
|
+
time_overwrite: Unix epoch time in seconds, or :obj:`None`.
|
|
143
143
|
|
|
144
144
|
Returns:
|
|
145
|
-
A protobuf Timestamp object, or None if input is None
|
|
145
|
+
A protobuf Timestamp object, or :obj:`None` if input is :obj:`None`.
|
|
146
146
|
|
|
147
147
|
Raises:
|
|
148
148
|
TypeError: If time_overwrite is not an integer.
|
arize/ml/stream_validation.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
|
-
"""Stream validation logic for ML model predictions."""
|
|
2
|
-
|
|
3
1
|
# type: ignore[pb2]
|
|
2
|
+
"""Stream validation logic for ML model predictions."""
|
|
4
3
|
|
|
5
4
|
from arize.constants.ml import MAX_PREDICTION_ID_LEN, MIN_PREDICTION_ID_LEN
|
|
6
5
|
from arize.exceptions.parameters import (
|
|
@@ -185,7 +184,7 @@ def validate_and_convert_prediction_id(
|
|
|
185
184
|
"""Validate and convert a prediction ID to string format, or generate one if absent.
|
|
186
185
|
|
|
187
186
|
Args:
|
|
188
|
-
prediction_id: The prediction ID to validate/convert, or None
|
|
187
|
+
prediction_id: The prediction ID to validate/convert, or :obj:`None`.
|
|
189
188
|
environment: The environment context (training, validation, production).
|
|
190
189
|
prediction_label: Optional prediction label for delayed record detection.
|
|
191
190
|
actual_label: Optional actual label for delayed record detection.
|
|
@@ -36,7 +36,7 @@ class Mimic:
|
|
|
36
36
|
"""Initialize the Mimic explainer with training data and model.
|
|
37
37
|
|
|
38
38
|
Args:
|
|
39
|
-
X: Training data DataFrame for the surrogate model.
|
|
39
|
+
X: Training data :class:`pandas.DataFrame` for the surrogate model.
|
|
40
40
|
model_func: Model function to explain.
|
|
41
41
|
"""
|
|
42
42
|
self.explainer = MimicExplainer(
|
|
@@ -48,7 +48,7 @@ class Mimic:
|
|
|
48
48
|
)
|
|
49
49
|
|
|
50
50
|
def explain(self, X: pd.DataFrame) -> pd.DataFrame:
|
|
51
|
-
"""Explain feature importance for the given input DataFrame
|
|
51
|
+
"""Explain feature importance for the given input :class:`pandas.DataFrame`."""
|
|
52
52
|
return pd.DataFrame(
|
|
53
53
|
self.explainer.explain_local(X).local_importance_values,
|
|
54
54
|
columns=X.columns,
|
|
@@ -59,7 +59,7 @@ class Mimic:
|
|
|
59
59
|
def augment(
|
|
60
60
|
df: pd.DataFrame, schema: Schema, model_type: ModelTypes
|
|
61
61
|
) -> tuple[pd.DataFrame, Schema]:
|
|
62
|
-
"""Augment the DataFrame and schema with SHAP values for explainability."""
|
|
62
|
+
"""Augment the :class:`pandas.DataFrame` and schema with SHAP values for explainability."""
|
|
63
63
|
features = schema.feature_column_names
|
|
64
64
|
X = df[features]
|
|
65
65
|
|
arize/ml/types.py
CHANGED
|
@@ -16,27 +16,13 @@ from typing import (
|
|
|
16
16
|
import numpy as np
|
|
17
17
|
|
|
18
18
|
from arize.constants.ml import (
|
|
19
|
-
# MAX_MULTI_CLASS_NAME_LENGTH,
|
|
20
|
-
# MAX_NUMBER_OF_MULTI_CLASS_CLASSES,
|
|
21
19
|
MAX_MULTI_CLASS_NAME_LENGTH,
|
|
22
20
|
MAX_NUMBER_OF_MULTI_CLASS_CLASSES,
|
|
23
21
|
MAX_NUMBER_OF_SIMILARITY_REFERENCES,
|
|
24
22
|
MAX_RAW_DATA_CHARACTERS,
|
|
25
23
|
MAX_RAW_DATA_CHARACTERS_TRUNCATION,
|
|
26
|
-
# MAX_RAW_DATA_CHARACTERS,
|
|
27
|
-
# MAX_RAW_DATA_CHARACTERS_TRUNCATION,
|
|
28
24
|
)
|
|
29
25
|
from arize.exceptions.parameters import InvalidValueType
|
|
30
|
-
|
|
31
|
-
#
|
|
32
|
-
# from arize.utils.constants import (
|
|
33
|
-
# MAX_MULTI_CLASS_NAME_LENGTH,
|
|
34
|
-
# MAX_NUMBER_OF_MULTI_CLASS_CLASSES,
|
|
35
|
-
# MAX_NUMBER_OF_SIMILARITY_REFERENCES,
|
|
36
|
-
# MAX_RAW_DATA_CHARACTERS,
|
|
37
|
-
# MAX_RAW_DATA_CHARACTERS_TRUNCATION,
|
|
38
|
-
# )
|
|
39
|
-
# from arize.utils.errors import InvalidValueType
|
|
40
26
|
from arize.logging import get_truncation_warning_message
|
|
41
27
|
from arize.utils.types import is_dict_of, is_iterable_of, is_list_of
|
|
42
28
|
|
|
@@ -299,30 +285,6 @@ class Embedding(NamedTuple):
|
|
|
299
285
|
return any(isinstance(data, t) for t in (list, np.ndarray))
|
|
300
286
|
|
|
301
287
|
|
|
302
|
-
# @dataclass
|
|
303
|
-
# class _PromptOrResponseText:
|
|
304
|
-
# data: str
|
|
305
|
-
#
|
|
306
|
-
# def validate(self, name: str) -> None:
|
|
307
|
-
# # Validate that data is a string
|
|
308
|
-
# if not isinstance(self.data, str):
|
|
309
|
-
# raise TypeError(f"'{name}' must be a str")
|
|
310
|
-
#
|
|
311
|
-
# character_count = len(self.data)
|
|
312
|
-
# if character_count > MAX_RAW_DATA_CHARACTERS:
|
|
313
|
-
# raise ValueError(
|
|
314
|
-
# f"'{name}' field must not contain more than {MAX_RAW_DATA_CHARACTERS} characters. "
|
|
315
|
-
# f"Found {character_count}."
|
|
316
|
-
# )
|
|
317
|
-
# elif character_count > MAX_RAW_DATA_CHARACTERS_TRUNCATION:
|
|
318
|
-
# logger.warning(
|
|
319
|
-
# get_truncation_warning_message(
|
|
320
|
-
# f"'{name}'", MAX_RAW_DATA_CHARACTERS_TRUNCATION
|
|
321
|
-
# )
|
|
322
|
-
# )
|
|
323
|
-
# return None
|
|
324
|
-
|
|
325
|
-
|
|
326
288
|
class LLMRunMetadata(NamedTuple):
|
|
327
289
|
"""Metadata for LLM execution including token counts and latency."""
|
|
328
290
|
|
|
@@ -1021,22 +983,6 @@ class LLMRunMetadataColumnNames:
|
|
|
1021
983
|
)
|
|
1022
984
|
|
|
1023
985
|
|
|
1024
|
-
# @dataclass
|
|
1025
|
-
# class DocumentColumnNames:
|
|
1026
|
-
# id_column_name: Optional[str] = None
|
|
1027
|
-
# version_column_name: Optional[str] = None
|
|
1028
|
-
# text_embedding_column_names: Optional[EmbeddingColumnNames] = None
|
|
1029
|
-
#
|
|
1030
|
-
# def __iter__(self):
|
|
1031
|
-
# return iter(
|
|
1032
|
-
# (
|
|
1033
|
-
# self.id_column_name,
|
|
1034
|
-
# self.version_column_name,
|
|
1035
|
-
# self.text_embedding_column_names,
|
|
1036
|
-
# )
|
|
1037
|
-
# )
|
|
1038
|
-
#
|
|
1039
|
-
#
|
|
1040
986
|
@dataclass
|
|
1041
987
|
class SimilarityReference:
|
|
1042
988
|
"""Reference to a prediction for similarity search operations."""
|
|
@@ -1531,7 +1477,7 @@ def add_to_column_count_dictionary(
|
|
|
1531
1477
|
|
|
1532
1478
|
Args:
|
|
1533
1479
|
column_dictionary: Dictionary mapping column names to counts.
|
|
1534
|
-
col: The column name to increment, or None to skip.
|
|
1480
|
+
col: The column name to increment, or :obj:`None` to skip.
|
|
1535
1481
|
"""
|
|
1536
1482
|
if col:
|
|
1537
1483
|
if col in column_dictionary:
|
arize/pre_releases.py
CHANGED
|
@@ -4,6 +4,7 @@ import functools
|
|
|
4
4
|
import logging
|
|
5
5
|
from collections.abc import Callable
|
|
6
6
|
from enum import StrEnum
|
|
7
|
+
from typing import TypeVar
|
|
7
8
|
|
|
8
9
|
from arize.version import __version__
|
|
9
10
|
|
|
@@ -19,6 +20,8 @@ class ReleaseStage(StrEnum):
|
|
|
19
20
|
|
|
20
21
|
_WARNED: set[str] = set()
|
|
21
22
|
|
|
23
|
+
_F = TypeVar("_F", bound=Callable)
|
|
24
|
+
|
|
22
25
|
|
|
23
26
|
def _format_prerelease_message(*, key: str, stage: ReleaseStage) -> str:
|
|
24
27
|
article = "an" if stage is ReleaseStage.ALPHA else "a"
|
|
@@ -28,10 +31,10 @@ def _format_prerelease_message(*, key: str, stage: ReleaseStage) -> str:
|
|
|
28
31
|
)
|
|
29
32
|
|
|
30
33
|
|
|
31
|
-
def prerelease_endpoint(*, stage: ReleaseStage, key: str) ->
|
|
34
|
+
def prerelease_endpoint(*, stage: ReleaseStage, key: str) -> Callable[[_F], _F]:
|
|
32
35
|
"""Decorate a method to emit a prerelease warning via logging once per process."""
|
|
33
36
|
|
|
34
|
-
def deco(fn:
|
|
37
|
+
def deco(fn: _F) -> _F:
|
|
35
38
|
@functools.wraps(fn)
|
|
36
39
|
def wrapper(*args: object, **kwargs: object) -> object:
|
|
37
40
|
if key not in _WARNED:
|
|
@@ -39,6 +42,6 @@ def prerelease_endpoint(*, stage: ReleaseStage, key: str) -> object:
|
|
|
39
42
|
logger.warning(_format_prerelease_message(key=key, stage=stage))
|
|
40
43
|
return fn(*args, **kwargs)
|
|
41
44
|
|
|
42
|
-
return wrapper
|
|
45
|
+
return wrapper # type: ignore[return-value]
|
|
43
46
|
|
|
44
47
|
return deco
|
arize/projects/client.py
CHANGED
|
@@ -9,6 +9,7 @@ from arize.pre_releases import ReleaseStage, prerelease_endpoint
|
|
|
9
9
|
|
|
10
10
|
if TYPE_CHECKING:
|
|
11
11
|
from arize._generated.api_client import models
|
|
12
|
+
from arize._generated.api_client.api_client import ApiClient
|
|
12
13
|
from arize.config import SDKConfiguration
|
|
13
14
|
|
|
14
15
|
logger = logging.getLogger(__name__)
|
|
@@ -26,18 +27,21 @@ class ProjectsClient:
|
|
|
26
27
|
:class:`arize.config.SDKConfiguration`.
|
|
27
28
|
"""
|
|
28
29
|
|
|
29
|
-
def __init__(
|
|
30
|
+
def __init__(
|
|
31
|
+
self, *, sdk_config: SDKConfiguration, generated_client: ApiClient
|
|
32
|
+
) -> None:
|
|
30
33
|
"""
|
|
31
34
|
Args:
|
|
32
35
|
sdk_config: Resolved SDK configuration.
|
|
36
|
+
generated_client: Shared generated API client instance.
|
|
33
37
|
""" # noqa: D205, D212
|
|
34
38
|
self._sdk_config = sdk_config
|
|
35
39
|
|
|
36
40
|
# Import at runtime so it's still lazy and extras-gated by the parent
|
|
37
41
|
from arize._generated import api_client as gen
|
|
38
42
|
|
|
39
|
-
# Use the
|
|
40
|
-
self._api = gen.ProjectsApi(
|
|
43
|
+
# Use the provided client directly
|
|
44
|
+
self._api = gen.ProjectsApi(generated_client)
|
|
41
45
|
|
|
42
46
|
@prerelease_endpoint(key="projects.list", stage=ReleaseStage.BETA)
|
|
43
47
|
def list(
|
|
@@ -125,7 +129,8 @@ class ProjectsClient:
|
|
|
125
129
|
Args:
|
|
126
130
|
project_id: Project ID.
|
|
127
131
|
|
|
128
|
-
Returns:
|
|
132
|
+
Returns:
|
|
133
|
+
This method returns None on success (common empty 204 response).
|
|
129
134
|
|
|
130
135
|
Raises:
|
|
131
136
|
arize._generated.api_client.exceptions.ApiException: If the API request fails
|
arize/regions.py
CHANGED
|
@@ -13,7 +13,7 @@ class Region(StrEnum):
|
|
|
13
13
|
EU_WEST_1 = "eu-west-1a"
|
|
14
14
|
CA_CENTRAL_1 = "ca-central-1a"
|
|
15
15
|
US_EAST_1 = "us-east-1b"
|
|
16
|
-
|
|
16
|
+
UNSET = ""
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
@dataclass(frozen=True)
|
|
@@ -36,5 +36,5 @@ def _get_region_endpoints(region: Region) -> RegionEndpoints:
|
|
|
36
36
|
|
|
37
37
|
|
|
38
38
|
REGION_ENDPOINTS: dict[Region, RegionEndpoints] = {
|
|
39
|
-
r: _get_region_endpoints(r) for r in Region if r != Region.
|
|
39
|
+
r: _get_region_endpoints(r) for r in Region if r != Region.UNSET
|
|
40
40
|
}
|
arize/spans/client.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
+
# type: ignore[pb2]
|
|
1
2
|
"""Client implementation for managing spans and traces in the Arize platform."""
|
|
2
3
|
|
|
3
|
-
# type: ignore[pb2]
|
|
4
4
|
from __future__ import annotations
|
|
5
5
|
|
|
6
6
|
import json
|
|
@@ -78,10 +78,11 @@ class SpansClient:
|
|
|
78
78
|
Args:
|
|
79
79
|
space_id: The space ID where the project resides.
|
|
80
80
|
project_name: A unique name to identify your project in the Arize platform.
|
|
81
|
-
dataframe: The dataframe containing the LLM traces.
|
|
82
|
-
evals_dataframe: A dataframe containing
|
|
83
|
-
The evaluations are joined to their corresponding spans
|
|
84
|
-
using only `context.span_id` from the spans
|
|
81
|
+
dataframe (:class:`pandas.DataFrame`): The dataframe containing the LLM traces.
|
|
82
|
+
evals_dataframe (:class:`pandas.DataFrame` | :obj:`None`): A dataframe containing
|
|
83
|
+
LLM evaluations data. The evaluations are joined to their corresponding spans
|
|
84
|
+
via a left outer join, i.e., using only `context.span_id` from the spans
|
|
85
|
+
dataframe. Defaults to None.
|
|
85
86
|
datetime_format: format for the timestamp captured in the LLM traces.
|
|
86
87
|
Defaults to "%Y-%m-%dT%H:%M:%S.%f+00:00".
|
|
87
88
|
validate: When set to True, validation is run before sending data.
|
|
@@ -280,7 +281,7 @@ class SpansClient:
|
|
|
280
281
|
Args:
|
|
281
282
|
space_id: The space ID where the project resides.
|
|
282
283
|
project_name: A unique name to identify your project in the Arize platform.
|
|
283
|
-
dataframe: A dataframe containing LLM evaluations data.
|
|
284
|
+
dataframe (:class:`pandas.DataFrame`): A dataframe containing LLM evaluations data.
|
|
284
285
|
validate: When set to True, validation is run before sending data.
|
|
285
286
|
Defaults to True.
|
|
286
287
|
force_http: Force the use of HTTP for data upload. Defaults to False.
|
|
@@ -453,7 +454,7 @@ class SpansClient:
|
|
|
453
454
|
Args:
|
|
454
455
|
space_id: The space ID where the project resides.
|
|
455
456
|
project_name: A unique name to identify your project in the Arize platform.
|
|
456
|
-
dataframe: A dataframe containing LLM annotation data.
|
|
457
|
+
dataframe (:class:`pandas.DataFrame`): A dataframe containing LLM annotation data.
|
|
457
458
|
validate: When set to True, validation is run before sending data.
|
|
458
459
|
Defaults to True.
|
|
459
460
|
"""
|
|
@@ -684,7 +685,8 @@ class SpansClient:
|
|
|
684
685
|
Args:
|
|
685
686
|
space_id: The space ID where the project resides.
|
|
686
687
|
project_name: A unique name to identify your project in the Arize platform.
|
|
687
|
-
dataframe: DataFrame with span_ids and either patch
|
|
688
|
+
dataframe (:class:`pandas.DataFrame`): DataFrame with span_ids and either patch
|
|
689
|
+
documents or metadata field columns.
|
|
688
690
|
patch_document_column_name: Name of the column containing JSON patch documents.
|
|
689
691
|
Defaults to "patch_document".
|
|
690
692
|
validate: When set to True, validation is run before sending data.
|
|
@@ -1004,14 +1006,14 @@ class SpansClient:
|
|
|
1004
1006
|
columns: list | None = None,
|
|
1005
1007
|
stream_chunk_size: int | None = None,
|
|
1006
1008
|
) -> pd.DataFrame:
|
|
1007
|
-
"""Export span data from Arize to a pandas
|
|
1009
|
+
"""Export span data from Arize to a :class:`pandas.DataFrame`.
|
|
1008
1010
|
|
|
1009
1011
|
Retrieves trace/span data from the specified project within a time range
|
|
1010
|
-
and returns it as a pandas
|
|
1012
|
+
and returns it as a :class:`pandas.DataFrame`. Supports filtering with SQL-like
|
|
1011
1013
|
WHERE clauses and similarity search for semantic retrieval.
|
|
1012
1014
|
|
|
1013
1015
|
Returns:
|
|
1014
|
-
DataFrame containing the requested span data with columns
|
|
1016
|
+
:class:`pandas.DataFrame`: DataFrame containing the requested span data with columns
|
|
1015
1017
|
for span metadata, attributes, events, and any custom fields.
|
|
1016
1018
|
"""
|
|
1017
1019
|
with ArizeFlightClient(
|
arize/spans/columns.py
CHANGED
|
@@ -39,8 +39,6 @@ class SpanColumn:
|
|
|
39
39
|
self.data_type = data_type
|
|
40
40
|
|
|
41
41
|
|
|
42
|
-
#
|
|
43
|
-
#
|
|
44
42
|
# Root level columns
|
|
45
43
|
SPAN_TRACE_ID_COL = SpanColumn(
|
|
46
44
|
name="context.trace_id",
|
|
@@ -96,18 +94,18 @@ SPAN_KIND_COL = SpanColumn(
|
|
|
96
94
|
data_type=SpanColumnDataType.STRING,
|
|
97
95
|
)
|
|
98
96
|
# Attributes Exception columns
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
97
|
+
SPAN_ATTRIBUTES_EXCEPTION_TYPE_COL = SpanColumn(
|
|
98
|
+
name=f"attributes.{otel.SpanAttributes.EXCEPTION_TYPE}",
|
|
99
|
+
data_type=SpanColumnDataType.STRING,
|
|
100
|
+
)
|
|
103
101
|
SPAN_ATTRIBUTES_EXCEPTION_MESSAGE_COL = SpanColumn(
|
|
104
102
|
name=f"attributes.{otel.SpanAttributes.EXCEPTION_MESSAGE}",
|
|
105
103
|
data_type=SpanColumnDataType.STRING,
|
|
106
104
|
)
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
105
|
+
SPAN_ATTRIBUTES_EXCEPTION_ESCAPED_COL = SpanColumn(
|
|
106
|
+
name=f"attributes.{otel.SpanAttributes.EXCEPTION_ESCAPED}",
|
|
107
|
+
data_type=SpanColumnDataType.BOOL,
|
|
108
|
+
)
|
|
111
109
|
SPAN_ATTRIBUTES_EXCEPTION_STACKTRACE_COL = SpanColumn(
|
|
112
110
|
name=f"attributes.{otel.SpanAttributes.EXCEPTION_STACKTRACE}",
|
|
113
111
|
data_type=SpanColumnDataType.STRING,
|
|
@@ -176,20 +174,19 @@ SPAN_ATTRIBUTES_LLM_PROMPT_TEMPLATE_VERSION_COL = SpanColumn(
|
|
|
176
174
|
name=f"attributes.{oinf.SpanAttributes.LLM_PROMPT_TEMPLATE_VERSION}",
|
|
177
175
|
data_type=SpanColumnDataType.STRING,
|
|
178
176
|
)
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
177
|
+
SPAN_ATTRIBUTES_LLM_PROMPT_TOKEN_COUNT_COL = SpanColumn(
|
|
178
|
+
name=f"attributes.{oinf.SpanAttributes.LLM_TOKEN_COUNT_PROMPT}",
|
|
179
|
+
data_type=SpanColumnDataType.NUMERIC,
|
|
180
|
+
)
|
|
181
|
+
SPAN_ATTRIBUTES_LLM_COMPLETION_TOKEN_COUNT_COL = SpanColumn(
|
|
182
|
+
name=f"attributes.{oinf.SpanAttributes.LLM_TOKEN_COUNT_COMPLETION}",
|
|
183
|
+
data_type=SpanColumnDataType.NUMERIC,
|
|
184
|
+
)
|
|
185
|
+
SPAN_ATTRIBUTES_LLM_TOTAL_TOKEN_COUNT_COL = SpanColumn(
|
|
186
|
+
name=f"attributes.{oinf.SpanAttributes.LLM_TOKEN_COUNT_TOTAL}",
|
|
187
|
+
data_type=SpanColumnDataType.NUMERIC,
|
|
188
|
+
)
|
|
191
189
|
# Attributes Message Keys
|
|
192
|
-
# SPAN_ATTRIBUTES_MESSAGE_NAME_KEY = f"{oinf.MessageAttributes.MESSAGE_NAME}"
|
|
193
190
|
SPAN_ATTRIBUTES_MESSAGE_ROLE_KEY = f"{oinf.MessageAttributes.MESSAGE_ROLE}"
|
|
194
191
|
SPAN_ATTRIBUTES_MESSAGE_CONTENT_KEY = (
|
|
195
192
|
f"{oinf.MessageAttributes.MESSAGE_CONTENT}"
|
|
@@ -223,7 +220,6 @@ SPAN_ATTRIBUTES_RETRIEVAL_DOCUMENTS_COL = SpanColumn(
|
|
|
223
220
|
)
|
|
224
221
|
# Document Object Keys
|
|
225
222
|
SPAN_ATTRIBUTES_DOCUMENT_ID_KEY = f"{oinf.DocumentAttributes.DOCUMENT_ID}"
|
|
226
|
-
# SPAN_ATTRIBUTES_DOCUMENT_SCORE_KEY = f"{oinf.DocumentAttributes.DOCUMENT_SCORE}"
|
|
227
223
|
SPAN_ATTRIBUTES_DOCUMENT_CONTENT_KEY = (
|
|
228
224
|
f"{oinf.DocumentAttributes.DOCUMENT_CONTENT}"
|
|
229
225
|
)
|
|
@@ -247,10 +243,10 @@ SPAN_ATTRIBUTES_RERANKER_MODEL_NAME_COL = SpanColumn(
|
|
|
247
243
|
name=f"attributes.{oinf.RerankerAttributes.RERANKER_MODEL_NAME}",
|
|
248
244
|
data_type=SpanColumnDataType.STRING,
|
|
249
245
|
)
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
246
|
+
SPAN_ATTRIBUTES_RERANKER_TOP_K_COL = SpanColumn(
|
|
247
|
+
name=f"attributes.{oinf.RerankerAttributes.RERANKER_TOP_K}",
|
|
248
|
+
data_type=SpanColumnDataType.NUMERIC,
|
|
249
|
+
)
|
|
254
250
|
SPAN_ATTRIBUTES_SESSION_ID = SpanColumn(
|
|
255
251
|
name=f"attributes.{oinf.SpanAttributes.SESSION_ID}",
|
|
256
252
|
data_type=SpanColumnDataType.STRING,
|
|
@@ -281,9 +277,9 @@ SPAN_OPENINFERENCE_COLUMNS = [
|
|
|
281
277
|
SPAN_STATUS_CODE_COL,
|
|
282
278
|
SPAN_STATUS_MESSAGE_COL,
|
|
283
279
|
SPAN_EVENTS_COL,
|
|
284
|
-
|
|
280
|
+
SPAN_ATTRIBUTES_EXCEPTION_TYPE_COL,
|
|
285
281
|
SPAN_ATTRIBUTES_EXCEPTION_MESSAGE_COL,
|
|
286
|
-
|
|
282
|
+
SPAN_ATTRIBUTES_EXCEPTION_ESCAPED_COL,
|
|
287
283
|
SPAN_ATTRIBUTES_EXCEPTION_STACKTRACE_COL,
|
|
288
284
|
SPAN_ATTRIBUTES_INPUT_VALUE_COL,
|
|
289
285
|
SPAN_ATTRIBUTES_INPUT_MIME_TYPE_COL,
|
|
@@ -297,9 +293,9 @@ SPAN_OPENINFERENCE_COLUMNS = [
|
|
|
297
293
|
SPAN_ATTRIBUTES_LLM_PROMPT_TEMPLATE_TEMPLATE_COL,
|
|
298
294
|
SPAN_ATTRIBUTES_LLM_PROMPT_TEMPLATE_VARIABLES_COL,
|
|
299
295
|
SPAN_ATTRIBUTES_LLM_PROMPT_TEMPLATE_VERSION_COL,
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
296
|
+
SPAN_ATTRIBUTES_LLM_PROMPT_TOKEN_COUNT_COL,
|
|
297
|
+
SPAN_ATTRIBUTES_LLM_COMPLETION_TOKEN_COUNT_COL,
|
|
298
|
+
SPAN_ATTRIBUTES_LLM_TOTAL_TOKEN_COUNT_COL,
|
|
303
299
|
SPAN_ATTRIBUTES_TOOL_NAME_COL,
|
|
304
300
|
SPAN_ATTRIBUTES_TOOL_DESCRIPTION_COL,
|
|
305
301
|
SPAN_ATTRIBUTES_TOOL_PARAMETERS_COL,
|
|
@@ -308,18 +304,18 @@ SPAN_OPENINFERENCE_COLUMNS = [
|
|
|
308
304
|
SPAN_ATTRIBUTES_RERANKER_OUTPUT_DOCUMENTS_COL,
|
|
309
305
|
SPAN_ATTRIBUTES_RERANKER_QUERY_COL,
|
|
310
306
|
SPAN_ATTRIBUTES_RERANKER_MODEL_NAME_COL,
|
|
311
|
-
|
|
307
|
+
SPAN_ATTRIBUTES_RERANKER_TOP_K_COL,
|
|
312
308
|
SPAN_ATTRIBUTES_SESSION_ID,
|
|
313
309
|
SPAN_ATTRIBUTES_USER_ID,
|
|
314
310
|
SPAN_ATTRIBUTES_METADATA,
|
|
315
311
|
SPAN_ATTRIBUTES_LLM_TOOLS_COL,
|
|
316
312
|
]
|
|
317
|
-
|
|
313
|
+
|
|
318
314
|
# List of columns that must be present in the dataframe
|
|
319
315
|
SPAN_OPENINFERENCE_REQUIRED_COLUMNS = [
|
|
320
316
|
col for col in SPAN_OPENINFERENCE_COLUMNS if col.required
|
|
321
317
|
]
|
|
322
|
-
|
|
318
|
+
|
|
323
319
|
# Eval columns
|
|
324
320
|
# EVAL_COLUMN_PREFIX = "eval."
|
|
325
321
|
# SESSION_EVAL_COLUMN_PREFIX = "session_eval."
|
arize/spans/conversion.py
CHANGED
|
@@ -7,22 +7,21 @@ from datetime import datetime, timezone
|
|
|
7
7
|
import numpy as np
|
|
8
8
|
import pandas as pd
|
|
9
9
|
|
|
10
|
-
# from arize.utils.logging import logger
|
|
11
10
|
from arize.spans.columns import SPAN_OPENINFERENCE_COLUMNS, SpanColumnDataType
|
|
12
11
|
|
|
13
12
|
|
|
14
13
|
def convert_timestamps(df: pd.DataFrame, fmt: str = "") -> pd.DataFrame:
|
|
15
|
-
"""Convert timestamp columns in a DataFrame to nanoseconds.
|
|
14
|
+
"""Convert timestamp columns in a :class:`pandas.DataFrame` to nanoseconds.
|
|
16
15
|
|
|
17
16
|
Args:
|
|
18
|
-
df: The pandas
|
|
17
|
+
df: The :class:`pandas.DataFrame` containing timestamp columns.
|
|
19
18
|
fmt: Optional datetime format string for parsing string timestamps. Defaults to "".
|
|
20
19
|
|
|
21
20
|
Returns:
|
|
22
|
-
The DataFrame with timestamp columns converted to nanoseconds.
|
|
21
|
+
The :class:`pandas.DataFrame` with timestamp columns converted to nanoseconds.
|
|
23
22
|
|
|
24
23
|
Raises:
|
|
25
|
-
KeyError: If required timestamp column is not found in DataFrame
|
|
24
|
+
KeyError: If required timestamp column is not found in :class:`pandas.DataFrame`.
|
|
26
25
|
"""
|
|
27
26
|
for col in SPAN_OPENINFERENCE_COLUMNS:
|
|
28
27
|
if col.data_type != SpanColumnDataType.TIMESTAMP:
|
|
@@ -70,7 +69,7 @@ def jsonify_dictionaries(df: pd.DataFrame) -> pd.DataFrame:
|
|
|
70
69
|
"""Convert dictionary and list-of-dictionary columns to JSON strings.
|
|
71
70
|
|
|
72
71
|
Args:
|
|
73
|
-
df: The pandas
|
|
72
|
+
df: The :class:`pandas.DataFrame` containing dictionary columns.
|
|
74
73
|
|
|
75
74
|
Returns:
|
|
76
75
|
The DataFrame with dictionary columns converted to JSON strings.
|
|
@@ -39,13 +39,13 @@ def check_field_convertible_to_str(
|
|
|
39
39
|
def check_dataframe_type(
|
|
40
40
|
dataframe: object,
|
|
41
41
|
) -> list[InvalidTypeArgument]:
|
|
42
|
-
"""Validates that the provided argument is a pandas
|
|
42
|
+
"""Validates that the provided argument is a :class:`pandas.DataFrame`.
|
|
43
43
|
|
|
44
44
|
Args:
|
|
45
|
-
dataframe: The object to validate as a pandas
|
|
45
|
+
dataframe: The object to validate as a :class:`pandas.DataFrame`.
|
|
46
46
|
|
|
47
47
|
Returns:
|
|
48
|
-
List of validation errors if not a DataFrame (empty if valid).
|
|
48
|
+
List of validation errors if not a :class:`pandas.DataFrame` (empty if valid).
|
|
49
49
|
"""
|
|
50
50
|
if not isinstance(dataframe, pd.DataFrame):
|
|
51
51
|
return [
|