arize 8.0.0b1__py3-none-any.whl → 8.0.0b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arize/__init__.py +1 -1
- arize/_client_factory.py +50 -0
- arize/_flight/client.py +4 -4
- arize/_generated/api_client/api/datasets_api.py +6 -6
- arize/_generated/api_client/api/experiments_api.py +6 -6
- arize/_generated/api_client/api/projects_api.py +3 -3
- arize/_lazy.py +25 -9
- arize/client.py +6 -16
- arize/config.py +9 -36
- arize/constants/ml.py +9 -16
- arize/constants/spans.py +5 -10
- arize/datasets/client.py +13 -9
- arize/datasets/errors.py +1 -1
- arize/datasets/validation.py +2 -2
- arize/embeddings/auto_generator.py +2 -2
- arize/embeddings/errors.py +2 -2
- arize/embeddings/tabular_generators.py +1 -1
- arize/exceptions/base.py +0 -52
- arize/exceptions/parameters.py +0 -329
- arize/experiments/client.py +14 -7
- arize/experiments/evaluators/base.py +6 -6
- arize/experiments/evaluators/executors.py +10 -3
- arize/experiments/evaluators/types.py +2 -2
- arize/experiments/functions.py +18 -11
- arize/experiments/types.py +3 -5
- arize/logging.py +1 -1
- arize/ml/batch_validation/errors.py +10 -1004
- arize/ml/batch_validation/validator.py +273 -225
- arize/ml/casting.py +7 -7
- arize/ml/client.py +12 -11
- arize/ml/proto.py +6 -6
- arize/ml/stream_validation.py +2 -3
- arize/ml/surrogate_explainer/mimic.py +3 -3
- arize/ml/types.py +1 -55
- arize/pre_releases.py +6 -3
- arize/projects/client.py +9 -4
- arize/regions.py +2 -2
- arize/spans/client.py +13 -11
- arize/spans/columns.py +32 -36
- arize/spans/conversion.py +5 -6
- arize/spans/validation/common/argument_validation.py +3 -3
- arize/spans/validation/common/dataframe_form_validation.py +6 -6
- arize/spans/validation/common/value_validation.py +1 -1
- arize/spans/validation/evals/dataframe_form_validation.py +4 -4
- arize/spans/validation/evals/evals_validation.py +6 -6
- arize/spans/validation/metadata/dataframe_form_validation.py +1 -1
- arize/spans/validation/spans/dataframe_form_validation.py +2 -2
- arize/spans/validation/spans/spans_validation.py +6 -6
- arize/utils/arrow.py +2 -2
- arize/utils/cache.py +2 -2
- arize/utils/dataframe.py +4 -4
- arize/utils/online_tasks/dataframe_preprocessor.py +7 -7
- arize/utils/openinference_conversion.py +10 -10
- arize/utils/proto.py +1 -1
- arize/version.py +1 -1
- {arize-8.0.0b1.dist-info → arize-8.0.0b2.dist-info}/METADATA +23 -6
- {arize-8.0.0b1.dist-info → arize-8.0.0b2.dist-info}/RECORD +60 -59
- {arize-8.0.0b1.dist-info → arize-8.0.0b2.dist-info}/WHEEL +0 -0
- {arize-8.0.0b1.dist-info → arize-8.0.0b2.dist-info}/licenses/LICENSE +0 -0
- {arize-8.0.0b1.dist-info → arize-8.0.0b2.dist-info}/licenses/NOTICE +0 -0
arize/__init__.py
CHANGED
|
@@ -37,7 +37,7 @@ def make_to_df(field_name: str) -> object:
|
|
|
37
37
|
json_normalize: bool = False,
|
|
38
38
|
convert_dtypes: bool = True,
|
|
39
39
|
) -> object:
|
|
40
|
-
"""Convert a list of objects to a pandas
|
|
40
|
+
"""Convert a list of objects to a :class:`pandas.DataFrame`.
|
|
41
41
|
|
|
42
42
|
Behavior:
|
|
43
43
|
- If an item is a Pydantic v2 model, use `.model_dump(by_alias=...)`.
|
arize/_client_factory.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""Factory for creating and caching the generated OpenAPI client."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import threading
|
|
6
|
+
from typing import TYPE_CHECKING
|
|
7
|
+
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
from arize._generated.api_client.api_client import ApiClient
|
|
10
|
+
from arize.config import SDKConfiguration
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class GeneratedClientFactory:
|
|
14
|
+
"""Factory for creating and caching generated OpenAPI clients.
|
|
15
|
+
|
|
16
|
+
This factory is owned by ArizeClient and provides thread-safe lazy
|
|
17
|
+
initialization of the OpenAPI client used by various subclients.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
def __init__(self, sdk_config: SDKConfiguration) -> None:
|
|
21
|
+
"""Initialize the factory.
|
|
22
|
+
|
|
23
|
+
Args:
|
|
24
|
+
sdk_config: SDK configuration containing API settings.
|
|
25
|
+
"""
|
|
26
|
+
self._sdk_config = sdk_config
|
|
27
|
+
self._client: ApiClient | None = None
|
|
28
|
+
self._lock = threading.Lock()
|
|
29
|
+
|
|
30
|
+
def get_client(self) -> ApiClient:
|
|
31
|
+
"""Get or create the generated OpenAPI client instance.
|
|
32
|
+
|
|
33
|
+
Returns:
|
|
34
|
+
The shared generated API client instance.
|
|
35
|
+
"""
|
|
36
|
+
if self._client is not None:
|
|
37
|
+
return self._client
|
|
38
|
+
|
|
39
|
+
with self._lock:
|
|
40
|
+
if self._client is not None:
|
|
41
|
+
return self._client
|
|
42
|
+
|
|
43
|
+
# Import lazily to avoid extra dependencies at config time
|
|
44
|
+
from arize._generated import api_client as gen
|
|
45
|
+
|
|
46
|
+
cfg = gen.Configuration(host=self._sdk_config.api_url)
|
|
47
|
+
if self._sdk_config.api_key:
|
|
48
|
+
cfg.access_token = self._sdk_config.api_key
|
|
49
|
+
self._client = gen.ApiClient(cfg)
|
|
50
|
+
return self._client
|
arize/_flight/client.py
CHANGED
|
@@ -333,8 +333,8 @@ class ArizeFlightClient:
|
|
|
333
333
|
latest version.
|
|
334
334
|
|
|
335
335
|
Returns:
|
|
336
|
-
A pandas DataFrame containing the dataset examples
|
|
337
|
-
|
|
336
|
+
:class:`pandas.DataFrame`: A pandas DataFrame containing the dataset examples
|
|
337
|
+
with JSON string columns converted to dict objects.
|
|
338
338
|
|
|
339
339
|
Raises:
|
|
340
340
|
RuntimeError: If the Flight request fails.
|
|
@@ -374,8 +374,8 @@ class ArizeFlightClient:
|
|
|
374
374
|
experiment_id: Experiment ID to retrieve runs from.
|
|
375
375
|
|
|
376
376
|
Returns:
|
|
377
|
-
A pandas DataFrame containing the experiment runs
|
|
378
|
-
|
|
377
|
+
:class:`pandas.DataFrame`: A pandas DataFrame containing the experiment runs
|
|
378
|
+
with JSON string columns converted to dict objects.
|
|
379
379
|
|
|
380
380
|
Raises:
|
|
381
381
|
RuntimeError: If the Flight request fails.
|
|
@@ -940,7 +940,7 @@ class DatasetsApi:
|
|
|
940
940
|
self,
|
|
941
941
|
dataset_id: Annotated[StrictStr, Field(description="The unique identifier of the dataset")],
|
|
942
942
|
dataset_version_id: Annotated[Optional[StrictStr], Field(description="The unique identifier of the dataset version")] = None,
|
|
943
|
-
limit: Annotated[Optional[Annotated[int, Field(le=
|
|
943
|
+
limit: Annotated[Optional[Annotated[int, Field(le=500, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
|
|
944
944
|
_request_timeout: Union[
|
|
945
945
|
None,
|
|
946
946
|
Annotated[StrictFloat, Field(gt=0)],
|
|
@@ -1021,7 +1021,7 @@ class DatasetsApi:
|
|
|
1021
1021
|
self,
|
|
1022
1022
|
dataset_id: Annotated[StrictStr, Field(description="The unique identifier of the dataset")],
|
|
1023
1023
|
dataset_version_id: Annotated[Optional[StrictStr], Field(description="The unique identifier of the dataset version")] = None,
|
|
1024
|
-
limit: Annotated[Optional[Annotated[int, Field(le=
|
|
1024
|
+
limit: Annotated[Optional[Annotated[int, Field(le=500, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
|
|
1025
1025
|
_request_timeout: Union[
|
|
1026
1026
|
None,
|
|
1027
1027
|
Annotated[StrictFloat, Field(gt=0)],
|
|
@@ -1102,7 +1102,7 @@ class DatasetsApi:
|
|
|
1102
1102
|
self,
|
|
1103
1103
|
dataset_id: Annotated[StrictStr, Field(description="The unique identifier of the dataset")],
|
|
1104
1104
|
dataset_version_id: Annotated[Optional[StrictStr], Field(description="The unique identifier of the dataset version")] = None,
|
|
1105
|
-
limit: Annotated[Optional[Annotated[int, Field(le=
|
|
1105
|
+
limit: Annotated[Optional[Annotated[int, Field(le=500, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
|
|
1106
1106
|
_request_timeout: Union[
|
|
1107
1107
|
None,
|
|
1108
1108
|
Annotated[StrictFloat, Field(gt=0)],
|
|
@@ -1861,7 +1861,7 @@ class DatasetsApi:
|
|
|
1861
1861
|
def datasets_list(
|
|
1862
1862
|
self,
|
|
1863
1863
|
space_id: Annotated[Optional[StrictStr], Field(description="Filter search results to a particular space ID")] = None,
|
|
1864
|
-
limit: Annotated[Optional[Annotated[int, Field(le=
|
|
1864
|
+
limit: Annotated[Optional[Annotated[int, Field(le=100, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
|
|
1865
1865
|
cursor: Annotated[Optional[StrictStr], Field(description="Opaque pagination cursor returned from a previous response (`pagination.next_cursor`). Treat it as an unreadable token; do not attempt to parse or construct it. ")] = None,
|
|
1866
1866
|
_request_timeout: Union[
|
|
1867
1867
|
None,
|
|
@@ -1941,7 +1941,7 @@ class DatasetsApi:
|
|
|
1941
1941
|
def datasets_list_with_http_info(
|
|
1942
1942
|
self,
|
|
1943
1943
|
space_id: Annotated[Optional[StrictStr], Field(description="Filter search results to a particular space ID")] = None,
|
|
1944
|
-
limit: Annotated[Optional[Annotated[int, Field(le=
|
|
1944
|
+
limit: Annotated[Optional[Annotated[int, Field(le=100, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
|
|
1945
1945
|
cursor: Annotated[Optional[StrictStr], Field(description="Opaque pagination cursor returned from a previous response (`pagination.next_cursor`). Treat it as an unreadable token; do not attempt to parse or construct it. ")] = None,
|
|
1946
1946
|
_request_timeout: Union[
|
|
1947
1947
|
None,
|
|
@@ -2021,7 +2021,7 @@ class DatasetsApi:
|
|
|
2021
2021
|
def datasets_list_without_preload_content(
|
|
2022
2022
|
self,
|
|
2023
2023
|
space_id: Annotated[Optional[StrictStr], Field(description="Filter search results to a particular space ID")] = None,
|
|
2024
|
-
limit: Annotated[Optional[Annotated[int, Field(le=
|
|
2024
|
+
limit: Annotated[Optional[Annotated[int, Field(le=100, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
|
|
2025
2025
|
cursor: Annotated[Optional[StrictStr], Field(description="Opaque pagination cursor returned from a previous response (`pagination.next_cursor`). Treat it as an unreadable token; do not attempt to parse or construct it. ")] = None,
|
|
2026
2026
|
_request_timeout: Union[
|
|
2027
2027
|
None,
|
|
@@ -898,7 +898,7 @@ class ExperimentsApi:
|
|
|
898
898
|
def experiments_list(
|
|
899
899
|
self,
|
|
900
900
|
dataset_id: Annotated[Optional[StrictStr], Field(description="Filter experiments to a particular dataset ID")] = None,
|
|
901
|
-
limit: Annotated[Optional[Annotated[int, Field(le=
|
|
901
|
+
limit: Annotated[Optional[Annotated[int, Field(le=100, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
|
|
902
902
|
cursor: Annotated[Optional[StrictStr], Field(description="Opaque pagination cursor returned from a previous response (`pagination.next_cursor`). Treat it as an unreadable token; do not attempt to parse or construct it. ")] = None,
|
|
903
903
|
_request_timeout: Union[
|
|
904
904
|
None,
|
|
@@ -978,7 +978,7 @@ class ExperimentsApi:
|
|
|
978
978
|
def experiments_list_with_http_info(
|
|
979
979
|
self,
|
|
980
980
|
dataset_id: Annotated[Optional[StrictStr], Field(description="Filter experiments to a particular dataset ID")] = None,
|
|
981
|
-
limit: Annotated[Optional[Annotated[int, Field(le=
|
|
981
|
+
limit: Annotated[Optional[Annotated[int, Field(le=100, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
|
|
982
982
|
cursor: Annotated[Optional[StrictStr], Field(description="Opaque pagination cursor returned from a previous response (`pagination.next_cursor`). Treat it as an unreadable token; do not attempt to parse or construct it. ")] = None,
|
|
983
983
|
_request_timeout: Union[
|
|
984
984
|
None,
|
|
@@ -1058,7 +1058,7 @@ class ExperimentsApi:
|
|
|
1058
1058
|
def experiments_list_without_preload_content(
|
|
1059
1059
|
self,
|
|
1060
1060
|
dataset_id: Annotated[Optional[StrictStr], Field(description="Filter experiments to a particular dataset ID")] = None,
|
|
1061
|
-
limit: Annotated[Optional[Annotated[int, Field(le=
|
|
1061
|
+
limit: Annotated[Optional[Annotated[int, Field(le=100, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
|
|
1062
1062
|
cursor: Annotated[Optional[StrictStr], Field(description="Opaque pagination cursor returned from a previous response (`pagination.next_cursor`). Treat it as an unreadable token; do not attempt to parse or construct it. ")] = None,
|
|
1063
1063
|
_request_timeout: Union[
|
|
1064
1064
|
None,
|
|
@@ -1211,7 +1211,7 @@ class ExperimentsApi:
|
|
|
1211
1211
|
def experiments_runs_list(
|
|
1212
1212
|
self,
|
|
1213
1213
|
experiment_id: Annotated[StrictStr, Field(description="The unique identifier of the experiment")],
|
|
1214
|
-
limit: Annotated[Optional[Annotated[int, Field(le=
|
|
1214
|
+
limit: Annotated[Optional[Annotated[int, Field(le=500, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
|
|
1215
1215
|
_request_timeout: Union[
|
|
1216
1216
|
None,
|
|
1217
1217
|
Annotated[StrictFloat, Field(gt=0)],
|
|
@@ -1288,7 +1288,7 @@ class ExperimentsApi:
|
|
|
1288
1288
|
def experiments_runs_list_with_http_info(
|
|
1289
1289
|
self,
|
|
1290
1290
|
experiment_id: Annotated[StrictStr, Field(description="The unique identifier of the experiment")],
|
|
1291
|
-
limit: Annotated[Optional[Annotated[int, Field(le=
|
|
1291
|
+
limit: Annotated[Optional[Annotated[int, Field(le=500, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
|
|
1292
1292
|
_request_timeout: Union[
|
|
1293
1293
|
None,
|
|
1294
1294
|
Annotated[StrictFloat, Field(gt=0)],
|
|
@@ -1365,7 +1365,7 @@ class ExperimentsApi:
|
|
|
1365
1365
|
def experiments_runs_list_without_preload_content(
|
|
1366
1366
|
self,
|
|
1367
1367
|
experiment_id: Annotated[StrictStr, Field(description="The unique identifier of the experiment")],
|
|
1368
|
-
limit: Annotated[Optional[Annotated[int, Field(le=
|
|
1368
|
+
limit: Annotated[Optional[Annotated[int, Field(le=500, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
|
|
1369
1369
|
_request_timeout: Union[
|
|
1370
1370
|
None,
|
|
1371
1371
|
Annotated[StrictFloat, Field(gt=0)],
|
|
@@ -891,7 +891,7 @@ class ProjectsApi:
|
|
|
891
891
|
def projects_list(
|
|
892
892
|
self,
|
|
893
893
|
space_id: Annotated[Optional[StrictStr], Field(description="Filter search results to a particular space ID")] = None,
|
|
894
|
-
limit: Annotated[Optional[Annotated[int, Field(le=
|
|
894
|
+
limit: Annotated[Optional[Annotated[int, Field(le=100, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
|
|
895
895
|
cursor: Annotated[Optional[StrictStr], Field(description="Opaque pagination cursor returned from a previous response (`pagination.next_cursor`). Treat it as an unreadable token; do not attempt to parse or construct it. ")] = None,
|
|
896
896
|
_request_timeout: Union[
|
|
897
897
|
None,
|
|
@@ -970,7 +970,7 @@ class ProjectsApi:
|
|
|
970
970
|
def projects_list_with_http_info(
|
|
971
971
|
self,
|
|
972
972
|
space_id: Annotated[Optional[StrictStr], Field(description="Filter search results to a particular space ID")] = None,
|
|
973
|
-
limit: Annotated[Optional[Annotated[int, Field(le=
|
|
973
|
+
limit: Annotated[Optional[Annotated[int, Field(le=100, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
|
|
974
974
|
cursor: Annotated[Optional[StrictStr], Field(description="Opaque pagination cursor returned from a previous response (`pagination.next_cursor`). Treat it as an unreadable token; do not attempt to parse or construct it. ")] = None,
|
|
975
975
|
_request_timeout: Union[
|
|
976
976
|
None,
|
|
@@ -1049,7 +1049,7 @@ class ProjectsApi:
|
|
|
1049
1049
|
def projects_list_without_preload_content(
|
|
1050
1050
|
self,
|
|
1051
1051
|
space_id: Annotated[Optional[StrictStr], Field(description="Filter search results to a particular space ID")] = None,
|
|
1052
|
-
limit: Annotated[Optional[Annotated[int, Field(le=
|
|
1052
|
+
limit: Annotated[Optional[Annotated[int, Field(le=100, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
|
|
1053
1053
|
cursor: Annotated[Optional[StrictStr], Field(description="Opaque pagination cursor returned from a previous response (`pagination.next_cursor`). Treat it as an unreadable token; do not attempt to parse or construct it. ")] = None,
|
|
1054
1054
|
_request_timeout: Union[
|
|
1055
1055
|
None,
|
arize/_lazy.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# src/arize/_lazy.py
|
|
2
2
|
from __future__ import annotations
|
|
3
3
|
|
|
4
|
+
import inspect
|
|
4
5
|
import logging
|
|
5
6
|
import sys
|
|
6
7
|
import threading
|
|
@@ -8,6 +9,8 @@ from importlib import import_module
|
|
|
8
9
|
from typing import TYPE_CHECKING, ClassVar
|
|
9
10
|
|
|
10
11
|
if TYPE_CHECKING:
|
|
12
|
+
import types
|
|
13
|
+
|
|
11
14
|
from arize.config import SDKConfiguration
|
|
12
15
|
|
|
13
16
|
logger = logging.getLogger(__name__)
|
|
@@ -22,6 +25,11 @@ class LazySubclientsMixin:
|
|
|
22
25
|
self._lazy_cache: dict[str, object] = {}
|
|
23
26
|
self._lazy_lock = threading.Lock()
|
|
24
27
|
|
|
28
|
+
# Add generated client factory
|
|
29
|
+
from arize._client_factory import GeneratedClientFactory
|
|
30
|
+
|
|
31
|
+
self._gen_client_factory = GeneratedClientFactory(sdk_config)
|
|
32
|
+
|
|
25
33
|
def __getattr__(self, name: str) -> object:
|
|
26
34
|
subs = self._SUBCLIENTS
|
|
27
35
|
if name not in subs:
|
|
@@ -41,12 +49,18 @@ class LazySubclientsMixin:
|
|
|
41
49
|
module = _dynamic_import(module_path)
|
|
42
50
|
klass = getattr(module, class_name)
|
|
43
51
|
|
|
44
|
-
#
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
52
|
+
# Determine which parameters this subclient needs
|
|
53
|
+
# and build kwargs accordingly
|
|
54
|
+
sig = inspect.signature(klass.__init__)
|
|
55
|
+
kwargs = {}
|
|
56
|
+
if "sdk_config" in sig.parameters:
|
|
57
|
+
kwargs["sdk_config"] = self.sdk_config
|
|
58
|
+
if "generated_client" in sig.parameters:
|
|
59
|
+
kwargs["generated_client"] = (
|
|
60
|
+
self._gen_client_factory.get_client()
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
instance = klass(**kwargs)
|
|
50
64
|
self._lazy_cache[name] = instance
|
|
51
65
|
return instance
|
|
52
66
|
|
|
@@ -82,8 +96,8 @@ def require(
|
|
|
82
96
|
)
|
|
83
97
|
|
|
84
98
|
|
|
85
|
-
def _dynamic_import(modname: str, retries: int = 2) ->
|
|
86
|
-
def _attempt_import(remaining_attempts: int) ->
|
|
99
|
+
def _dynamic_import(modname: str, retries: int = 2) -> types.ModuleType:
|
|
100
|
+
def _attempt_import(remaining_attempts: int) -> types.ModuleType:
|
|
87
101
|
try:
|
|
88
102
|
return import_module(modname)
|
|
89
103
|
except (ModuleNotFoundError, ImportError, KeyError):
|
|
@@ -92,4 +106,6 @@ def _dynamic_import(modname: str, retries: int = 2) -> object:
|
|
|
92
106
|
raise
|
|
93
107
|
return _attempt_import(remaining_attempts - 1)
|
|
94
108
|
|
|
95
|
-
|
|
109
|
+
if retries <= 0:
|
|
110
|
+
raise ValueError(f"retries must be > 0, got {retries}")
|
|
111
|
+
return _attempt_import(retries)
|
arize/client.py
CHANGED
|
@@ -20,11 +20,6 @@ if TYPE_CHECKING:
|
|
|
20
20
|
|
|
21
21
|
logger = logging.getLogger(__name__)
|
|
22
22
|
|
|
23
|
-
# TODO(Kiko): InvalidAdditionalHeadersError is unused. Have we handled extra headers?
|
|
24
|
-
|
|
25
|
-
# TODO(Kiko): Clean commented lines over the SDK
|
|
26
|
-
# TODO(Kiko): Implement https://github.com/Arize-ai/arize/pull/59917
|
|
27
|
-
|
|
28
23
|
# TODO(Kiko): Go private connect. Need a `base_domain`, such that we get:
|
|
29
24
|
# - api.<base_domain>
|
|
30
25
|
# - app.<base_domain>
|
|
@@ -33,29 +28,23 @@ logger = logging.getLogger(__name__)
|
|
|
33
28
|
|
|
34
29
|
# TODO(Kiko): Enforce type checking, remove all type ignores
|
|
35
30
|
|
|
36
|
-
# TODO(Kiko): Go over docstrings
|
|
37
|
-
# TODO(Kiko): Missing parameter descriptions in some docstrings
|
|
38
|
-
# TODO(Kiko): Missing return descriptions in some docstrings
|
|
39
|
-
|
|
40
31
|
# TODO(Kiko): Go over headers on each logging call
|
|
32
|
+
# TODO(Kiko): InvalidAdditionalHeadersError is unused. Have we handled extra headers?
|
|
41
33
|
|
|
42
34
|
# TODO(Kiko): Need to implement 'Update existing examples in a dataset'
|
|
43
35
|
|
|
44
|
-
# TODO(Kiko): why logs don't show on scripts, only on jupyter notebooks
|
|
45
36
|
# TODO(Kiko): test caching in colab environment
|
|
46
37
|
# TODO(Kiko): Protobuf versioning is too old
|
|
47
38
|
# TODO(Kiko): Go through main APIs and add CtxAdapter where missing
|
|
48
39
|
# TODO(Kiko): Search and handle other TODOs
|
|
49
|
-
# TODO(Kiko): Go over **every file** and do not import anything at runtime, use `if TYPE_CHECKING`
|
|
50
|
-
# with `from __future__ import annotations` (must include for Python < 3.11)
|
|
51
40
|
|
|
52
41
|
|
|
53
42
|
class ArizeClient(LazySubclientsMixin):
|
|
54
43
|
"""Root client for the Arize SDK.
|
|
55
44
|
|
|
56
45
|
The ArizeClient provides access to all Arize platform services including datasets,
|
|
57
|
-
experiments, ML models, projects, and spans. It uses SDKConfiguration
|
|
58
|
-
manage configuration settings.
|
|
46
|
+
experiments, ML models, projects, and spans. It uses :class:`arize.config.SDKConfiguration`
|
|
47
|
+
internally to manage configuration settings.
|
|
59
48
|
|
|
60
49
|
All parameters are optional (except api_key which must be provided via argument
|
|
61
50
|
or environment variable). For each parameter, values are resolved in this order:
|
|
@@ -149,7 +138,7 @@ class ArizeClient(LazySubclientsMixin):
|
|
|
149
138
|
api_key: Arize API key for authentication. Required - must be provided here
|
|
150
139
|
or via ARIZE_API_KEY environment variable. Raises MissingAPIKeyError if not set.
|
|
151
140
|
region: Arize region (e.g., Region.US_CENTRAL, Region.EU_WEST). When specified,
|
|
152
|
-
overrides individual host/port settings. ENV: ARIZE_REGION. Default: Region.
|
|
141
|
+
overrides individual host/port settings. ENV: ARIZE_REGION. Default: Region.UNSET.
|
|
153
142
|
api_host: Custom API endpoint host. ENV: ARIZE_API_HOST. Default: "api.arize.com".
|
|
154
143
|
api_scheme: API endpoint scheme (http/https). ENV: ARIZE_API_SCHEME. Default: "https".
|
|
155
144
|
otlp_host: OTLP endpoint host. ENV: ARIZE_OTLP_HOST. Default: "otlp.arize.com".
|
|
@@ -184,7 +173,8 @@ class ArizeClient(LazySubclientsMixin):
|
|
|
184
173
|
|
|
185
174
|
Notes:
|
|
186
175
|
Values provided to this class override environment variables, which in turn
|
|
187
|
-
override default values. See SDKConfiguration
|
|
176
|
+
override default values. See :class:`arize.config.SDKConfiguration`
|
|
177
|
+
for detailed parameter documentation.
|
|
188
178
|
"""
|
|
189
179
|
cfg_kwargs: dict = {}
|
|
190
180
|
if api_key is not None:
|
arize/config.py
CHANGED
|
@@ -3,10 +3,8 @@
|
|
|
3
3
|
import logging
|
|
4
4
|
import os
|
|
5
5
|
import sys
|
|
6
|
-
import threading
|
|
7
6
|
from dataclasses import dataclass, field, fields
|
|
8
7
|
from pathlib import Path
|
|
9
|
-
from typing import Any
|
|
10
8
|
|
|
11
9
|
from arize.constants.config import (
|
|
12
10
|
DEFAULT_API_HOST,
|
|
@@ -167,6 +165,9 @@ def _parse_bool(val: bool | str | None) -> bool:
|
|
|
167
165
|
class SDKConfiguration:
|
|
168
166
|
"""Configuration for the Arize SDK with endpoint and authentication settings.
|
|
169
167
|
|
|
168
|
+
This class holds pure configuration data and does not manage client lifecycle.
|
|
169
|
+
Client creation and caching is handled by :class:`arize.ArizeClient`.
|
|
170
|
+
|
|
170
171
|
This class is used internally by ArizeClient to manage SDK configuration. It is not
|
|
171
172
|
recommended to use this class directly; users should interact with ArizeClient
|
|
172
173
|
instead.
|
|
@@ -225,13 +226,16 @@ class SDKConfiguration:
|
|
|
225
226
|
region: Arize region (e.g., US_CENTRAL, EU_WEST). When specified, overrides
|
|
226
227
|
individual host/port settings.
|
|
227
228
|
Environment variable: ARIZE_REGION.
|
|
228
|
-
Default: Region.
|
|
229
|
+
Default: :class:`Region.UNSET`.
|
|
229
230
|
single_host: Single host to use for all endpoints. Overrides individual host settings.
|
|
230
231
|
Environment variable: ARIZE_SINGLE_HOST.
|
|
231
232
|
Default: "" (not set).
|
|
232
233
|
single_port: Single port to use for all endpoints. Overrides individual port settings (0-65535).
|
|
233
234
|
Environment variable: ARIZE_SINGLE_PORT.
|
|
234
235
|
Default: 0 (not set).
|
|
236
|
+
|
|
237
|
+
Raises:
|
|
238
|
+
MissingAPIKeyError: If api_key is not provided via argument or environment variable.
|
|
235
239
|
"""
|
|
236
240
|
|
|
237
241
|
api_key: str = field(
|
|
@@ -323,17 +327,11 @@ class SDKConfiguration:
|
|
|
323
327
|
)
|
|
324
328
|
)
|
|
325
329
|
|
|
326
|
-
# Private, excluded from comparisons & repr
|
|
327
|
-
_gen_client: Any = field(default=None, repr=False, compare=False)
|
|
328
|
-
_gen_lock: threading.Lock = field(
|
|
329
|
-
default_factory=threading.Lock, repr=False, compare=False
|
|
330
|
-
)
|
|
331
|
-
|
|
332
330
|
def __post_init__(self) -> None:
|
|
333
331
|
"""Validate and configure SDK endpoints after initialization.
|
|
334
332
|
|
|
335
333
|
Raises:
|
|
336
|
-
MissingAPIKeyError: If
|
|
334
|
+
MissingAPIKeyError: If api_key is not provided via argument or environment variable.
|
|
337
335
|
"""
|
|
338
336
|
# Validate Configuration
|
|
339
337
|
if not self.api_key:
|
|
@@ -341,7 +339,7 @@ class SDKConfiguration:
|
|
|
341
339
|
|
|
342
340
|
has_single_host = bool(self.single_host)
|
|
343
341
|
has_single_port = self.single_port != 0
|
|
344
|
-
has_region = self.region is not Region.
|
|
342
|
+
has_region = self.region is not Region.UNSET
|
|
345
343
|
if (has_single_host or has_single_port) and has_region:
|
|
346
344
|
logger.info(
|
|
347
345
|
"Multiple endpoint override options provided. Preference order is: "
|
|
@@ -444,28 +442,3 @@ class SDKConfiguration:
|
|
|
444
442
|
lines.append(f" {f.name}={val!r},")
|
|
445
443
|
lines.append(")")
|
|
446
444
|
return "\n".join(lines)
|
|
447
|
-
|
|
448
|
-
# TODO(Kiko): This may not be well placed in this class
|
|
449
|
-
def get_generated_client(self) -> object:
|
|
450
|
-
"""Get or create the generated OpenAPI client instance."""
|
|
451
|
-
# If already cached, return immediately
|
|
452
|
-
if self._gen_client is not None:
|
|
453
|
-
return self._gen_client
|
|
454
|
-
|
|
455
|
-
# Thread-safe initialization
|
|
456
|
-
with self._gen_lock:
|
|
457
|
-
if self._gen_client is not None:
|
|
458
|
-
return self._gen_client
|
|
459
|
-
|
|
460
|
-
# Import lazily so extra dependencies can be
|
|
461
|
-
# enforced outside the configuration class
|
|
462
|
-
from arize._generated import api_client as gen
|
|
463
|
-
|
|
464
|
-
cfg = gen.Configuration(host=self.api_url)
|
|
465
|
-
if self.api_key:
|
|
466
|
-
cfg.access_token = self.api_key
|
|
467
|
-
client = gen.ApiClient(cfg)
|
|
468
|
-
|
|
469
|
-
# Bypass frozen to set the cache once
|
|
470
|
-
object.__setattr__(self, "_gen_client", client)
|
|
471
|
-
return client
|
arize/constants/ml.py
CHANGED
|
@@ -3,50 +3,43 @@
|
|
|
3
3
|
import json
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
|
|
6
|
-
# MAX_BYTES_PER_BULK_RECORD = 100000
|
|
7
|
-
# MAX_DAYS_WITHIN_RANGE = 365
|
|
8
6
|
MIN_PREDICTION_ID_LEN = 1
|
|
9
7
|
MAX_PREDICTION_ID_LEN = 512
|
|
10
8
|
MIN_DOCUMENT_ID_LEN = 1
|
|
11
9
|
MAX_DOCUMENT_ID_LEN = 128
|
|
12
|
-
#
|
|
10
|
+
# The maximum number of characters for tag values
|
|
13
11
|
MAX_TAG_LENGTH = 20_000
|
|
14
12
|
MAX_TAG_LENGTH_TRUNCATION = 1_000
|
|
15
|
-
#
|
|
13
|
+
# The maximum number of characters for embedding raw data
|
|
16
14
|
MAX_RAW_DATA_CHARACTERS = 2_000_000
|
|
17
15
|
MAX_RAW_DATA_CHARACTERS_TRUNCATION = 5_000
|
|
18
16
|
# The maximum number of acceptable years in the past from current time for prediction_timestamps
|
|
19
17
|
MAX_PAST_YEARS_FROM_CURRENT_TIME = 5
|
|
20
18
|
# The maximum number of acceptable years in the future from current time for prediction_timestamps
|
|
21
19
|
MAX_FUTURE_YEARS_FROM_CURRENT_TIME = 1
|
|
22
|
-
#
|
|
20
|
+
# The maximum number of characters for llm model name
|
|
23
21
|
MAX_LLM_MODEL_NAME_LENGTH = 20_000
|
|
24
22
|
MAX_LLM_MODEL_NAME_LENGTH_TRUNCATION = 50
|
|
25
|
-
#
|
|
23
|
+
# The maximum number of characters for prompt template
|
|
26
24
|
MAX_PROMPT_TEMPLATE_LENGTH = 50_000
|
|
27
25
|
MAX_PROMPT_TEMPLATE_LENGTH_TRUNCATION = 5_000
|
|
28
|
-
#
|
|
26
|
+
# The maximum number of characters for prompt template version
|
|
29
27
|
MAX_PROMPT_TEMPLATE_VERSION_LENGTH = 20_000
|
|
30
28
|
MAX_PROMPT_TEMPLATE_VERSION_LENGTH_TRUNCATION = 50
|
|
31
|
-
#
|
|
29
|
+
# The maximum number of embeddings
|
|
32
30
|
MAX_NUMBER_OF_EMBEDDINGS = 30
|
|
33
31
|
MAX_EMBEDDING_DIMENSIONALITY = 20_000
|
|
34
|
-
#
|
|
32
|
+
# The maximum number of classes for multi class
|
|
35
33
|
MAX_NUMBER_OF_MULTI_CLASS_CLASSES = 500
|
|
36
34
|
MAX_MULTI_CLASS_NAME_LENGTH = 100
|
|
37
35
|
# The maximum number of references in embedding similarity search params
|
|
38
36
|
MAX_NUMBER_OF_SIMILARITY_REFERENCES = 10
|
|
39
|
-
#
|
|
40
|
-
# # Arize generated columns
|
|
41
|
-
# GENERATED_PREDICTION_LABEL_COL = "arize_generated_prediction_label"
|
|
42
|
-
# GENERATED_LLM_PARAMS_JSON_COL = "arize_generated_llm_params_json"
|
|
43
|
-
#
|
|
44
|
-
# # reserved columns for LLM run metadata
|
|
37
|
+
# reserved columns for LLM run metadata
|
|
45
38
|
LLM_RUN_METADATA_TOTAL_TOKEN_COUNT_TAG_NAME = "total_token_count" # noqa: S105
|
|
46
39
|
LLM_RUN_METADATA_PROMPT_TOKEN_COUNT_TAG_NAME = "prompt_token_count" # noqa: S105
|
|
47
40
|
LLM_RUN_METADATA_RESPONSE_TOKEN_COUNT_TAG_NAME = "response_token_count" # noqa: S105
|
|
48
41
|
LLM_RUN_METADATA_RESPONSE_LATENCY_MS_TAG_NAME = "response_latency_ms"
|
|
49
|
-
|
|
42
|
+
|
|
50
43
|
# all reserved tags
|
|
51
44
|
RESERVED_TAG_COLS = [
|
|
52
45
|
LLM_RUN_METADATA_TOTAL_TOKEN_COUNT_TAG_NAME,
|
arize/constants/spans.py
CHANGED
|
@@ -5,19 +5,15 @@ DEFAULT_DATETIME_FMT = "%Y-%m-%dT%H:%M:%S.%f+00:00"
|
|
|
5
5
|
# Minimum/Maximum number of characters for span/trace/parent ids in spans
|
|
6
6
|
SPAN_ID_MIN_STR_LENGTH = 12
|
|
7
7
|
SPAN_ID_MAX_STR_LENGTH = 128
|
|
8
|
-
#
|
|
8
|
+
# Minimum/Maximum number of characters for span name
|
|
9
9
|
SPAN_NAME_MIN_STR_LENGTH = 0
|
|
10
10
|
SPAN_NAME_MAX_STR_LENGTH = 50
|
|
11
|
-
#
|
|
11
|
+
# Minimum/Maximum number of characters for span status message
|
|
12
12
|
SPAN_STATUS_MSG_MIN_STR_LENGTH = 0
|
|
13
13
|
SPAN_STATUS_MSG_MAX_STR_LENGTH = 10_000
|
|
14
|
-
#
|
|
14
|
+
# Minimum/Maximum number of characters for span event name
|
|
15
15
|
SPAN_EVENT_NAME_MAX_STR_LENGTH = 100
|
|
16
|
-
#
|
|
17
|
-
# SPAN_EVENT_ATTRS_MAX_STR_LENGTH = 10_000
|
|
18
|
-
# # Maximum number of characters for span kind
|
|
19
|
-
# SPAN_KIND_MAX_STR_LENGTH = 100
|
|
20
|
-
# SPAN_EXCEPTION_TYPE_MAX_STR_LENGTH = 100
|
|
16
|
+
# Minimum/Maximum number of characters for span event attributes
|
|
21
17
|
SPAN_EXCEPTION_MESSAGE_MAX_STR_LENGTH = 100
|
|
22
18
|
SPAN_EXCEPTION_STACK_TRACE_MAX_STR_LENGTH = 10_000
|
|
23
19
|
SPAN_IO_VALUE_MAX_STR_LENGTH = 4_000_000
|
|
@@ -29,7 +25,6 @@ SPAN_LLM_MESSAGE_ROLE_MAX_STR_LENGTH = 100
|
|
|
29
25
|
SPAN_LLM_MESSAGE_CONTENT_MAX_STR_LENGTH = 4_000_000
|
|
30
26
|
SPAN_LLM_TOOL_CALL_FUNCTION_NAME_MAX_STR_LENGTH = 500
|
|
31
27
|
SPAN_LLM_PROMPT_TEMPLATE_MAX_STR_LENGTH = 4_000_000
|
|
32
|
-
# SPAN_LLM_PROMPT_TEMPLATE_VARIABLES_MAX_STR_LENGTH = 10_000
|
|
33
28
|
SPAN_LLM_PROMPT_TEMPLATE_VERSION_MAX_STR_LENGTH = 100
|
|
34
29
|
SPAN_TOOL_NAME_MAX_STR_LENGTH = 100
|
|
35
30
|
SPAN_TOOL_DESCRIPTION_MAX_STR_LENGTH = 1_000
|
|
@@ -43,7 +38,7 @@ JSON_STRING_MAX_STR_LENGTH = 4_000_000
|
|
|
43
38
|
EVAL_LABEL_MIN_STR_LENGTH = 1 # we do not accept empty strings
|
|
44
39
|
EVAL_LABEL_MAX_STR_LENGTH = 100
|
|
45
40
|
EVAL_EXPLANATION_MAX_STR_LENGTH = 10_000
|
|
46
|
-
|
|
41
|
+
|
|
47
42
|
# Annotation related constants
|
|
48
43
|
ANNOTATION_LABEL_MIN_STR_LENGTH = 1
|
|
49
44
|
ANNOTATION_LABEL_MAX_STR_LENGTH = 100 # Max length for annotation label string
|
arize/datasets/client.py
CHANGED
|
@@ -24,6 +24,7 @@ from arize.utils.openinference_conversion import (
|
|
|
24
24
|
from arize.utils.size import get_payload_size_mb
|
|
25
25
|
|
|
26
26
|
if TYPE_CHECKING:
|
|
27
|
+
from arize._generated.api_client.api_client import ApiClient
|
|
27
28
|
from arize.config import SDKConfiguration
|
|
28
29
|
|
|
29
30
|
logger = logging.getLogger(__name__)
|
|
@@ -41,18 +42,21 @@ class DatasetsClient:
|
|
|
41
42
|
:class:`arize.config.SDKConfiguration`.
|
|
42
43
|
"""
|
|
43
44
|
|
|
44
|
-
def __init__(
|
|
45
|
+
def __init__(
|
|
46
|
+
self, *, sdk_config: SDKConfiguration, generated_client: ApiClient
|
|
47
|
+
) -> None:
|
|
45
48
|
"""
|
|
46
49
|
Args:
|
|
47
50
|
sdk_config: Resolved SDK configuration.
|
|
51
|
+
generated_client: Shared generated API client instance.
|
|
48
52
|
""" # noqa: D205, D212
|
|
49
53
|
self._sdk_config = sdk_config
|
|
50
54
|
|
|
51
55
|
# Import at runtime so it's still lazy and extras-gated by the parent
|
|
52
56
|
from arize._generated import api_client as gen
|
|
53
57
|
|
|
54
|
-
# Use the
|
|
55
|
-
self._api = gen.DatasetsApi(
|
|
58
|
+
# Use the provided client directly
|
|
59
|
+
self._api = gen.DatasetsApi(generated_client)
|
|
56
60
|
|
|
57
61
|
@prerelease_endpoint(key="datasets.list", stage=ReleaseStage.BETA)
|
|
58
62
|
def list(
|
|
@@ -117,7 +121,7 @@ class DatasetsClient:
|
|
|
117
121
|
space_id: Space ID to create the dataset in.
|
|
118
122
|
examples: Dataset examples either as:
|
|
119
123
|
- a list of JSON-like dicts, or
|
|
120
|
-
- a pandas
|
|
124
|
+
- a :class:`pandas.DataFrame` (will be converted to records for REST).
|
|
121
125
|
force_http: If True, force REST upload even if the payload exceeds the
|
|
122
126
|
configured REST payload threshold.
|
|
123
127
|
|
|
@@ -125,7 +129,7 @@ class DatasetsClient:
|
|
|
125
129
|
The created dataset object as returned by the API.
|
|
126
130
|
|
|
127
131
|
Raises:
|
|
128
|
-
TypeError: If `examples` is not a list of dicts or a pandas
|
|
132
|
+
TypeError: If `examples` is not a list of dicts or a :class:`pandas.DataFrame`.
|
|
129
133
|
RuntimeError: If the Flight upload path is selected and the Flight request
|
|
130
134
|
fails.
|
|
131
135
|
arize._generated.api_client.exceptions.ApiException: If the REST API
|
|
@@ -205,7 +209,8 @@ class DatasetsClient:
|
|
|
205
209
|
Args:
|
|
206
210
|
dataset_id: Dataset ID to delete.
|
|
207
211
|
|
|
208
|
-
Returns:
|
|
212
|
+
Returns:
|
|
213
|
+
This method returns None on success (common empty 204 response).
|
|
209
214
|
|
|
210
215
|
Raises:
|
|
211
216
|
arize._generated.api_client.exceptions.ApiException: If the REST API
|
|
@@ -354,14 +359,13 @@ class DatasetsClient:
|
|
|
354
359
|
the latest dataset version is selected.
|
|
355
360
|
examples: Examples to append, provided as either:
|
|
356
361
|
- a list of JSON-like dicts, or
|
|
357
|
-
- a pandas
|
|
362
|
+
- a :class:`pandas.DataFrame` (converted to records before upload).
|
|
358
363
|
|
|
359
364
|
Returns:
|
|
360
365
|
The updated dataset object. To see the examples, use `list_examples()`.
|
|
361
366
|
|
|
362
367
|
Raises:
|
|
363
|
-
AssertionError: If `examples` is not a list of dicts or a pandas
|
|
364
|
-
DataFrame.
|
|
368
|
+
AssertionError: If `examples` is not a list of dicts or a :class:`pandas.DataFrame`.
|
|
365
369
|
arize._generated.api_client.exceptions.ApiException: If the REST API
|
|
366
370
|
returns an error response (e.g. 400/401/403/404/429).
|
|
367
371
|
"""
|
arize/datasets/errors.py
CHANGED
|
@@ -80,7 +80,7 @@ class RequiredColumnsError(DatasetError):
|
|
|
80
80
|
|
|
81
81
|
|
|
82
82
|
class EmptyDatasetError(DatasetError):
|
|
83
|
-
"""Raised when dataset DataFrame has no rows."""
|
|
83
|
+
"""Raised when dataset :class:`pandas.DataFrame` has no rows."""
|
|
84
84
|
|
|
85
85
|
def error_message(self) -> str:
|
|
86
86
|
"""Return the error message for this exception."""
|
arize/datasets/validation.py
CHANGED
|
@@ -8,12 +8,12 @@ from arize.datasets import errors as err
|
|
|
8
8
|
def validate_dataset_df(
|
|
9
9
|
df: pd.DataFrame,
|
|
10
10
|
) -> list[err.DatasetError]:
|
|
11
|
-
"""Validate a dataset DataFrame for structural and content errors.
|
|
11
|
+
"""Validate a dataset :class:`pandas.DataFrame` for structural and content errors.
|
|
12
12
|
|
|
13
13
|
Checks for required columns, unique ID values, and non-empty data.
|
|
14
14
|
|
|
15
15
|
Args:
|
|
16
|
-
df: The pandas
|
|
16
|
+
df: The :class:`pandas.DataFrame` to validate.
|
|
17
17
|
|
|
18
18
|
Returns:
|
|
19
19
|
A list of DatasetError objects found during validation. Empty list if valid.
|