arize 8.0.0b1__py3-none-any.whl → 8.0.0b4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arize/__init__.py +9 -2
- arize/_client_factory.py +50 -0
- arize/_exporter/client.py +18 -17
- arize/_exporter/parsers/tracing_data_parser.py +9 -4
- arize/_exporter/validation.py +1 -1
- arize/_flight/client.py +37 -17
- arize/_generated/api_client/api/datasets_api.py +6 -6
- arize/_generated/api_client/api/experiments_api.py +6 -6
- arize/_generated/api_client/api/projects_api.py +3 -3
- arize/_lazy.py +61 -10
- arize/client.py +66 -50
- arize/config.py +175 -48
- arize/constants/config.py +1 -0
- arize/constants/ml.py +9 -16
- arize/constants/spans.py +5 -10
- arize/datasets/client.py +45 -28
- arize/datasets/errors.py +1 -1
- arize/datasets/validation.py +2 -2
- arize/embeddings/auto_generator.py +16 -9
- arize/embeddings/base_generators.py +15 -9
- arize/embeddings/cv_generators.py +2 -2
- arize/embeddings/errors.py +2 -2
- arize/embeddings/nlp_generators.py +8 -8
- arize/embeddings/tabular_generators.py +6 -6
- arize/exceptions/base.py +0 -52
- arize/exceptions/config.py +22 -0
- arize/exceptions/parameters.py +1 -330
- arize/exceptions/values.py +8 -5
- arize/experiments/__init__.py +4 -0
- arize/experiments/client.py +31 -18
- arize/experiments/evaluators/base.py +12 -9
- arize/experiments/evaluators/executors.py +16 -7
- arize/experiments/evaluators/rate_limiters.py +3 -1
- arize/experiments/evaluators/types.py +9 -7
- arize/experiments/evaluators/utils.py +7 -5
- arize/experiments/functions.py +128 -58
- arize/experiments/tracing.py +4 -1
- arize/experiments/types.py +34 -31
- arize/logging.py +54 -33
- arize/ml/batch_validation/errors.py +10 -1004
- arize/ml/batch_validation/validator.py +351 -291
- arize/ml/bounded_executor.py +25 -6
- arize/ml/casting.py +51 -33
- arize/ml/client.py +43 -35
- arize/ml/proto.py +21 -22
- arize/ml/stream_validation.py +64 -27
- arize/ml/surrogate_explainer/mimic.py +18 -10
- arize/ml/types.py +27 -67
- arize/pre_releases.py +10 -6
- arize/projects/client.py +9 -4
- arize/py.typed +0 -0
- arize/regions.py +11 -11
- arize/spans/client.py +125 -31
- arize/spans/columns.py +32 -36
- arize/spans/conversion.py +12 -11
- arize/spans/validation/annotations/dataframe_form_validation.py +1 -1
- arize/spans/validation/annotations/value_validation.py +11 -14
- arize/spans/validation/common/argument_validation.py +3 -3
- arize/spans/validation/common/dataframe_form_validation.py +7 -7
- arize/spans/validation/common/value_validation.py +11 -14
- arize/spans/validation/evals/dataframe_form_validation.py +4 -4
- arize/spans/validation/evals/evals_validation.py +6 -6
- arize/spans/validation/evals/value_validation.py +1 -1
- arize/spans/validation/metadata/argument_validation.py +1 -1
- arize/spans/validation/metadata/dataframe_form_validation.py +2 -2
- arize/spans/validation/metadata/value_validation.py +23 -1
- arize/spans/validation/spans/dataframe_form_validation.py +2 -2
- arize/spans/validation/spans/spans_validation.py +6 -6
- arize/utils/arrow.py +38 -2
- arize/utils/cache.py +2 -2
- arize/utils/dataframe.py +4 -4
- arize/utils/online_tasks/dataframe_preprocessor.py +15 -11
- arize/utils/openinference_conversion.py +10 -10
- arize/utils/proto.py +0 -1
- arize/utils/types.py +6 -6
- arize/version.py +1 -1
- {arize-8.0.0b1.dist-info → arize-8.0.0b4.dist-info}/METADATA +32 -7
- {arize-8.0.0b1.dist-info → arize-8.0.0b4.dist-info}/RECORD +81 -78
- {arize-8.0.0b1.dist-info → arize-8.0.0b4.dist-info}/WHEEL +0 -0
- {arize-8.0.0b1.dist-info → arize-8.0.0b4.dist-info}/licenses/LICENSE +0 -0
- {arize-8.0.0b1.dist-info → arize-8.0.0b4.dist-info}/licenses/NOTICE +0 -0
arize/__init__.py
CHANGED
@@ -2,6 +2,7 @@
 
 import logging
 from collections.abc import Mapping
+from typing import Literal, cast
 
 from arize._generated.api_client import models
 from arize.client import ArizeClient
@@ -37,7 +38,7 @@ def make_to_df(field_name: str) -> object:
         json_normalize: bool = False,
         convert_dtypes: bool = True,
     ) -> object:
-        """Convert a list of objects to a pandas
+        """Convert a list of objects to a :class:`pandas.DataFrame`.
 
         Behavior:
         - If an item is a Pydantic v2 model, use `.model_dump(by_alias=...)`.
@@ -82,7 +83,11 @@ def make_to_df(field_name: str) -> object:
 
         # Drop None/NaN columns if requested
         if exclude_none in ("any", "all", True):
-            drop_how
+            drop_how: Literal["any", "all"] = (
+                "all"
+                if exclude_none is True
+                else cast("Literal['any', 'all']", exclude_none)
+            )
             df.dropna(axis=1, how=drop_how, inplace=True)
 
         if convert_dtypes:
@@ -92,6 +97,8 @@ def make_to_df(field_name: str) -> object:
     return to_df
 
 
+# Monkey-patch convenience methods onto generated response models
+# Type ignore comments needed: mypy can't verify runtime attribute additions
 models.DatasetsList200Response.to_df = make_to_df("datasets")  # type: ignore[attr-defined]
 models.DatasetsExamplesList200Response.to_df = make_to_df("examples")  # type: ignore[attr-defined]
 models.ExperimentsList200Response.to_df = make_to_df("experiments")  # type: ignore[attr-defined]
arize/_client_factory.py
ADDED
@@ -0,0 +1,50 @@
+"""Factory for creating and caching the generated OpenAPI client."""
+
+from __future__ import annotations
+
+import threading
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from arize._generated.api_client.api_client import ApiClient
+    from arize.config import SDKConfiguration
+
+
+class GeneratedClientFactory:
+    """Factory for creating and caching generated OpenAPI clients.
+
+    This factory is owned by ArizeClient and provides thread-safe lazy
+    initialization of the OpenAPI client used by various subclients.
+    """
+
+    def __init__(self, sdk_config: SDKConfiguration) -> None:
+        """Initialize the factory.
+
+        Args:
+            sdk_config: SDK configuration containing API settings.
+        """
+        self._sdk_config = sdk_config
+        self._client: ApiClient | None = None
+        self._lock = threading.Lock()
+
+    def get_client(self) -> ApiClient:
+        """Get or create the generated OpenAPI client instance.
+
+        Returns:
+            The shared generated API client instance.
+        """
+        if self._client is not None:
+            return self._client
+
+        with self._lock:
+            if self._client is not None:
+                return self._client
+
+            # Import lazily to avoid extra dependencies at config time
+            from arize._generated import api_client as gen
+
+            cfg = gen.Configuration(host=self._sdk_config.api_url)
+            if self._sdk_config.api_key:
+                cfg.access_token = self._sdk_config.api_key
+            self._client = gen.ApiClient(cfg)
+            return self._client
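
get_client() above is a textbook double-checked locking pattern: a lock-free fast path once the client exists, plus a re-check under the lock so racing first callers construct the client only once. A generic sketch of the idiom (the resource-building method is a stand-in for constructing gen.ApiClient):

import threading


class LazyHolder:
    def __init__(self) -> None:
        self._resource: object | None = None
        self._lock = threading.Lock()

    def get(self) -> object:
        # Fast path: skip the lock entirely once initialized.
        if self._resource is not None:
            return self._resource
        with self._lock:
            # Re-check: another thread may have initialized it while we waited.
            if self._resource is not None:
                return self._resource
            self._resource = self._build()  # stand-in for gen.ApiClient(cfg)
            return self._resource

    def _build(self) -> object:
        return object()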
arize/_exporter/client.py
CHANGED
@@ -1,4 +1,3 @@
-# type: ignore[pb2]
 import logging
 from dataclasses import dataclass
 from datetime import datetime
@@ -41,7 +40,7 @@ class ArizeExportClient:
         batch_id: str = "",
         include_actuals: bool = False,
         stream_chunk_size: int | None = None,
-    ) ->
+    ) -> pd.DataFrame:
         """Exports data of a specific model in the Arize platform to a pandas dataframe.
 
         The export covers a defined time interval and model environment, and can
@@ -237,6 +236,22 @@ class ArizeExportClient:
         columns: list | None = None,
         stream_chunk_size: int | None = None,
     ) -> tuple[flight.FlightStreamReader | None, int]:
+        # Validate inputs first before creating logging context
+        validate_input_type(space_id, "space_id", str)
+        validate_input_type(model_id, "model_id", str)
+        validate_input_type(environment, "environment", Environments)
+        validate_input_type(include_actuals, "include_actuals", bool)
+        validate_input_type(start_time, "start_time", datetime)
+        validate_input_type(end_time, "end_time", datetime)
+        validate_input_type(model_version, "model_version", str)
+        validate_input_type(batch_id, "batch_id", str)
+        validate_input_type(where, "where", str)
+        validate_input_type(columns, "columns", list, allow_none=True)
+        validate_input_type(
+            stream_chunk_size, "stream_chunk_size", int, allow_none=True
+        )
+        validate_start_end_time(start_time, end_time)
+
         # Bind common context for this operation
         log = CtxAdapter(
             logger,
@@ -258,20 +273,6 @@ class ArizeExportClient:
             },
         )
         log.debug("Getting stream reader...")
-        validate_input_type(space_id, "space_id", str)
-        validate_input_type(model_id, "model_id", str)
-        validate_input_type(environment, "environment", Environments)
-        validate_input_type(include_actuals, "include_actuals", bool)
-        validate_input_type(start_time, "start_time", datetime)
-        validate_input_type(end_time, "end_time", datetime)
-        validate_input_type(model_version, "model_version", str)
-        validate_input_type(batch_id, "batch_id", str)
-        validate_input_type(where, "where", str)
-        validate_input_type(columns, "columns", list, allow_none=True)
-        validate_input_type(
-            stream_chunk_size, "stream_chunk_size", int, allow_none=True
-        )
-        validate_start_end_time(start_time, end_time)
 
         # Create query descriptor
         query_descriptor = flight_pb2.RecordQueryDescriptor(
@@ -300,7 +301,7 @@ class ArizeExportClient:
         try:
             flight_info = self.flight_client.get_flight_info(
                 flight.FlightDescriptor.for_command(
-                json_format.MessageToJson(query_descriptor)
+                    json_format.MessageToJson(query_descriptor)
                 ),
             )
             logger.info("Fetching data...")
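
The reorder above makes the stream-reader helper fail fast: the type checks now run before the logging context is built, so invalid arguments never produce a partially bound log adapter. For reference, a minimal sketch of what a validator with the call signature used here might look like; this is an assumption for illustration, and the real helper in arize/_exporter/validation.py may differ:

def validate_input_type(
    value: object,
    name: str,
    expected_type: type,
    allow_none: bool = False,
) -> None:
    # Accept None only when the caller opts in via allow_none.
    if value is None and allow_none:
        return
    if not isinstance(value, expected_type):
        raise TypeError(
            f"{name} must be of type {expected_type.__name__}, "
            f"got {type(value).__name__}"
        )


validate_input_type("space-123", "space_id", str)            # passes
validate_input_type(None, "columns", list, allow_none=True)  # passes
# validate_input_type(5, "space_id", str)                    # raises TypeError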
arize/_exporter/parsers/tracing_data_parser.py
CHANGED
@@ -1,5 +1,7 @@
 import json
 import logging
+from collections.abc import Callable
+from typing import Any, TypeGuard
 
 import numpy as np
 import pandas as pd
@@ -28,7 +30,10 @@ logger = logging.getLogger(__name__)
 # of the error is on the data; It should not prevent a user from continuing to use the data
 class OtelTracingDataTransformer:
     def _apply_column_transformation(
-        self,
+        self,
+        df: pd.DataFrame,
+        col_name: str,
+        transform_func: Callable[[Any], Any],
     ) -> str | None:
         """Apply a transformation to a column and return error message if it fails."""
         try:
@@ -89,7 +94,7 @@ class OtelTracingDataTransformer:
             if col.name in df.columns
         ]
         for col_name in dirty_string_column_names:
-            df[col_name] = df[col_name].apply(self._clean_json_string)
+            df[col_name] = df[col_name].apply(self._clean_json_string)  # type: ignore[arg-type]
 
         # Convert timestamp columns to datetime objects
         timestamp_column_names: list[str] = [
@@ -102,7 +107,7 @@ class OtelTracingDataTransformer:
         ]
         for col_name in timestamp_column_names:
             df[col_name] = df[col_name].apply(
-                self._convert_timestamp_to_datetime
+                self._convert_timestamp_to_datetime  # type: ignore[arg-type]
             )
 
         for err in errors:
@@ -138,7 +143,7 @@ class OtelTracingDataTransformer:
                 return None
         return None
 
-    def _is_non_empty_string(self, value: object) ->
+    def _is_non_empty_string(self, value: object) -> TypeGuard[str]:
        return isinstance(value, str) and value != ""
 
    def _deserialize_json_string_to_dict(self, value: str) -> object:
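
_is_non_empty_string now returns `typing.TypeGuard[str]` (Python 3.10+) instead of a plain `bool`, so a True result narrows the checked value from `object` to `str` for static type checkers. A standalone sketch of the mechanism:

from typing import TypeGuard


def is_non_empty_string(value: object) -> TypeGuard[str]:
    return isinstance(value, str) and value != ""


def shout(value: object) -> str:
    if is_non_empty_string(value):
        # Inside this branch, type checkers treat `value` as `str`,
        # so str methods are allowed without a cast.
        return value.upper()
    return ""


print(shout("hello"))  # HELLO
print(shout(42))       # prints an empty string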
arize/_exporter/validation.py
CHANGED
arize/_flight/client.py
CHANGED
@@ -1,11 +1,10 @@
-# type: ignore[pb2]
 from __future__ import annotations
 
 import base64
 import logging
 from dataclasses import dataclass, field
 from enum import Enum
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, TypeAlias
 
 from google.protobuf import json_format
 from pyarrow import flight
@@ -20,15 +19,15 @@ from arize.version import __version__
 
 if TYPE_CHECKING:
     import types
-    from collections.abc import
+    from collections.abc import Iterator
 
     import pandas as pd
     import pyarrow as pa
 
 
-BytesPair = tuple[bytes, bytes]
-Headers = list[BytesPair]
-FlightPostArrowFileResponse = (
+BytesPair: TypeAlias = tuple[bytes, bytes]
+Headers: TypeAlias = list[BytesPair]
+FlightPostArrowFileResponse: TypeAlias = (
     flight_pb2.WriteSpanEvaluationResponse
     | flight_pb2.WriteSpanAnnotationResponse
     | flight_pb2.WriteSpanAttributesMetadataResponse
@@ -90,7 +89,11 @@ class ArizeFlightClient:
     # ---------- Connection management ----------
 
     def _ensure_client(self) -> flight.FlightClient:
-        """Lazily initialize and return the underlying Flight client connection.
+        """Lazily initialize and return the underlying Flight client connection.
+
+        Returns:
+            flight.FlightClient: The initialized Apache Arrow Flight client.
+        """
         client = object.__getattribute__(self, "_client")
         if client is not None:
             return client
@@ -135,7 +138,11 @@ class ArizeFlightClient:
     # ---------- methods simple passthrough wrappers ----------
 
     def get_flight_info(self, *args: object, **kwargs: object) -> object:
-        """Get flight information. Passthrough to underlying Flight client with auth options.
+        """Get flight information. Passthrough to underlying Flight client with auth options.
+
+        Returns:
+            object: FlightInfo object containing metadata about the requested data stream.
+        """
        client = self._ensure_client()
        kwargs.setdefault("options", self.call_options)
        return client.get_flight_info(*args, **kwargs)
@@ -146,6 +153,9 @@ class ArizeFlightClient:
         """Retrieve data stream via Flight DoGet.
 
         Passthrough to underlying Flight client with auth options.
+
+        Returns:
+            flight.FlightStreamReader: A stream reader for retrieving Arrow record batches.
         """
         client = self._ensure_client()
         kwargs.setdefault("options", self.call_options)
@@ -153,10 +163,15 @@ class ArizeFlightClient:
 
     def do_put(
         self, *args: object, **kwargs: object
-    ) -> [flight.FlightStreamWriter, flight.FlightMetadataReader]:
+    ) -> tuple[flight.FlightStreamWriter, flight.FlightMetadataReader]:
         """Upload data stream via Flight DoPut.
 
         Passthrough to underlying Flight client with auth options.
+
+        Returns:
+            tuple[flight.FlightStreamWriter, flight.FlightMetadataReader]: A tuple containing
+                a stream writer for uploading Arrow record batches and a metadata reader for
+                receiving server responses.
         """
         client = self._ensure_client()
         kwargs.setdefault("options", self.call_options)
@@ -164,10 +179,13 @@ class ArizeFlightClient:
 
     def do_action(
         self, *args: object, **kwargs: object
-    ) ->
+    ) -> Iterator[flight.Result]:
         """Execute an action via Flight DoAction.
 
         Passthrough to underlying Flight client with auth options.
+
+        Returns:
+            Iterable[flight.Result]: An iterable of Result objects from the action execution.
         """
         client = self._ensure_client()
         kwargs.setdefault("options", self.call_options)
@@ -210,6 +228,10 @@ class ArizeFlightClient:
             FlightRequestType.ANNOTATION,
             FlightRequestType.METADATA,
         ):
+            if project_name is None:
+                raise ValueError(
+                    f"project_name is required for {request_type.name} request type"
+                )
             proto_schema = get_pb_schema_tracing(project_name=project_name)
             base64_schema = base64.b64encode(proto_schema.SerializeToString())
             pa_schema = append_to_pyarrow_metadata(
@@ -260,8 +282,6 @@ class ArizeFlightClient:
             case FlightRequestType.LOG_EXPERIMENT_DATA:
                 res = flight_pb2.PostExperimentDataResponse()
                 res.ParseFromString(flight_response.to_pybytes())
-            case _:
-                raise ValueError(f"Unsupported request_type: {request_type}")
         return res
 
     # ---------- dataset methods ----------
@@ -271,7 +291,7 @@ class ArizeFlightClient:
         space_id: str,
         dataset_name: str,
         pa_table: pa.Table,
-    ) -> str:
+    ) -> str | None:
         """Create a new dataset via Flight DoPut.
 
         Args:
@@ -333,8 +353,8 @@ class ArizeFlightClient:
             latest version.
 
         Returns:
-            A pandas DataFrame containing the dataset examples
-
+            :class:`pandas.DataFrame`: A pandas DataFrame containing the dataset examples
+                with JSON string columns converted to dict objects.
 
         Raises:
             RuntimeError: If the Flight request fails.
@@ -374,8 +394,8 @@ class ArizeFlightClient:
             experiment_id: Experiment ID to retrieve runs from.
 
         Returns:
-            A pandas DataFrame containing the experiment runs
-
+            :class:`pandas.DataFrame`: A pandas DataFrame containing the experiment runs
+                with JSON string columns converted to dict objects.
 
         Raises:
             RuntimeError: If the Flight request fails.
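
The alias changes above adopt PEP 613 explicit `TypeAlias` annotations, which mark module-level assignments unambiguously as type aliases rather than ordinary variables; that matters here because the aliases reference each other and one another's components. A standalone sketch:

from typing import TypeAlias

# With the annotation, checkers treat these as aliases usable in annotations,
# not as plain variable assignments.
BytesPair: TypeAlias = tuple[bytes, bytes]
Headers: TypeAlias = list[BytesPair]


def encode_headers(pairs: dict[str, str]) -> Headers:
    return [(k.encode(), v.encode()) for k, v in pairs.items()]


print(encode_headers({"authorization": "Bearer <token>"}))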
arize/_generated/api_client/api/datasets_api.py
CHANGED
@@ -940,7 +940,7 @@ class DatasetsApi:
         self,
         dataset_id: Annotated[StrictStr, Field(description="The unique identifier of the dataset")],
         dataset_version_id: Annotated[Optional[StrictStr], Field(description="The unique identifier of the dataset version")] = None,
-        limit: Annotated[Optional[Annotated[int, Field(le=
+        limit: Annotated[Optional[Annotated[int, Field(le=500, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
         _request_timeout: Union[
             None,
             Annotated[StrictFloat, Field(gt=0)],
@@ -1021,7 +1021,7 @@ class DatasetsApi:
         self,
         dataset_id: Annotated[StrictStr, Field(description="The unique identifier of the dataset")],
         dataset_version_id: Annotated[Optional[StrictStr], Field(description="The unique identifier of the dataset version")] = None,
-        limit: Annotated[Optional[Annotated[int, Field(le=
+        limit: Annotated[Optional[Annotated[int, Field(le=500, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
         _request_timeout: Union[
             None,
             Annotated[StrictFloat, Field(gt=0)],
@@ -1102,7 +1102,7 @@ class DatasetsApi:
         self,
         dataset_id: Annotated[StrictStr, Field(description="The unique identifier of the dataset")],
         dataset_version_id: Annotated[Optional[StrictStr], Field(description="The unique identifier of the dataset version")] = None,
-        limit: Annotated[Optional[Annotated[int, Field(le=
+        limit: Annotated[Optional[Annotated[int, Field(le=500, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
         _request_timeout: Union[
             None,
             Annotated[StrictFloat, Field(gt=0)],
@@ -1861,7 +1861,7 @@ class DatasetsApi:
     def datasets_list(
         self,
         space_id: Annotated[Optional[StrictStr], Field(description="Filter search results to a particular space ID")] = None,
-        limit: Annotated[Optional[Annotated[int, Field(le=
+        limit: Annotated[Optional[Annotated[int, Field(le=100, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
         cursor: Annotated[Optional[StrictStr], Field(description="Opaque pagination cursor returned from a previous response (`pagination.next_cursor`). Treat it as an unreadable token; do not attempt to parse or construct it. ")] = None,
         _request_timeout: Union[
             None,
@@ -1941,7 +1941,7 @@ class DatasetsApi:
     def datasets_list_with_http_info(
         self,
         space_id: Annotated[Optional[StrictStr], Field(description="Filter search results to a particular space ID")] = None,
-        limit: Annotated[Optional[Annotated[int, Field(le=
+        limit: Annotated[Optional[Annotated[int, Field(le=100, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
         cursor: Annotated[Optional[StrictStr], Field(description="Opaque pagination cursor returned from a previous response (`pagination.next_cursor`). Treat it as an unreadable token; do not attempt to parse or construct it. ")] = None,
         _request_timeout: Union[
             None,
@@ -2021,7 +2021,7 @@ class DatasetsApi:
     def datasets_list_without_preload_content(
         self,
         space_id: Annotated[Optional[StrictStr], Field(description="Filter search results to a particular space ID")] = None,
-        limit: Annotated[Optional[Annotated[int, Field(le=
+        limit: Annotated[Optional[Annotated[int, Field(le=100, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
         cursor: Annotated[Optional[StrictStr], Field(description="Opaque pagination cursor returned from a previous response (`pagination.next_cursor`). Treat it as an unreadable token; do not attempt to parse or construct it. ")] = None,
         _request_timeout: Union[
             None,
arize/_generated/api_client/api/experiments_api.py
CHANGED
@@ -898,7 +898,7 @@ class ExperimentsApi:
     def experiments_list(
         self,
         dataset_id: Annotated[Optional[StrictStr], Field(description="Filter experiments to a particular dataset ID")] = None,
-        limit: Annotated[Optional[Annotated[int, Field(le=
+        limit: Annotated[Optional[Annotated[int, Field(le=100, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
         cursor: Annotated[Optional[StrictStr], Field(description="Opaque pagination cursor returned from a previous response (`pagination.next_cursor`). Treat it as an unreadable token; do not attempt to parse or construct it. ")] = None,
         _request_timeout: Union[
             None,
@@ -978,7 +978,7 @@ class ExperimentsApi:
     def experiments_list_with_http_info(
         self,
         dataset_id: Annotated[Optional[StrictStr], Field(description="Filter experiments to a particular dataset ID")] = None,
-        limit: Annotated[Optional[Annotated[int, Field(le=
+        limit: Annotated[Optional[Annotated[int, Field(le=100, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
         cursor: Annotated[Optional[StrictStr], Field(description="Opaque pagination cursor returned from a previous response (`pagination.next_cursor`). Treat it as an unreadable token; do not attempt to parse or construct it. ")] = None,
         _request_timeout: Union[
             None,
@@ -1058,7 +1058,7 @@ class ExperimentsApi:
     def experiments_list_without_preload_content(
         self,
         dataset_id: Annotated[Optional[StrictStr], Field(description="Filter experiments to a particular dataset ID")] = None,
-        limit: Annotated[Optional[Annotated[int, Field(le=
+        limit: Annotated[Optional[Annotated[int, Field(le=100, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
         cursor: Annotated[Optional[StrictStr], Field(description="Opaque pagination cursor returned from a previous response (`pagination.next_cursor`). Treat it as an unreadable token; do not attempt to parse or construct it. ")] = None,
         _request_timeout: Union[
             None,
@@ -1211,7 +1211,7 @@ class ExperimentsApi:
     def experiments_runs_list(
         self,
         experiment_id: Annotated[StrictStr, Field(description="The unique identifier of the experiment")],
-        limit: Annotated[Optional[Annotated[int, Field(le=
+        limit: Annotated[Optional[Annotated[int, Field(le=500, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
         _request_timeout: Union[
             None,
             Annotated[StrictFloat, Field(gt=0)],
@@ -1288,7 +1288,7 @@ class ExperimentsApi:
     def experiments_runs_list_with_http_info(
         self,
         experiment_id: Annotated[StrictStr, Field(description="The unique identifier of the experiment")],
-        limit: Annotated[Optional[Annotated[int, Field(le=
+        limit: Annotated[Optional[Annotated[int, Field(le=500, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
         _request_timeout: Union[
             None,
             Annotated[StrictFloat, Field(gt=0)],
@@ -1365,7 +1365,7 @@ class ExperimentsApi:
     def experiments_runs_list_without_preload_content(
         self,
         experiment_id: Annotated[StrictStr, Field(description="The unique identifier of the experiment")],
-        limit: Annotated[Optional[Annotated[int, Field(le=
+        limit: Annotated[Optional[Annotated[int, Field(le=500, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
         _request_timeout: Union[
             None,
             Annotated[StrictFloat, Field(gt=0)],
arize/_generated/api_client/api/projects_api.py
CHANGED
@@ -891,7 +891,7 @@ class ProjectsApi:
     def projects_list(
         self,
         space_id: Annotated[Optional[StrictStr], Field(description="Filter search results to a particular space ID")] = None,
-        limit: Annotated[Optional[Annotated[int, Field(le=
+        limit: Annotated[Optional[Annotated[int, Field(le=100, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
         cursor: Annotated[Optional[StrictStr], Field(description="Opaque pagination cursor returned from a previous response (`pagination.next_cursor`). Treat it as an unreadable token; do not attempt to parse or construct it. ")] = None,
         _request_timeout: Union[
             None,
@@ -970,7 +970,7 @@ class ProjectsApi:
     def projects_list_with_http_info(
         self,
         space_id: Annotated[Optional[StrictStr], Field(description="Filter search results to a particular space ID")] = None,
-        limit: Annotated[Optional[Annotated[int, Field(le=
+        limit: Annotated[Optional[Annotated[int, Field(le=100, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
         cursor: Annotated[Optional[StrictStr], Field(description="Opaque pagination cursor returned from a previous response (`pagination.next_cursor`). Treat it as an unreadable token; do not attempt to parse or construct it. ")] = None,
         _request_timeout: Union[
             None,
@@ -1049,7 +1049,7 @@ class ProjectsApi:
     def projects_list_without_preload_content(
         self,
         space_id: Annotated[Optional[StrictStr], Field(description="Filter search results to a particular space ID")] = None,
-        limit: Annotated[Optional[Annotated[int, Field(le=
+        limit: Annotated[Optional[Annotated[int, Field(le=100, strict=True, ge=1)]], Field(description="Maximum items to return")] = None,
         cursor: Annotated[Optional[StrictStr], Field(description="Opaque pagination cursor returned from a previous response (`pagination.next_cursor`). Treat it as an unreadable token; do not attempt to parse or construct it. ")] = None,
         _request_timeout: Union[
             None,
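
Across the three generated API modules, each `limit` parameter's constraints are now spelled out in full: a strict int between 1 and the per-endpoint cap (100 for the list endpoints, 500 for dataset examples and experiment runs). With Pydantic v2 the same `Annotated` type can be exercised directly; a sketch using `TypeAdapter` (the `Limit` alias is illustrative):

from typing import Annotated, Optional

from pydantic import Field, TypeAdapter, ValidationError

# Mirrors the generated signature: optional, strict int, 1 <= limit <= 100.
Limit = Annotated[
    Optional[Annotated[int, Field(le=100, strict=True, ge=1)]],
    Field(description="Maximum items to return"),
]

adapter = TypeAdapter(Limit)
print(adapter.validate_python(50))    # 50
print(adapter.validate_python(None))  # None
try:
    adapter.validate_python(101)      # above the cap
except ValidationError as exc:
    print(exc.errors()[0]["type"])    # less_than_equal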
arize/_lazy.py
CHANGED
@@ -1,6 +1,7 @@
 # src/arize/_lazy.py
 from __future__ import annotations
 
+import inspect
 import logging
 import sys
 import threading
@@ -8,6 +9,8 @@ from importlib import import_module
 from typing import TYPE_CHECKING, ClassVar
 
 if TYPE_CHECKING:
+    import types
+
     from arize.config import SDKConfiguration
 
 logger = logging.getLogger(__name__)
@@ -22,6 +25,11 @@ class LazySubclientsMixin:
         self._lazy_cache: dict[str, object] = {}
         self._lazy_lock = threading.Lock()
 
+        # Add generated client factory
+        from arize._client_factory import GeneratedClientFactory
+
+        self._gen_client_factory = GeneratedClientFactory(sdk_config)
+
     def __getattr__(self, name: str) -> object:
         subs = self._SUBCLIENTS
         if name not in subs:
@@ -41,12 +49,18 @@ class LazySubclientsMixin:
         module = _dynamic_import(module_path)
         klass = getattr(module, class_name)
 
-        #
-
-
-
-
-
+        # Determine which parameters this subclient needs
+        # and build kwargs accordingly
+        sig = inspect.signature(klass.__init__)
+        kwargs: dict[str, object] = {}
+        if "sdk_config" in sig.parameters:
+            kwargs["sdk_config"] = self.sdk_config
+        if "generated_client" in sig.parameters:
+            kwargs["generated_client"] = (
+                self._gen_client_factory.get_client()
+            )
+
+        instance = klass(**kwargs)
         self._lazy_cache[name] = instance
         return instance
 
@@ -58,7 +72,14 @@ class OptionalDependencyError(ImportError): ...
 
 
 def _can_import(module_name: str) -> bool:
-    """Check if a module can be imported without raising an exception.
+    """Check if a module can be imported without raising an exception.
+
+    Args:
+        module_name: The fully qualified module name to check (e.g., 'numpy', 'sklearn.preprocessing').
+
+    Returns:
+        bool: True if the module can be imported successfully, False otherwise.
+    """
     try:
         import_module(module_name)
     except Exception:
@@ -72,6 +93,18 @@ def require(
     required: tuple[str, ...],
     pkgname: str = "arize",
 ) -> None:
+    """Ensure required optional dependencies are installed, raising an error if missing.
+
+    Args:
+        extra_key: The extras group key for pip install (e.g., 'mimic', 'embeddings').
+            Used in the error message to guide users.
+        required: Tuple of required module names to check for availability.
+        pkgname: The package name for installation instructions. Defaults to 'arize'.
+
+    Raises:
+        OptionalDependencyError: If any of the required modules cannot be imported.
+            The error message includes pip install instructions with the extras group.
+    """
     if not required:
         return
     missing = [p for p in required if not _can_import(p)]
@@ -82,8 +115,24 @@ def require(
     )
 
 
-def _dynamic_import(modname: str, retries: int = 2) ->
-
+def _dynamic_import(modname: str, retries: int = 2) -> types.ModuleType:
+    """Dynamically import a module with retry logic and sys.modules cleanup on failure.
+
+    Args:
+        modname: The fully qualified module name to import.
+        retries: Number of import attempts to make. Must be > 0. Defaults to 2.
+
+    Returns:
+        types.ModuleType: The successfully imported module.
+
+    Raises:
+        ValueError: If retries is <= 0.
+        ModuleNotFoundError: If the module cannot be found after all retry attempts.
+        ImportError: If the module import fails after all retry attempts.
+        KeyError: If a key error occurs during import after all retry attempts.
+    """
+
+    def _attempt_import(remaining_attempts: int) -> types.ModuleType:
         try:
             return import_module(modname)
         except (ModuleNotFoundError, ImportError, KeyError):
@@ -92,4 +141,6 @@ def _dynamic_import(modname: str, retries: int = 2) -> object:
             raise
         return _attempt_import(remaining_attempts - 1)
 
-
+    if retries <= 0:
+        raise ValueError(f"retries must be > 0, got {retries}")
+    return _attempt_import(retries)