arize 8.0.0b2-py3-none-any.whl → 8.0.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arize/__init__.py +8 -1
- arize/_exporter/client.py +18 -17
- arize/_exporter/parsers/tracing_data_parser.py +9 -4
- arize/_exporter/validation.py +1 -1
- arize/_flight/client.py +33 -13
- arize/_lazy.py +37 -2
- arize/client.py +61 -35
- arize/config.py +168 -14
- arize/constants/config.py +1 -0
- arize/datasets/client.py +32 -19
- arize/embeddings/auto_generator.py +14 -7
- arize/embeddings/base_generators.py +15 -9
- arize/embeddings/cv_generators.py +2 -2
- arize/embeddings/nlp_generators.py +8 -8
- arize/embeddings/tabular_generators.py +5 -5
- arize/exceptions/config.py +22 -0
- arize/exceptions/parameters.py +1 -1
- arize/exceptions/values.py +8 -5
- arize/experiments/__init__.py +4 -0
- arize/experiments/client.py +17 -11
- arize/experiments/evaluators/base.py +6 -3
- arize/experiments/evaluators/executors.py +6 -4
- arize/experiments/evaluators/rate_limiters.py +3 -1
- arize/experiments/evaluators/types.py +7 -5
- arize/experiments/evaluators/utils.py +7 -5
- arize/experiments/functions.py +111 -48
- arize/experiments/tracing.py +4 -1
- arize/experiments/types.py +31 -26
- arize/logging.py +53 -32
- arize/ml/batch_validation/validator.py +82 -70
- arize/ml/bounded_executor.py +25 -6
- arize/ml/casting.py +45 -27
- arize/ml/client.py +35 -28
- arize/ml/proto.py +16 -17
- arize/ml/stream_validation.py +63 -25
- arize/ml/surrogate_explainer/mimic.py +15 -7
- arize/ml/types.py +26 -12
- arize/pre_releases.py +7 -6
- arize/py.typed +0 -0
- arize/regions.py +10 -10
- arize/spans/client.py +113 -21
- arize/spans/conversion.py +7 -5
- arize/spans/validation/annotations/dataframe_form_validation.py +1 -1
- arize/spans/validation/annotations/value_validation.py +11 -14
- arize/spans/validation/common/dataframe_form_validation.py +1 -1
- arize/spans/validation/common/value_validation.py +10 -13
- arize/spans/validation/evals/value_validation.py +1 -1
- arize/spans/validation/metadata/argument_validation.py +1 -1
- arize/spans/validation/metadata/dataframe_form_validation.py +1 -1
- arize/spans/validation/metadata/value_validation.py +23 -1
- arize/utils/arrow.py +37 -1
- arize/utils/online_tasks/dataframe_preprocessor.py +8 -4
- arize/utils/proto.py +0 -1
- arize/utils/types.py +6 -6
- arize/version.py +1 -1
- {arize-8.0.0b2.dist-info → arize-8.0.1.dist-info}/METADATA +18 -3
- {arize-8.0.0b2.dist-info → arize-8.0.1.dist-info}/RECORD +60 -58
- {arize-8.0.0b2.dist-info → arize-8.0.1.dist-info}/WHEEL +0 -0
- {arize-8.0.0b2.dist-info → arize-8.0.1.dist-info}/licenses/LICENSE +0 -0
- {arize-8.0.0b2.dist-info → arize-8.0.1.dist-info}/licenses/NOTICE +0 -0
arize/__init__.py
CHANGED
@@ -2,6 +2,7 @@
 
 import logging
 from collections.abc import Mapping
+from typing import Literal, cast
 
 from arize._generated.api_client import models
 from arize.client import ArizeClient
@@ -82,7 +83,11 @@ def make_to_df(field_name: str) -> object:
 
         # Drop None/NaN columns if requested
         if exclude_none in ("any", "all", True):
-            drop_how
+            drop_how: Literal["any", "all"] = (
+                "all"
+                if exclude_none is True
+                else cast("Literal['any', 'all']", exclude_none)
+            )
             df.dropna(axis=1, how=drop_how, inplace=True)
 
         if convert_dtypes:
@@ -92,6 +97,8 @@ def make_to_df(field_name: str) -> object:
     return to_df
 
 
+# Monkey-patch convenience methods onto generated response models
+# Type ignore comments needed: mypy can't verify runtime attribute additions
 models.DatasetsList200Response.to_df = make_to_df("datasets") # type: ignore[attr-defined]
 models.DatasetsExamplesList200Response.to_df = make_to_df("examples") # type: ignore[attr-defined]
 models.ExperimentsList200Response.to_df = make_to_df("experiments") # type: ignore[attr-defined]
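
The `drop_how` change above is a typing fix: pandas stubs annotate the `how` parameter of `DataFrame.dropna` as `Literal["any", "all"]`, so a plain `str` (or the `True` sentinel) must be narrowed explicitly. A minimal standalone sketch of the same pattern; the helper name `drop_none_columns` is hypothetical:

    from typing import Literal, cast

    import pandas as pd

    def drop_none_columns(df: pd.DataFrame, exclude_none: str | bool) -> pd.DataFrame:
        # True is treated as "all"; "any"/"all" pass through after narrowing
        if exclude_none in ("any", "all", True):
            drop_how: Literal["any", "all"] = (
                "all"
                if exclude_none is True
                else cast("Literal['any', 'all']", exclude_none)
            )
            df.dropna(axis=1, how=drop_how, inplace=True)
        return df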
arize/_exporter/client.py
CHANGED
@@ -1,4 +1,3 @@
-# type: ignore[pb2]
 import logging
 from dataclasses import dataclass
 from datetime import datetime
@@ -41,7 +40,7 @@ class ArizeExportClient:
         batch_id: str = "",
         include_actuals: bool = False,
         stream_chunk_size: int | None = None,
-    ) ->
+    ) -> pd.DataFrame:
        """Exports data of a specific model in the Arize platform to a pandas dataframe.
 
         The export covers a defined time interval and model environment, and can
@@ -237,6 +236,22 @@ class ArizeExportClient:
         columns: list | None = None,
         stream_chunk_size: int | None = None,
     ) -> tuple[flight.FlightStreamReader | None, int]:
+        # Validate inputs first before creating logging context
+        validate_input_type(space_id, "space_id", str)
+        validate_input_type(model_id, "model_id", str)
+        validate_input_type(environment, "environment", Environments)
+        validate_input_type(include_actuals, "include_actuals", bool)
+        validate_input_type(start_time, "start_time", datetime)
+        validate_input_type(end_time, "end_time", datetime)
+        validate_input_type(model_version, "model_version", str)
+        validate_input_type(batch_id, "batch_id", str)
+        validate_input_type(where, "where", str)
+        validate_input_type(columns, "columns", list, allow_none=True)
+        validate_input_type(
+            stream_chunk_size, "stream_chunk_size", int, allow_none=True
+        )
+        validate_start_end_time(start_time, end_time)
+
         # Bind common context for this operation
         log = CtxAdapter(
             logger,
@@ -258,20 +273,6 @@ class ArizeExportClient:
             },
         )
         log.debug("Getting stream reader...")
-        validate_input_type(space_id, "space_id", str)
-        validate_input_type(model_id, "model_id", str)
-        validate_input_type(environment, "environment", Environments)
-        validate_input_type(include_actuals, "include_actuals", bool)
-        validate_input_type(start_time, "start_time", datetime)
-        validate_input_type(end_time, "end_time", datetime)
-        validate_input_type(model_version, "model_version", str)
-        validate_input_type(batch_id, "batch_id", str)
-        validate_input_type(where, "where", str)
-        validate_input_type(columns, "columns", list, allow_none=True)
-        validate_input_type(
-            stream_chunk_size, "stream_chunk_size", int, allow_none=True
-        )
-        validate_start_end_time(start_time, end_time)
 
         # Create query descriptor
         query_descriptor = flight_pb2.RecordQueryDescriptor(
@@ -300,7 +301,7 @@ class ArizeExportClient:
         try:
             flight_info = self.flight_client.get_flight_info(
                 flight.FlightDescriptor.for_command(
-                    json_format.MessageToJson(query_descriptor)
+                    json_format.MessageToJson(query_descriptor)
                 ),
             )
             logger.info("Fetching data...")
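
The reordering above runs every `validate_input_type` check before the `CtxAdapter` logging context is built, so bad arguments fail fast instead of first paying the cost of binding context. Only the validator's call signature is confirmed by this diff; the body below is an assumed sketch of that contract for illustration:

    def validate_input_type(
        value: object,
        name: str,
        expected_type: type,
        allow_none: bool = False,
    ) -> None:
        # Hypothetical body; the real implementation lives elsewhere in the SDK.
        if value is None and allow_none:
            return
        if not isinstance(value, expected_type):
            raise TypeError(
                f"{name} must be of type {expected_type.__name__}, "
                f"got {type(value).__name__}"
            )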
arize/_exporter/parsers/tracing_data_parser.py
CHANGED
@@ -1,5 +1,7 @@
 import json
 import logging
+from collections.abc import Callable
+from typing import Any, TypeGuard
 
 import numpy as np
 import pandas as pd
@@ -28,7 +30,10 @@ logger = logging.getLogger(__name__)
 # of the error is on the data; It should not prevent a user from continuing to use the data
 class OtelTracingDataTransformer:
     def _apply_column_transformation(
-        self,
+        self,
+        df: pd.DataFrame,
+        col_name: str,
+        transform_func: Callable[[Any], Any],
     ) -> str | None:
         """Apply a transformation to a column and return error message if it fails."""
         try:
@@ -89,7 +94,7 @@ class OtelTracingDataTransformer:
             if col.name in df.columns
         ]
         for col_name in dirty_string_column_names:
-            df[col_name] = df[col_name].apply(self._clean_json_string)
+            df[col_name] = df[col_name].apply(self._clean_json_string) # type: ignore[arg-type]
 
         # Convert timestamp columns to datetime objects
         timestamp_column_names: list[str] = [
@@ -102,7 +107,7 @@ class OtelTracingDataTransformer:
         ]
         for col_name in timestamp_column_names:
             df[col_name] = df[col_name].apply(
-                self._convert_timestamp_to_datetime
+                self._convert_timestamp_to_datetime # type: ignore[arg-type]
             )
 
         for err in errors:
@@ -138,7 +143,7 @@ class OtelTracingDataTransformer:
                 return None
         return None
 
-    def _is_non_empty_string(self, value: object) ->
+    def _is_non_empty_string(self, value: object) -> TypeGuard[str]:
         return isinstance(value, str) and value != ""
 
     def _deserialize_json_string_to_dict(self, value: str) -> object:
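
The `TypeGuard[str]` return type added above lets static checkers narrow a value from `object` to `str` at every call site, instead of treating the predicate as an ordinary `bool`. A self-contained sketch of the effect:

    from typing import TypeGuard

    def is_non_empty_string(value: object) -> TypeGuard[str]:
        return isinstance(value, str) and value != ""

    def first_char(value: object) -> str | None:
        if is_non_empty_string(value):
            # mypy/pyright now treat value as str inside this branch
            return value[0]
        return None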
arize/_exporter/validation.py
CHANGED
arize/_flight/client.py
CHANGED
@@ -1,11 +1,10 @@
-# type: ignore[pb2]
 from __future__ import annotations
 
 import base64
 import logging
 from dataclasses import dataclass, field
 from enum import Enum
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, TypeAlias
 
 from google.protobuf import json_format
 from pyarrow import flight
@@ -20,15 +19,15 @@ from arize.version import __version__
 
 if TYPE_CHECKING:
     import types
-    from collections.abc import
+    from collections.abc import Iterator
 
     import pandas as pd
     import pyarrow as pa
 
 
-BytesPair = tuple[bytes, bytes]
-Headers = list[BytesPair]
-FlightPostArrowFileResponse = (
+BytesPair: TypeAlias = tuple[bytes, bytes]
+Headers: TypeAlias = list[BytesPair]
+FlightPostArrowFileResponse: TypeAlias = (
    flight_pb2.WriteSpanEvaluationResponse
    | flight_pb2.WriteSpanAnnotationResponse
    | flight_pb2.WriteSpanAttributesMetadataResponse
@@ -90,7 +89,11 @@ class ArizeFlightClient:
     # ---------- Connection management ----------
 
     def _ensure_client(self) -> flight.FlightClient:
-        """Lazily initialize and return the underlying Flight client connection.
+        """Lazily initialize and return the underlying Flight client connection.
+
+        Returns:
+            flight.FlightClient: The initialized Apache Arrow Flight client.
+        """
         client = object.__getattribute__(self, "_client")
         if client is not None:
             return client
@@ -135,7 +138,11 @@ class ArizeFlightClient:
     # ---------- methods simple passthrough wrappers ----------
 
     def get_flight_info(self, *args: object, **kwargs: object) -> object:
-        """Get flight information. Passthrough to underlying Flight client with auth options.
+        """Get flight information. Passthrough to underlying Flight client with auth options.
+
+        Returns:
+            object: FlightInfo object containing metadata about the requested data stream.
+        """
         client = self._ensure_client()
         kwargs.setdefault("options", self.call_options)
         return client.get_flight_info(*args, **kwargs)
@@ -146,6 +153,9 @@ class ArizeFlightClient:
         """Retrieve data stream via Flight DoGet.
 
         Passthrough to underlying Flight client with auth options.
+
+        Returns:
+            flight.FlightStreamReader: A stream reader for retrieving Arrow record batches.
         """
         client = self._ensure_client()
         kwargs.setdefault("options", self.call_options)
@@ -153,10 +163,15 @@ class ArizeFlightClient:
 
     def do_put(
         self, *args: object, **kwargs: object
-    ) -> [flight.FlightStreamWriter, flight.FlightMetadataReader]:
+    ) -> tuple[flight.FlightStreamWriter, flight.FlightMetadataReader]:
         """Upload data stream via Flight DoPut.
 
         Passthrough to underlying Flight client with auth options.
+
+        Returns:
+            tuple[flight.FlightStreamWriter, flight.FlightMetadataReader]: A tuple containing
+                a stream writer for uploading Arrow record batches and a metadata reader for
+                receiving server responses.
         """
         client = self._ensure_client()
         kwargs.setdefault("options", self.call_options)
@@ -164,10 +179,13 @@ class ArizeFlightClient:
 
     def do_action(
         self, *args: object, **kwargs: object
-    ) ->
+    ) -> Iterator[flight.Result]:
         """Execute an action via Flight DoAction.
 
         Passthrough to underlying Flight client with auth options.
+
+        Returns:
+            Iterable[flight.Result]: An iterable of Result objects from the action execution.
         """
         client = self._ensure_client()
         kwargs.setdefault("options", self.call_options)
@@ -210,6 +228,10 @@ class ArizeFlightClient:
             FlightRequestType.ANNOTATION,
             FlightRequestType.METADATA,
         ):
+            if project_name is None:
+                raise ValueError(
+                    f"project_name is required for {request_type.name} request type"
+                )
             proto_schema = get_pb_schema_tracing(project_name=project_name)
             base64_schema = base64.b64encode(proto_schema.SerializeToString())
             pa_schema = append_to_pyarrow_metadata(
@@ -260,8 +282,6 @@ class ArizeFlightClient:
             case FlightRequestType.LOG_EXPERIMENT_DATA:
                 res = flight_pb2.PostExperimentDataResponse()
                 res.ParseFromString(flight_response.to_pybytes())
-            case _:
-                raise ValueError(f"Unsupported request_type: {request_type}")
         return res
 
     # ---------- dataset methods ----------
@@ -271,7 +291,7 @@ class ArizeFlightClient:
         space_id: str,
         dataset_name: str,
         pa_table: pa.Table,
-    ) -> str:
+    ) -> str | None:
         """Create a new dataset via Flight DoPut.
 
         Args:
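
The `TypeAlias` annotations added above make the alias intent explicit to type checkers (PEP 613), which matters for assignments that would otherwise be ambiguous between an alias and an ordinary variable. A minimal sketch of the pattern under `from __future__ import annotations`; the function `make_call_headers` is a hypothetical name, not part of the SDK:

    from __future__ import annotations

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        from typing import TypeAlias

        BytesPair: TypeAlias = tuple[bytes, bytes]
        Headers: TypeAlias = list[BytesPair]

    def make_call_headers(token: bytes) -> Headers:
        # Annotations are lazily evaluated here, so TYPE_CHECKING-only
        # aliases are safe to reference in signatures at runtime.
        return [(b"authorization", b"Bearer " + token)]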
arize/_lazy.py
CHANGED
@@ -52,7 +52,7 @@ class LazySubclientsMixin:
         # Determine which parameters this subclient needs
         # and build kwargs accordingly
         sig = inspect.signature(klass.__init__)
-        kwargs = {}
+        kwargs: dict[str, object] = {}
         if "sdk_config" in sig.parameters:
             kwargs["sdk_config"] = self.sdk_config
         if "generated_client" in sig.parameters:
@@ -72,7 +72,14 @@ class OptionalDependencyError(ImportError): ...
 
 
 def _can_import(module_name: str) -> bool:
-    """Check if a module can be imported without raising an exception.
+    """Check if a module can be imported without raising an exception.
+
+    Args:
+        module_name: The fully qualified module name to check (e.g., 'numpy', 'sklearn.preprocessing').
+
+    Returns:
+        bool: True if the module can be imported successfully, False otherwise.
+    """
     try:
         import_module(module_name)
     except Exception:
@@ -86,6 +93,18 @@ def require(
     required: tuple[str, ...],
     pkgname: str = "arize",
 ) -> None:
+    """Ensure required optional dependencies are installed, raising an error if missing.
+
+    Args:
+        extra_key: The extras group key for pip install (e.g., 'mimic', 'embeddings').
+            Used in the error message to guide users.
+        required: Tuple of required module names to check for availability.
+        pkgname: The package name for installation instructions. Defaults to 'arize'.
+
+    Raises:
+        OptionalDependencyError: If any of the required modules cannot be imported.
+            The error message includes pip install instructions with the extras group.
+    """
     if not required:
         return
     missing = [p for p in required if not _can_import(p)]
@@ -97,6 +116,22 @@ def require(
 
 
 def _dynamic_import(modname: str, retries: int = 2) -> types.ModuleType:
+    """Dynamically import a module with retry logic and sys.modules cleanup on failure.
+
+    Args:
+        modname: The fully qualified module name to import.
+        retries: Number of import attempts to make. Must be > 0. Defaults to 2.
+
+    Returns:
+        types.ModuleType: The successfully imported module.
+
+    Raises:
+        ValueError: If retries is <= 0.
+        ModuleNotFoundError: If the module cannot be found after all retry attempts.
+        ImportError: If the module import fails after all retry attempts.
+        KeyError: If a key error occurs during import after all retry attempts.
+    """
+
    def _attempt_import(remaining_attempts: int) -> types.ModuleType:
        try:
            return import_module(modname)
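
The new `require` docstring documents the optional-dependency gate used across the SDK: check a tuple of module names and fail with pip-extras guidance if any is missing. A hypothetical usage sketch matching that documented contract; the module names in `required` are illustrative, not the package's actual extras:

    from arize._lazy import OptionalDependencyError, require

    def load_embedding_backend() -> None:
        try:
            # Checks importability of each module; raises with
            # "pip install 'arize[embeddings]'"-style guidance if missing.
            require("embeddings", ("torch", "transformers"))
        except OptionalDependencyError as err:
            print(f"Optional dependencies missing: {err}")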
arize/client.py
CHANGED
@@ -5,7 +5,7 @@ from __future__ import annotations
 import logging
 import shutil
 from pathlib import Path
-from typing import TYPE_CHECKING, ClassVar
+from typing import TYPE_CHECKING, ClassVar, cast
 
 from arize._lazy import LazySubclientsMixin
 from arize.config import SDKConfiguration
@@ -20,20 +20,11 @@ if TYPE_CHECKING:
 
 logger = logging.getLogger(__name__)
 
-# TODO(Kiko): Go private connect. Need a `base_domain`, such that we get:
-# - api.<base_domain>
-# - app.<base_domain>
-# - flight.<base_domain>
-# - otlp.<base_domain>
-
-# TODO(Kiko): Enforce type checking, remove all type ignores
-
 # TODO(Kiko): Go over headers on each logging call
 # TODO(Kiko): InvalidAdditionalHeadersError is unused. Have we handled extra headers?
 
 # TODO(Kiko): Need to implement 'Update existing examples in a dataset'
 
-# TODO(Kiko): test caching in colab environment
 # TODO(Kiko): Protobuf versioning is too old
 # TODO(Kiko): Go through main APIs and add CtxAdapter where missing
 # TODO(Kiko): Search and handle other TODOs
@@ -128,6 +119,7 @@ class ArizeClient(LazySubclientsMixin):
         enable_caching: bool | None = None,
         single_host: str | None = None,
         single_port: int | None = None,
+        base_domain: str | None = None,
     ) -> None:
         """Initialize the Arize client with configuration parameters.
 
@@ -136,40 +128,72 @@ class ArizeClient(LazySubclientsMixin):
 
         Args:
             api_key: Arize API key for authentication. Required - must be provided here
-                or via ARIZE_API_KEY environment variable.
+                or via ARIZE_API_KEY environment variable.
+                Raises MissingAPIKeyError if not set.
             region: Arize region (e.g., Region.US_CENTRAL, Region.EU_WEST). When specified,
-                overrides individual host/port settings.
-
-
-
-
-
+                overrides individual host/port settings.
+                ENV: ARIZE_REGION.
+                Default: Region.UNSET.
+            api_host: Custom API endpoint host.
+                ENV: ARIZE_API_HOST.
+                Default: "api.arize.com".
+            api_scheme: API endpoint scheme (http/https).
+                ENV: ARIZE_API_SCHEME.
+                Default: "https".
+            otlp_host: OTLP endpoint host.
+                ENV: ARIZE_OTLP_HOST.
+                Default: "otlp.arize.com".
+            otlp_scheme: OTLP endpoint scheme (http/https).
+                ENV: ARIZE_OTLP_SCHEME.
+                Default: "https".
+            flight_host: Apache Arrow Flight endpoint host.
+                ENV: ARIZE_FLIGHT_HOST.
                 Default: "flight.arize.com".
-            flight_port: Apache Arrow Flight endpoint port (1-65535).
+            flight_port: Apache Arrow Flight endpoint port (1-65535).
+                ENV: ARIZE_FLIGHT_PORT.
                 Default: 443.
-            flight_scheme: Apache Arrow Flight endpoint scheme.
+            flight_scheme: Apache Arrow Flight endpoint scheme.
+                ENV: ARIZE_FLIGHT_SCHEME.
                 Default: "grpc+tls".
             pyarrow_max_chunksize: Maximum PyArrow chunk size (1 to MAX_CHUNKSIZE).
-                ENV: ARIZE_MAX_CHUNKSIZE.
-
+                ENV: ARIZE_MAX_CHUNKSIZE.
+                Default: 10_000.
+            request_verify: Whether to verify SSL certificates.
+                ENV: ARIZE_REQUEST_VERIFY.
                 Default: True.
             stream_max_workers: Maximum worker threads for streaming (minimum: 1).
-                ENV: ARIZE_STREAM_MAX_WORKERS.
+                ENV: ARIZE_STREAM_MAX_WORKERS.
+                Default: 8.
             stream_max_queue_bound: Maximum queue size for streaming (minimum: 1).
-                ENV: ARIZE_STREAM_MAX_QUEUE_BOUND.
+                ENV: ARIZE_STREAM_MAX_QUEUE_BOUND.
+                Default: 5000.
             max_http_payload_size_mb: Maximum HTTP payload size in MB (minimum: 1).
-                ENV: ARIZE_MAX_HTTP_PAYLOAD_SIZE_MB.
+                ENV: ARIZE_MAX_HTTP_PAYLOAD_SIZE_MB.
+                Default: 100.
             arize_directory: Directory for SDK files (cache, logs, etc.).
-                ENV: ARIZE_DIRECTORY.
-
+                ENV: ARIZE_DIRECTORY.
+                Default: "~/.arize".
+            enable_caching: Whether to enable local caching.
+                ENV: ARIZE_ENABLE_CACHING.
                 Default: True.
-            single_host: Single host for all endpoints
-
-
-
+            single_host: Single host for all endpoints. When specified, overrides
+                individual hosts.
+                ENV: ARIZE_SINGLE_HOST.
+                Default: None.
+            single_port: Single port for all endpoints. When specified, overrides
+                individual ports.
+                ENV: ARIZE_SINGLE_PORT.
+                Default: 0 (not set).
+            base_domain: Base domain for generating endpoint hosts as api.<base_domain>,
+                otlp.<base_domain>, flight.<base_domain>. Intended for Private Connect setups.
+                When specified, overrides individual hosts.
+                ENV: ARIZE_BASE_DOMAIN.
+                Default: None.
 
         Raises:
             MissingAPIKeyError: If api_key is not provided via argument or environment variable.
+            MultipleEndpointOverridesError: If multiple endpoint override options (region,
+                single_host/single_port, base_domain) are provided.
 
         Notes:
             Values provided to this class override environment variables, which in turn
@@ -213,6 +237,8 @@ class ArizeClient(LazySubclientsMixin):
             cfg_kwargs["single_host"] = single_host
         if single_port is not None:
             cfg_kwargs["single_port"] = single_port
+        if base_domain is not None:
+            cfg_kwargs["base_domain"] = base_domain
 
         # Only the explicitly provided fields are passed; the rest use
         # SDKConfiguration's default factories / defaults.
@@ -222,27 +248,27 @@ class ArizeClient(LazySubclientsMixin):
     @property
     def datasets(self) -> DatasetsClient:
         """Access the datasets client for dataset operations (lazy-loaded)."""
-        return self.__getattr__("datasets")
+        return cast("DatasetsClient", self.__getattr__("datasets"))
 
     @property
     def experiments(self) -> ExperimentsClient:
         """Access the experiments client for experiment operations (lazy-loaded)."""
-        return self.__getattr__("experiments")
+        return cast("ExperimentsClient", self.__getattr__("experiments"))
 
     @property
     def ml(self) -> MLModelsClient:
         """Access the ML models client for ML model operations (lazy-loaded)."""
-        return self.__getattr__("ml")
+        return cast("MLModelsClient", self.__getattr__("ml"))
 
     @property
     def projects(self) -> ProjectsClient:
         """Access the projects client for project operations (lazy-loaded)."""
-        return self.__getattr__("projects")
+        return cast("ProjectsClient", self.__getattr__("projects"))
 
     @property
     def spans(self) -> SpansClient:
         """Access the spans client for tracing and span operations (lazy-loaded)."""
-        return self.__getattr__("spans")
+        return cast("SpansClient", self.__getattr__("spans"))
 
     def __repr__(self) -> str:
         """Return a string representation of the Arize client configuration."""
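
Taken together, the `base_domain` parameter resolves the removed "Go private connect" TODO: one domain now fans out to the per-service hosts, and the docstring records that it is mutually exclusive with `region` and `single_host`/`single_port`. A usage sketch, with the domain value hypothetical:

    from arize.client import ArizeClient

    # base_domain expands to api.<base_domain>, otlp.<base_domain>, and
    # flight.<base_domain>; combining it with region or single_host/single_port
    # raises MultipleEndpointOverridesError.
    client = ArizeClient(
        api_key="your-api-key",           # or set ARIZE_API_KEY
        base_domain="arize.example.com",  # hypothetical Private Connect domain
    )

    datasets_client = client.datasets  # lazy-loaded; now typed via cast(...)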