arize 8.0.0b1__py3-none-any.whl → 8.0.0b4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arize/__init__.py +9 -2
- arize/_client_factory.py +50 -0
- arize/_exporter/client.py +18 -17
- arize/_exporter/parsers/tracing_data_parser.py +9 -4
- arize/_exporter/validation.py +1 -1
- arize/_flight/client.py +37 -17
- arize/_generated/api_client/api/datasets_api.py +6 -6
- arize/_generated/api_client/api/experiments_api.py +6 -6
- arize/_generated/api_client/api/projects_api.py +3 -3
- arize/_lazy.py +61 -10
- arize/client.py +66 -50
- arize/config.py +175 -48
- arize/constants/config.py +1 -0
- arize/constants/ml.py +9 -16
- arize/constants/spans.py +5 -10
- arize/datasets/client.py +45 -28
- arize/datasets/errors.py +1 -1
- arize/datasets/validation.py +2 -2
- arize/embeddings/auto_generator.py +16 -9
- arize/embeddings/base_generators.py +15 -9
- arize/embeddings/cv_generators.py +2 -2
- arize/embeddings/errors.py +2 -2
- arize/embeddings/nlp_generators.py +8 -8
- arize/embeddings/tabular_generators.py +6 -6
- arize/exceptions/base.py +0 -52
- arize/exceptions/config.py +22 -0
- arize/exceptions/parameters.py +1 -330
- arize/exceptions/values.py +8 -5
- arize/experiments/__init__.py +4 -0
- arize/experiments/client.py +31 -18
- arize/experiments/evaluators/base.py +12 -9
- arize/experiments/evaluators/executors.py +16 -7
- arize/experiments/evaluators/rate_limiters.py +3 -1
- arize/experiments/evaluators/types.py +9 -7
- arize/experiments/evaluators/utils.py +7 -5
- arize/experiments/functions.py +128 -58
- arize/experiments/tracing.py +4 -1
- arize/experiments/types.py +34 -31
- arize/logging.py +54 -33
- arize/ml/batch_validation/errors.py +10 -1004
- arize/ml/batch_validation/validator.py +351 -291
- arize/ml/bounded_executor.py +25 -6
- arize/ml/casting.py +51 -33
- arize/ml/client.py +43 -35
- arize/ml/proto.py +21 -22
- arize/ml/stream_validation.py +64 -27
- arize/ml/surrogate_explainer/mimic.py +18 -10
- arize/ml/types.py +27 -67
- arize/pre_releases.py +10 -6
- arize/projects/client.py +9 -4
- arize/py.typed +0 -0
- arize/regions.py +11 -11
- arize/spans/client.py +125 -31
- arize/spans/columns.py +32 -36
- arize/spans/conversion.py +12 -11
- arize/spans/validation/annotations/dataframe_form_validation.py +1 -1
- arize/spans/validation/annotations/value_validation.py +11 -14
- arize/spans/validation/common/argument_validation.py +3 -3
- arize/spans/validation/common/dataframe_form_validation.py +7 -7
- arize/spans/validation/common/value_validation.py +11 -14
- arize/spans/validation/evals/dataframe_form_validation.py +4 -4
- arize/spans/validation/evals/evals_validation.py +6 -6
- arize/spans/validation/evals/value_validation.py +1 -1
- arize/spans/validation/metadata/argument_validation.py +1 -1
- arize/spans/validation/metadata/dataframe_form_validation.py +2 -2
- arize/spans/validation/metadata/value_validation.py +23 -1
- arize/spans/validation/spans/dataframe_form_validation.py +2 -2
- arize/spans/validation/spans/spans_validation.py +6 -6
- arize/utils/arrow.py +38 -2
- arize/utils/cache.py +2 -2
- arize/utils/dataframe.py +4 -4
- arize/utils/online_tasks/dataframe_preprocessor.py +15 -11
- arize/utils/openinference_conversion.py +10 -10
- arize/utils/proto.py +0 -1
- arize/utils/types.py +6 -6
- arize/version.py +1 -1
- {arize-8.0.0b1.dist-info → arize-8.0.0b4.dist-info}/METADATA +32 -7
- {arize-8.0.0b1.dist-info → arize-8.0.0b4.dist-info}/RECORD +81 -78
- {arize-8.0.0b1.dist-info → arize-8.0.0b4.dist-info}/WHEEL +0 -0
- {arize-8.0.0b1.dist-info → arize-8.0.0b4.dist-info}/licenses/LICENSE +0 -0
- {arize-8.0.0b1.dist-info → arize-8.0.0b4.dist-info}/licenses/NOTICE +0 -0
arize/client.py
CHANGED
|
@@ -5,7 +5,7 @@ from __future__ import annotations
|
|
|
5
5
|
import logging
|
|
6
6
|
import shutil
|
|
7
7
|
from pathlib import Path
|
|
8
|
-
from typing import TYPE_CHECKING, ClassVar
|
|
8
|
+
from typing import TYPE_CHECKING, ClassVar, cast
|
|
9
9
|
|
|
10
10
|
from arize._lazy import LazySubclientsMixin
|
|
11
11
|
from arize.config import SDKConfiguration
|
|
@@ -20,42 +20,22 @@ if TYPE_CHECKING:
|
|
|
20
20
|
|
|
21
21
|
logger = logging.getLogger(__name__)
|
|
22
22
|
|
|
23
|
-
# TODO(Kiko): InvalidAdditionalHeadersError is unused. Have we handled extra headers?
|
|
24
|
-
|
|
25
|
-
# TODO(Kiko): Clean commented lines over the SDK
|
|
26
|
-
# TODO(Kiko): Implement https://github.com/Arize-ai/arize/pull/59917
|
|
27
|
-
|
|
28
|
-
# TODO(Kiko): Go private connect. Need a `base_domain`, such that we get:
|
|
29
|
-
# - api.<base_domain>
|
|
30
|
-
# - app.<base_domain>
|
|
31
|
-
# - flight.<base_domain>
|
|
32
|
-
# - otlp.<base_domain>
|
|
33
|
-
|
|
34
|
-
# TODO(Kiko): Enforce type checking, remove all type ignores
|
|
35
|
-
|
|
36
|
-
# TODO(Kiko): Go over docstrings
|
|
37
|
-
# TODO(Kiko): Missing parameter descriptions in some docstrings
|
|
38
|
-
# TODO(Kiko): Missing return descriptions in some docstrings
|
|
39
|
-
|
|
40
23
|
# TODO(Kiko): Go over headers on each logging call
|
|
24
|
+
# TODO(Kiko): InvalidAdditionalHeadersError is unused. Have we handled extra headers?
|
|
41
25
|
|
|
42
26
|
# TODO(Kiko): Need to implement 'Update existing examples in a dataset'
|
|
43
27
|
|
|
44
|
-
# TODO(Kiko): why logs don't show on scripts, only on jupyter notebooks
|
|
45
|
-
# TODO(Kiko): test caching in colab environment
|
|
46
28
|
# TODO(Kiko): Protobuf versioning is too old
|
|
47
29
|
# TODO(Kiko): Go through main APIs and add CtxAdapter where missing
|
|
48
30
|
# TODO(Kiko): Search and handle other TODOs
|
|
49
|
-
# TODO(Kiko): Go over **every file** and do not import anything at runtime, use `if TYPE_CHECKING`
|
|
50
|
-
# with `from __future__ import annotations` (must include for Python < 3.11)
|
|
51
31
|
|
|
52
32
|
|
|
53
33
|
class ArizeClient(LazySubclientsMixin):
|
|
54
34
|
"""Root client for the Arize SDK.
|
|
55
35
|
|
|
56
36
|
The ArizeClient provides access to all Arize platform services including datasets,
|
|
57
|
-
experiments, ML models, projects, and spans. It uses SDKConfiguration
|
|
58
|
-
manage configuration settings.
|
|
37
|
+
experiments, ML models, projects, and spans. It uses :class:`arize.config.SDKConfiguration`
|
|
38
|
+
internally to manage configuration settings.
|
|
59
39
|
|
|
60
40
|
All parameters are optional (except api_key which must be provided via argument
|
|
61
41
|
or environment variable). For each parameter, values are resolved in this order:
|
|
@@ -139,6 +119,7 @@ class ArizeClient(LazySubclientsMixin):
|
|
|
139
119
|
enable_caching: bool | None = None,
|
|
140
120
|
single_host: str | None = None,
|
|
141
121
|
single_port: int | None = None,
|
|
122
|
+
base_domain: str | None = None,
|
|
142
123
|
) -> None:
|
|
143
124
|
"""Initialize the Arize client with configuration parameters.
|
|
144
125
|
|
|
@@ -147,44 +128,77 @@ class ArizeClient(LazySubclientsMixin):
|
|
|
147
128
|
|
|
148
129
|
Args:
|
|
149
130
|
api_key: Arize API key for authentication. Required - must be provided here
|
|
150
|
-
or via ARIZE_API_KEY environment variable.
|
|
131
|
+
or via ARIZE_API_KEY environment variable.
|
|
132
|
+
Raises MissingAPIKeyError if not set.
|
|
151
133
|
region: Arize region (e.g., Region.US_CENTRAL, Region.EU_WEST). When specified,
|
|
152
|
-
overrides individual host/port settings.
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
134
|
+
overrides individual host/port settings.
|
|
135
|
+
ENV: ARIZE_REGION.
|
|
136
|
+
Default: Region.UNSET.
|
|
137
|
+
api_host: Custom API endpoint host.
|
|
138
|
+
ENV: ARIZE_API_HOST.
|
|
139
|
+
Default: "api.arize.com".
|
|
140
|
+
api_scheme: API endpoint scheme (http/https).
|
|
141
|
+
ENV: ARIZE_API_SCHEME.
|
|
142
|
+
Default: "https".
|
|
143
|
+
otlp_host: OTLP endpoint host.
|
|
144
|
+
ENV: ARIZE_OTLP_HOST.
|
|
145
|
+
Default: "otlp.arize.com".
|
|
146
|
+
otlp_scheme: OTLP endpoint scheme (http/https).
|
|
147
|
+
ENV: ARIZE_OTLP_SCHEME.
|
|
148
|
+
Default: "https".
|
|
149
|
+
flight_host: Apache Arrow Flight endpoint host.
|
|
150
|
+
ENV: ARIZE_FLIGHT_HOST.
|
|
158
151
|
Default: "flight.arize.com".
|
|
159
|
-
flight_port: Apache Arrow Flight endpoint port (1-65535).
|
|
152
|
+
flight_port: Apache Arrow Flight endpoint port (1-65535).
|
|
153
|
+
ENV: ARIZE_FLIGHT_PORT.
|
|
160
154
|
Default: 443.
|
|
161
|
-
flight_scheme: Apache Arrow Flight endpoint scheme.
|
|
155
|
+
flight_scheme: Apache Arrow Flight endpoint scheme.
|
|
156
|
+
ENV: ARIZE_FLIGHT_SCHEME.
|
|
162
157
|
Default: "grpc+tls".
|
|
163
158
|
pyarrow_max_chunksize: Maximum PyArrow chunk size (1 to MAX_CHUNKSIZE).
|
|
164
|
-
ENV: ARIZE_MAX_CHUNKSIZE.
|
|
165
|
-
|
|
159
|
+
ENV: ARIZE_MAX_CHUNKSIZE.
|
|
160
|
+
Default: 10_000.
|
|
161
|
+
request_verify: Whether to verify SSL certificates.
|
|
162
|
+
ENV: ARIZE_REQUEST_VERIFY.
|
|
166
163
|
Default: True.
|
|
167
164
|
stream_max_workers: Maximum worker threads for streaming (minimum: 1).
|
|
168
|
-
ENV: ARIZE_STREAM_MAX_WORKERS.
|
|
165
|
+
ENV: ARIZE_STREAM_MAX_WORKERS.
|
|
166
|
+
Default: 8.
|
|
169
167
|
stream_max_queue_bound: Maximum queue size for streaming (minimum: 1).
|
|
170
|
-
ENV: ARIZE_STREAM_MAX_QUEUE_BOUND.
|
|
168
|
+
ENV: ARIZE_STREAM_MAX_QUEUE_BOUND.
|
|
169
|
+
Default: 5000.
|
|
171
170
|
max_http_payload_size_mb: Maximum HTTP payload size in MB (minimum: 1).
|
|
172
|
-
ENV: ARIZE_MAX_HTTP_PAYLOAD_SIZE_MB.
|
|
171
|
+
ENV: ARIZE_MAX_HTTP_PAYLOAD_SIZE_MB.
|
|
172
|
+
Default: 100.
|
|
173
173
|
arize_directory: Directory for SDK files (cache, logs, etc.).
|
|
174
|
-
ENV: ARIZE_DIRECTORY.
|
|
175
|
-
|
|
174
|
+
ENV: ARIZE_DIRECTORY.
|
|
175
|
+
Default: "~/.arize".
|
|
176
|
+
enable_caching: Whether to enable local caching.
|
|
177
|
+
ENV: ARIZE_ENABLE_CACHING.
|
|
176
178
|
Default: True.
|
|
177
|
-
single_host: Single host for all endpoints
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
179
|
+
single_host: Single host for all endpoints. When specified, overrides
|
|
180
|
+
individual hosts.
|
|
181
|
+
ENV: ARIZE_SINGLE_HOST.
|
|
182
|
+
Default: None.
|
|
183
|
+
single_port: Single port for all endpoints. When specified, overrides
|
|
184
|
+
individual ports.
|
|
185
|
+
ENV: ARIZE_SINGLE_PORT.
|
|
186
|
+
Default: 0 (not set).
|
|
187
|
+
base_domain: Base domain for generating endpoint hosts as api.<base_domain>,
|
|
188
|
+
otlp.<base_domain>, flight.<base_domain>. Intended for Private Connect setups.
|
|
189
|
+
When specified, overrides individual hosts.
|
|
190
|
+
ENV: ARIZE_BASE_DOMAIN.
|
|
191
|
+
Default: None.
|
|
181
192
|
|
|
182
193
|
Raises:
|
|
183
194
|
MissingAPIKeyError: If api_key is not provided via argument or environment variable.
|
|
195
|
+
MultipleEndpointOverridesError: If multiple endpoint override options (region,
|
|
196
|
+
single_host/single_port, base_domain) are provided.
|
|
184
197
|
|
|
185
198
|
Notes:
|
|
186
199
|
Values provided to this class override environment variables, which in turn
|
|
187
|
-
override default values. See SDKConfiguration
|
|
200
|
+
override default values. See :class:`arize.config.SDKConfiguration`
|
|
201
|
+
for detailed parameter documentation.
|
|
188
202
|
"""
|
|
189
203
|
cfg_kwargs: dict = {}
|
|
190
204
|
if api_key is not None:
|
|
@@ -223,6 +237,8 @@ class ArizeClient(LazySubclientsMixin):
|
|
|
223
237
|
cfg_kwargs["single_host"] = single_host
|
|
224
238
|
if single_port is not None:
|
|
225
239
|
cfg_kwargs["single_port"] = single_port
|
|
240
|
+
if base_domain is not None:
|
|
241
|
+
cfg_kwargs["base_domain"] = base_domain
|
|
226
242
|
|
|
227
243
|
# Only the explicitly provided fields are passed; the rest use
|
|
228
244
|
# SDKConfiguration's default factories / defaults.
|
|
@@ -232,27 +248,27 @@ class ArizeClient(LazySubclientsMixin):
|
|
|
232
248
|
@property
|
|
233
249
|
def datasets(self) -> DatasetsClient:
|
|
234
250
|
"""Access the datasets client for dataset operations (lazy-loaded)."""
|
|
235
|
-
return self.__getattr__("datasets")
|
|
251
|
+
return cast("DatasetsClient", self.__getattr__("datasets"))
|
|
236
252
|
|
|
237
253
|
@property
|
|
238
254
|
def experiments(self) -> ExperimentsClient:
|
|
239
255
|
"""Access the experiments client for experiment operations (lazy-loaded)."""
|
|
240
|
-
return self.__getattr__("experiments")
|
|
256
|
+
return cast("ExperimentsClient", self.__getattr__("experiments"))
|
|
241
257
|
|
|
242
258
|
@property
|
|
243
259
|
def ml(self) -> MLModelsClient:
|
|
244
260
|
"""Access the ML models client for ML model operations (lazy-loaded)."""
|
|
245
|
-
return self.__getattr__("ml")
|
|
261
|
+
return cast("MLModelsClient", self.__getattr__("ml"))
|
|
246
262
|
|
|
247
263
|
@property
|
|
248
264
|
def projects(self) -> ProjectsClient:
|
|
249
265
|
"""Access the projects client for project operations (lazy-loaded)."""
|
|
250
|
-
return self.__getattr__("projects")
|
|
266
|
+
return cast("ProjectsClient", self.__getattr__("projects"))
|
|
251
267
|
|
|
252
268
|
@property
|
|
253
269
|
def spans(self) -> SpansClient:
|
|
254
270
|
"""Access the spans client for tracing and span operations (lazy-loaded)."""
|
|
255
|
-
return self.__getattr__("spans")
|
|
271
|
+
return cast("SpansClient", self.__getattr__("spans"))
|
|
256
272
|
|
|
257
273
|
def __repr__(self) -> str:
|
|
258
274
|
"""Return a string representation of the Arize client configuration."""
|
arize/config.py
CHANGED
|
@@ -3,10 +3,8 @@
|
|
|
3
3
|
import logging
|
|
4
4
|
import os
|
|
5
5
|
import sys
|
|
6
|
-
import threading
|
|
7
6
|
from dataclasses import dataclass, field, fields
|
|
8
7
|
from pathlib import Path
|
|
9
|
-
from typing import Any
|
|
10
8
|
|
|
11
9
|
from arize.constants.config import (
|
|
12
10
|
DEFAULT_API_HOST,
|
|
@@ -27,6 +25,7 @@ from arize.constants.config import (
|
|
|
27
25
|
ENV_API_KEY,
|
|
28
26
|
ENV_API_SCHEME,
|
|
29
27
|
ENV_ARIZE_DIRECTORY,
|
|
28
|
+
ENV_BASE_DOMAIN,
|
|
30
29
|
ENV_ENABLE_CACHING,
|
|
31
30
|
ENV_FLIGHT_HOST,
|
|
32
31
|
ENV_FLIGHT_PORT,
|
|
@@ -44,6 +43,7 @@ from arize.constants.config import (
|
|
|
44
43
|
)
|
|
45
44
|
from arize.constants.pyarrow import MAX_CHUNKSIZE
|
|
46
45
|
from arize.exceptions.auth import MissingAPIKeyError
|
|
46
|
+
from arize.exceptions.config import MultipleEndpointOverridesError
|
|
47
47
|
from arize.regions import REGION_ENDPOINTS, Region
|
|
48
48
|
from arize.version import __version__
|
|
49
49
|
|
|
@@ -55,18 +55,44 @@ ALLOWED_HTTP_SCHEMES = {"http", "https"}
|
|
|
55
55
|
|
|
56
56
|
|
|
57
57
|
def _is_sensitive_field(name: str) -> bool:
|
|
58
|
+
"""Check if a field name contains sensitive information markers.
|
|
59
|
+
|
|
60
|
+
Args:
|
|
61
|
+
name: The field name to check.
|
|
62
|
+
|
|
63
|
+
Returns:
|
|
64
|
+
bool: True if the field name contains 'key', 'token', or 'secret' (case-insensitive).
|
|
65
|
+
"""
|
|
58
66
|
n = name.lower()
|
|
59
67
|
return bool(any(k in n for k in SENSITIVE_FIELD_MARKERS))
|
|
60
68
|
|
|
61
69
|
|
|
62
70
|
def _mask_secret(secret: str, N: int = 4) -> str:
|
|
63
|
-
"""
|
|
71
|
+
"""Mask a secret string by showing only the first N characters.
|
|
72
|
+
|
|
73
|
+
Args:
|
|
74
|
+
secret: The secret string to mask.
|
|
75
|
+
N: Number of characters to show before masking. Defaults to 4.
|
|
76
|
+
|
|
77
|
+
Returns:
|
|
78
|
+
str: The masked string (first N chars + '***'), or empty string if input is empty.
|
|
79
|
+
"""
|
|
64
80
|
if len(secret) == 0:
|
|
65
81
|
return ""
|
|
66
82
|
return f"{secret[:N]}***"
|
|
67
83
|
|
|
68
84
|
|
|
69
85
|
def _endpoint(scheme: str, base: str, path: str = "") -> str:
|
|
86
|
+
"""Construct a full endpoint URL from scheme, base, and optional path.
|
|
87
|
+
|
|
88
|
+
Args:
|
|
89
|
+
scheme: The URL scheme (e.g., "http", "https").
|
|
90
|
+
base: The base URL or hostname.
|
|
91
|
+
path: Optional path to append to the base URL. Defaults to empty string.
|
|
92
|
+
|
|
93
|
+
Returns:
|
|
94
|
+
str: The fully constructed endpoint URL.
|
|
95
|
+
"""
|
|
70
96
|
endpoint = scheme + "://" + base.rstrip("/")
|
|
71
97
|
if path:
|
|
72
98
|
endpoint += "/" + path.lstrip("/")
|
|
@@ -74,6 +100,18 @@ def _endpoint(scheme: str, base: str, path: str = "") -> str:
|
|
|
74
100
|
|
|
75
101
|
|
|
76
102
|
def _env_http_scheme(name: str, default: str) -> str:
|
|
103
|
+
"""Get an HTTP scheme from environment variable with validation.
|
|
104
|
+
|
|
105
|
+
Args:
|
|
106
|
+
name: The environment variable name.
|
|
107
|
+
default: The default value if the environment variable is not set.
|
|
108
|
+
|
|
109
|
+
Returns:
|
|
110
|
+
str: The validated HTTP scheme ('http' or 'https').
|
|
111
|
+
|
|
112
|
+
Raises:
|
|
113
|
+
ValueError: If the scheme is not 'http' or 'https'.
|
|
114
|
+
"""
|
|
77
115
|
v = _env_str(name, default).lower()
|
|
78
116
|
if v not in ALLOWED_HTTP_SCHEMES:
|
|
79
117
|
raise ValueError(
|
|
@@ -88,6 +126,20 @@ def _env_str(
|
|
|
88
126
|
min_len: int | None = None,
|
|
89
127
|
max_len: int | None = None,
|
|
90
128
|
) -> str:
|
|
129
|
+
"""Get a string value from environment variable with length validation.
|
|
130
|
+
|
|
131
|
+
Args:
|
|
132
|
+
name: The environment variable name.
|
|
133
|
+
default: The default value if the environment variable is not set.
|
|
134
|
+
min_len: Optional minimum length constraint for the string.
|
|
135
|
+
max_len: Optional maximum length constraint for the string.
|
|
136
|
+
|
|
137
|
+
Returns:
|
|
138
|
+
str: The validated string value (stripped of whitespace).
|
|
139
|
+
|
|
140
|
+
Raises:
|
|
141
|
+
ValueError: If the string length violates min_len or max_len constraints.
|
|
142
|
+
"""
|
|
91
143
|
val = os.getenv(name, default).strip()
|
|
92
144
|
|
|
93
145
|
if min_len is not None and len(val) < min_len:
|
|
@@ -109,6 +161,20 @@ def _env_int(
|
|
|
109
161
|
min_val: int | None = None,
|
|
110
162
|
max_val: int | None = None,
|
|
111
163
|
) -> int:
|
|
164
|
+
"""Get an integer value from environment variable with range validation.
|
|
165
|
+
|
|
166
|
+
Args:
|
|
167
|
+
name: The environment variable name.
|
|
168
|
+
default: The default value if the environment variable is not set.
|
|
169
|
+
min_val: Optional minimum value constraint for the integer.
|
|
170
|
+
max_val: Optional maximum value constraint for the integer.
|
|
171
|
+
|
|
172
|
+
Returns:
|
|
173
|
+
int: The validated integer value.
|
|
174
|
+
|
|
175
|
+
Raises:
|
|
176
|
+
ValueError: If the value cannot be parsed as an integer or violates min_val/max_val constraints.
|
|
177
|
+
"""
|
|
112
178
|
raw = os.getenv(name, default)
|
|
113
179
|
try:
|
|
114
180
|
val = int(raw)
|
|
@@ -134,6 +200,20 @@ def _env_float(
|
|
|
134
200
|
min_val: float | None = None,
|
|
135
201
|
max_val: float | None = None,
|
|
136
202
|
) -> float:
|
|
203
|
+
"""Get a float value from environment variable with range validation.
|
|
204
|
+
|
|
205
|
+
Args:
|
|
206
|
+
name: The environment variable name.
|
|
207
|
+
default: The default value if the environment variable is not set.
|
|
208
|
+
min_val: Optional minimum value constraint for the float.
|
|
209
|
+
max_val: Optional maximum value constraint for the float.
|
|
210
|
+
|
|
211
|
+
Returns:
|
|
212
|
+
float: The validated float value.
|
|
213
|
+
|
|
214
|
+
Raises:
|
|
215
|
+
ValueError: If the value cannot be parsed as a float or violates min_val/max_val constraints.
|
|
216
|
+
"""
|
|
137
217
|
raw = os.getenv(name, default)
|
|
138
218
|
try:
|
|
139
219
|
val = float(raw)
|
|
@@ -154,10 +234,28 @@ def _env_float(
|
|
|
154
234
|
|
|
155
235
|
|
|
156
236
|
def _env_bool(name: str, default: bool) -> bool:
|
|
237
|
+
"""Get a boolean value from environment variable.
|
|
238
|
+
|
|
239
|
+
Args:
|
|
240
|
+
name: The environment variable name.
|
|
241
|
+
default: The default boolean value if the environment variable is not set.
|
|
242
|
+
|
|
243
|
+
Returns:
|
|
244
|
+
bool: The parsed boolean value.
|
|
245
|
+
"""
|
|
157
246
|
return _parse_bool(os.getenv(name, str(default)))
|
|
158
247
|
|
|
159
248
|
|
|
160
249
|
def _parse_bool(val: bool | str | None) -> bool:
|
|
250
|
+
"""Parse a boolean value from various input types.
|
|
251
|
+
|
|
252
|
+
Args:
|
|
253
|
+
val: The value to parse. Can be a bool, string, or None.
|
|
254
|
+
|
|
255
|
+
Returns:
|
|
256
|
+
bool: True if the value is already True or matches one of the truthy strings
|
|
257
|
+
('1', 'true', 'yes', 'on', case-insensitive). False otherwise.
|
|
258
|
+
"""
|
|
161
259
|
if isinstance(val, bool):
|
|
162
260
|
return val
|
|
163
261
|
return (val or "").strip().lower() in {"1", "true", "yes", "on"}
|
|
@@ -167,6 +265,9 @@ def _parse_bool(val: bool | str | None) -> bool:
|
|
|
167
265
|
class SDKConfiguration:
|
|
168
266
|
"""Configuration for the Arize SDK with endpoint and authentication settings.
|
|
169
267
|
|
|
268
|
+
This class holds pure configuration data and does not manage client lifecycle.
|
|
269
|
+
Client creation and caching is handled by :class:`arize.ArizeClient`.
|
|
270
|
+
|
|
170
271
|
This class is used internally by ArizeClient to manage SDK configuration. It is not
|
|
171
272
|
recommended to use this class directly; users should interact with ArizeClient
|
|
172
273
|
instead.
|
|
@@ -225,13 +326,28 @@ class SDKConfiguration:
|
|
|
225
326
|
region: Arize region (e.g., US_CENTRAL, EU_WEST). When specified, overrides
|
|
226
327
|
individual host/port settings.
|
|
227
328
|
Environment variable: ARIZE_REGION.
|
|
228
|
-
Default: Region.
|
|
229
|
-
single_host: Single host to use for all endpoints.
|
|
329
|
+
Default: :class:`Region.UNSET`.
|
|
330
|
+
single_host: Single host to use for all endpoints. When specified, overrides
|
|
331
|
+
individual host settings.
|
|
230
332
|
Environment variable: ARIZE_SINGLE_HOST.
|
|
231
333
|
Default: "" (not set).
|
|
232
|
-
single_port: Single port to use for all endpoints.
|
|
334
|
+
single_port: Single port to use for all endpoints. When specified, overrides
|
|
335
|
+
individual port settings (0-65535).
|
|
233
336
|
Environment variable: ARIZE_SINGLE_PORT.
|
|
234
337
|
Default: 0 (not set).
|
|
338
|
+
base_domain: Base domain for generating all endpoint hosts. Intended for Private Connect
|
|
339
|
+
setups. When specified, generates hosts as api.<base_domain>, otlp.<base_domain>,
|
|
340
|
+
flight.<base_domain>. When specified, overrides individual host settings.
|
|
341
|
+
Environment variable: ARIZE_BASE_DOMAIN.
|
|
342
|
+
Default: "" (not set).
|
|
343
|
+
|
|
344
|
+
Note:
|
|
345
|
+
The endpoint override options (region, single_host/single_port, base_domain) are
|
|
346
|
+
mutually exclusive. Specifying more than one will raise MultipleEndpointOverridesError.
|
|
347
|
+
|
|
348
|
+
Raises:
|
|
349
|
+
MissingAPIKeyError: If api_key is not provided via argument or environment variable.
|
|
350
|
+
MultipleEndpointOverridesError: If multiple endpoint override options are provided.
|
|
235
351
|
"""
|
|
236
352
|
|
|
237
353
|
api_key: str = field(
|
|
@@ -322,33 +438,73 @@ class SDKConfiguration:
|
|
|
322
438
|
ENV_SINGLE_PORT, 0, min_val=0, max_val=65535
|
|
323
439
|
)
|
|
324
440
|
)
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
_gen_client: Any = field(default=None, repr=False, compare=False)
|
|
328
|
-
_gen_lock: threading.Lock = field(
|
|
329
|
-
default_factory=threading.Lock, repr=False, compare=False
|
|
441
|
+
base_domain: str = field(
|
|
442
|
+
default_factory=lambda: _env_str(ENV_BASE_DOMAIN, "")
|
|
330
443
|
)
|
|
331
444
|
|
|
332
445
|
def __post_init__(self) -> None:
|
|
333
446
|
"""Validate and configure SDK endpoints after initialization.
|
|
334
447
|
|
|
448
|
+
Endpoint override options are mutually exclusive. Only one of the following
|
|
449
|
+
can be specified:
|
|
450
|
+
1. region - Overrides all via REGION_ENDPOINTS mapping
|
|
451
|
+
2. single_host/single_port - Overrides individual hosts/ports
|
|
452
|
+
3. base_domain - Generates hosts from base domain
|
|
453
|
+
|
|
454
|
+
If none are specified, per-endpoint host/port settings are used.
|
|
455
|
+
|
|
335
456
|
Raises:
|
|
336
|
-
MissingAPIKeyError: If
|
|
457
|
+
MissingAPIKeyError: If api_key is not provided.
|
|
458
|
+
MultipleEndpointOverridesError: If multiple endpoint override options are provided.
|
|
337
459
|
"""
|
|
338
|
-
# Validate
|
|
460
|
+
# Validate configuration
|
|
339
461
|
if not self.api_key:
|
|
340
462
|
raise MissingAPIKeyError()
|
|
341
463
|
|
|
464
|
+
# Check which override options are set
|
|
465
|
+
has_base_domain = bool(self.base_domain)
|
|
342
466
|
has_single_host = bool(self.single_host)
|
|
343
467
|
has_single_port = self.single_port != 0
|
|
344
|
-
has_region = self.region is not Region.
|
|
345
|
-
|
|
468
|
+
has_region = self.region is not Region.UNSET
|
|
469
|
+
|
|
470
|
+
# Ensure only one override method is used (mutually exclusive)
|
|
471
|
+
override_count = sum(
|
|
472
|
+
[has_base_domain, has_single_host or has_single_port, has_region]
|
|
473
|
+
)
|
|
474
|
+
if override_count > 1:
|
|
475
|
+
# Determine which overrides were provided
|
|
476
|
+
provided_overrides = []
|
|
477
|
+
if has_region:
|
|
478
|
+
provided_overrides.append(f"region={self.region.value}")
|
|
479
|
+
if has_single_host or has_single_port:
|
|
480
|
+
if has_single_host:
|
|
481
|
+
provided_overrides.append(
|
|
482
|
+
f"single_host={self.single_host!r}"
|
|
483
|
+
)
|
|
484
|
+
if has_single_port:
|
|
485
|
+
provided_overrides.append(f"single_port={self.single_port}")
|
|
486
|
+
if has_base_domain:
|
|
487
|
+
provided_overrides.append(f"base_domain={self.base_domain!r}")
|
|
488
|
+
|
|
489
|
+
error_message = (
|
|
490
|
+
f"Multiple endpoint override options provided: {', '.join(provided_overrides)}. "
|
|
491
|
+
"Only one of the following can be specified: 'region', "
|
|
492
|
+
"'single_host'/'single_port', or 'base_domain'."
|
|
493
|
+
)
|
|
494
|
+
logger.error(error_message)
|
|
495
|
+
raise MultipleEndpointOverridesError(error_message)
|
|
496
|
+
|
|
497
|
+
if has_base_domain:
|
|
346
498
|
logger.info(
|
|
347
|
-
"
|
|
348
|
-
|
|
499
|
+
"Base domain %r provided; generating hosts from base domain.",
|
|
500
|
+
self.base_domain,
|
|
501
|
+
)
|
|
502
|
+
object.__setattr__(self, "api_host", f"api.{self.base_domain}")
|
|
503
|
+
object.__setattr__(self, "otlp_host", f"otlp.{self.base_domain}")
|
|
504
|
+
object.__setattr__(
|
|
505
|
+
self, "flight_host", f"flight.{self.base_domain}"
|
|
349
506
|
)
|
|
350
507
|
|
|
351
|
-
# Single host override: if single_host is set, it overrides hosts
|
|
352
508
|
if has_single_host:
|
|
353
509
|
logger.info(
|
|
354
510
|
"Single host %r provided; overriding hosts configuration with single host.",
|
|
@@ -358,7 +514,6 @@ class SDKConfiguration:
|
|
|
358
514
|
object.__setattr__(self, "otlp_host", self.single_host)
|
|
359
515
|
object.__setattr__(self, "flight_host", self.single_host)
|
|
360
516
|
|
|
361
|
-
# Single port override: if single_port is set, it overrides ports
|
|
362
517
|
if has_single_port:
|
|
363
518
|
logger.info(
|
|
364
519
|
"Single port %s provided; overriding ports configuration with single port.",
|
|
@@ -366,15 +521,12 @@ class SDKConfiguration:
|
|
|
366
521
|
)
|
|
367
522
|
object.__setattr__(self, "flight_port", self.single_port)
|
|
368
523
|
|
|
369
|
-
# Region override: if region is set, it *always* wins over host/port fields
|
|
370
524
|
if has_region:
|
|
371
|
-
endpoints = REGION_ENDPOINTS[self.region]
|
|
372
|
-
|
|
373
|
-
# Override config (region trumps everything)
|
|
374
525
|
logger.info(
|
|
375
526
|
"Region %s provided; overriding hosts & ports configuration with region defaults.",
|
|
376
527
|
self.region.value,
|
|
377
528
|
)
|
|
529
|
+
endpoints = REGION_ENDPOINTS[self.region]
|
|
378
530
|
object.__setattr__(self, "api_host", endpoints.api_host)
|
|
379
531
|
object.__setattr__(self, "otlp_host", endpoints.otlp_host)
|
|
380
532
|
object.__setattr__(self, "flight_host", endpoints.flight_host)
|
|
@@ -444,28 +596,3 @@ class SDKConfiguration:
|
|
|
444
596
|
lines.append(f" {f.name}={val!r},")
|
|
445
597
|
lines.append(")")
|
|
446
598
|
return "\n".join(lines)
|
|
447
|
-
|
|
448
|
-
# TODO(Kiko): This may not be well placed in this class
|
|
449
|
-
def get_generated_client(self) -> object:
|
|
450
|
-
"""Get or create the generated OpenAPI client instance."""
|
|
451
|
-
# If already cached, return immediately
|
|
452
|
-
if self._gen_client is not None:
|
|
453
|
-
return self._gen_client
|
|
454
|
-
|
|
455
|
-
# Thread-safe initialization
|
|
456
|
-
with self._gen_lock:
|
|
457
|
-
if self._gen_client is not None:
|
|
458
|
-
return self._gen_client
|
|
459
|
-
|
|
460
|
-
# Import lazily so extra dependencies can be
|
|
461
|
-
# enforced outside the configuration class
|
|
462
|
-
from arize._generated import api_client as gen
|
|
463
|
-
|
|
464
|
-
cfg = gen.Configuration(host=self.api_url)
|
|
465
|
-
if self.api_key:
|
|
466
|
-
cfg.access_token = self.api_key
|
|
467
|
-
client = gen.ApiClient(cfg)
|
|
468
|
-
|
|
469
|
-
# Bypass frozen to set the cache once
|
|
470
|
-
object.__setattr__(self, "_gen_client", client)
|
|
471
|
-
return client
|
arize/constants/config.py
CHANGED
|
@@ -14,6 +14,7 @@ ENV_FLIGHT_PORT = "ARIZE_FLIGHT_PORT"
|
|
|
14
14
|
ENV_FLIGHT_SCHEME = "ARIZE_FLIGHT_SCHEME"
|
|
15
15
|
ENV_SINGLE_HOST = "ARIZE_SINGLE_HOST"
|
|
16
16
|
ENV_SINGLE_PORT = "ARIZE_SINGLE_PORT"
|
|
17
|
+
ENV_BASE_DOMAIN = "ARIZE_BASE_DOMAIN"
|
|
17
18
|
ENV_PYARROW_MAX_CHUNKSIZE = "ARIZE_MAX_CHUNKSIZE"
|
|
18
19
|
ENV_REQUEST_VERIFY = "ARIZE_REQUEST_VERIFY"
|
|
19
20
|
ENV_MAX_HTTP_PAYLOAD_SIZE_MB = "ARIZE_MAX_HTTP_PAYLOAD_SIZE_MB"
|
arize/constants/ml.py
CHANGED
|
@@ -3,50 +3,43 @@
|
|
|
3
3
|
import json
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
|
|
6
|
-
# MAX_BYTES_PER_BULK_RECORD = 100000
|
|
7
|
-
# MAX_DAYS_WITHIN_RANGE = 365
|
|
8
6
|
MIN_PREDICTION_ID_LEN = 1
|
|
9
7
|
MAX_PREDICTION_ID_LEN = 512
|
|
10
8
|
MIN_DOCUMENT_ID_LEN = 1
|
|
11
9
|
MAX_DOCUMENT_ID_LEN = 128
|
|
12
|
-
#
|
|
10
|
+
# The maximum number of characters for tag values
|
|
13
11
|
MAX_TAG_LENGTH = 20_000
|
|
14
12
|
MAX_TAG_LENGTH_TRUNCATION = 1_000
|
|
15
|
-
#
|
|
13
|
+
# The maximum number of characters for embedding raw data
|
|
16
14
|
MAX_RAW_DATA_CHARACTERS = 2_000_000
|
|
17
15
|
MAX_RAW_DATA_CHARACTERS_TRUNCATION = 5_000
|
|
18
16
|
# The maximum number of acceptable years in the past from current time for prediction_timestamps
|
|
19
17
|
MAX_PAST_YEARS_FROM_CURRENT_TIME = 5
|
|
20
18
|
# The maximum number of acceptable years in the future from current time for prediction_timestamps
|
|
21
19
|
MAX_FUTURE_YEARS_FROM_CURRENT_TIME = 1
|
|
22
|
-
#
|
|
20
|
+
# The maximum number of characters for llm model name
|
|
23
21
|
MAX_LLM_MODEL_NAME_LENGTH = 20_000
|
|
24
22
|
MAX_LLM_MODEL_NAME_LENGTH_TRUNCATION = 50
|
|
25
|
-
#
|
|
23
|
+
# The maximum number of characters for prompt template
|
|
26
24
|
MAX_PROMPT_TEMPLATE_LENGTH = 50_000
|
|
27
25
|
MAX_PROMPT_TEMPLATE_LENGTH_TRUNCATION = 5_000
|
|
28
|
-
#
|
|
26
|
+
# The maximum number of characters for prompt template version
|
|
29
27
|
MAX_PROMPT_TEMPLATE_VERSION_LENGTH = 20_000
|
|
30
28
|
MAX_PROMPT_TEMPLATE_VERSION_LENGTH_TRUNCATION = 50
|
|
31
|
-
#
|
|
29
|
+
# The maximum number of embeddings
|
|
32
30
|
MAX_NUMBER_OF_EMBEDDINGS = 30
|
|
33
31
|
MAX_EMBEDDING_DIMENSIONALITY = 20_000
|
|
34
|
-
#
|
|
32
|
+
# The maximum number of classes for multi class
|
|
35
33
|
MAX_NUMBER_OF_MULTI_CLASS_CLASSES = 500
|
|
36
34
|
MAX_MULTI_CLASS_NAME_LENGTH = 100
|
|
37
35
|
# The maximum number of references in embedding similarity search params
|
|
38
36
|
MAX_NUMBER_OF_SIMILARITY_REFERENCES = 10
|
|
39
|
-
#
|
|
40
|
-
# # Arize generated columns
|
|
41
|
-
# GENERATED_PREDICTION_LABEL_COL = "arize_generated_prediction_label"
|
|
42
|
-
# GENERATED_LLM_PARAMS_JSON_COL = "arize_generated_llm_params_json"
|
|
43
|
-
#
|
|
44
|
-
# # reserved columns for LLM run metadata
|
|
37
|
+
# reserved columns for LLM run metadata
|
|
45
38
|
LLM_RUN_METADATA_TOTAL_TOKEN_COUNT_TAG_NAME = "total_token_count" # noqa: S105
|
|
46
39
|
LLM_RUN_METADATA_PROMPT_TOKEN_COUNT_TAG_NAME = "prompt_token_count" # noqa: S105
|
|
47
40
|
LLM_RUN_METADATA_RESPONSE_TOKEN_COUNT_TAG_NAME = "response_token_count" # noqa: S105
|
|
48
41
|
LLM_RUN_METADATA_RESPONSE_LATENCY_MS_TAG_NAME = "response_latency_ms"
|
|
49
|
-
|
|
42
|
+
|
|
50
43
|
# all reserved tags
|
|
51
44
|
RESERVED_TAG_COLS = [
|
|
52
45
|
LLM_RUN_METADATA_TOTAL_TOKEN_COUNT_TAG_NAME,
|
arize/constants/spans.py
CHANGED
|
@@ -5,19 +5,15 @@ DEFAULT_DATETIME_FMT = "%Y-%m-%dT%H:%M:%S.%f+00:00"
|
|
|
5
5
|
# Minimum/Maximum number of characters for span/trace/parent ids in spans
|
|
6
6
|
SPAN_ID_MIN_STR_LENGTH = 12
|
|
7
7
|
SPAN_ID_MAX_STR_LENGTH = 128
|
|
8
|
-
#
|
|
8
|
+
# Minimum/Maximum number of characters for span name
|
|
9
9
|
SPAN_NAME_MIN_STR_LENGTH = 0
|
|
10
10
|
SPAN_NAME_MAX_STR_LENGTH = 50
|
|
11
|
-
#
|
|
11
|
+
# Minimum/Maximum number of characters for span status message
|
|
12
12
|
SPAN_STATUS_MSG_MIN_STR_LENGTH = 0
|
|
13
13
|
SPAN_STATUS_MSG_MAX_STR_LENGTH = 10_000
|
|
14
|
-
#
|
|
14
|
+
# Minimum/Maximum number of characters for span event name
|
|
15
15
|
SPAN_EVENT_NAME_MAX_STR_LENGTH = 100
|
|
16
|
-
#
|
|
17
|
-
# SPAN_EVENT_ATTRS_MAX_STR_LENGTH = 10_000
|
|
18
|
-
# # Maximum number of characters for span kind
|
|
19
|
-
# SPAN_KIND_MAX_STR_LENGTH = 100
|
|
20
|
-
# SPAN_EXCEPTION_TYPE_MAX_STR_LENGTH = 100
|
|
16
|
+
# Minimum/Maximum number of characters for span event attributes
|
|
21
17
|
SPAN_EXCEPTION_MESSAGE_MAX_STR_LENGTH = 100
|
|
22
18
|
SPAN_EXCEPTION_STACK_TRACE_MAX_STR_LENGTH = 10_000
|
|
23
19
|
SPAN_IO_VALUE_MAX_STR_LENGTH = 4_000_000
|
|
@@ -29,7 +25,6 @@ SPAN_LLM_MESSAGE_ROLE_MAX_STR_LENGTH = 100
|
|
|
29
25
|
SPAN_LLM_MESSAGE_CONTENT_MAX_STR_LENGTH = 4_000_000
|
|
30
26
|
SPAN_LLM_TOOL_CALL_FUNCTION_NAME_MAX_STR_LENGTH = 500
|
|
31
27
|
SPAN_LLM_PROMPT_TEMPLATE_MAX_STR_LENGTH = 4_000_000
|
|
32
|
-
# SPAN_LLM_PROMPT_TEMPLATE_VARIABLES_MAX_STR_LENGTH = 10_000
|
|
33
28
|
SPAN_LLM_PROMPT_TEMPLATE_VERSION_MAX_STR_LENGTH = 100
|
|
34
29
|
SPAN_TOOL_NAME_MAX_STR_LENGTH = 100
|
|
35
30
|
SPAN_TOOL_DESCRIPTION_MAX_STR_LENGTH = 1_000
|
|
@@ -43,7 +38,7 @@ JSON_STRING_MAX_STR_LENGTH = 4_000_000
|
|
|
43
38
|
EVAL_LABEL_MIN_STR_LENGTH = 1 # we do not accept empty strings
|
|
44
39
|
EVAL_LABEL_MAX_STR_LENGTH = 100
|
|
45
40
|
EVAL_EXPLANATION_MAX_STR_LENGTH = 10_000
|
|
46
|
-
|
|
41
|
+
|
|
47
42
|
# # Annotation related constants
|
|
48
43
|
ANNOTATION_LABEL_MIN_STR_LENGTH = 1
|
|
49
44
|
ANNOTATION_LABEL_MAX_STR_LENGTH = 100 # Max length for annotation label string
|