arize 8.0.0a23__py3-none-any.whl → 8.0.0b0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arize/__init__.py +11 -10
- arize/_exporter/client.py +1 -1
- arize/client.py +36 -126
- arize/config.py +59 -100
- arize/datasets/client.py +11 -6
- arize/embeddings/nlp_generators.py +12 -6
- arize/embeddings/tabular_generators.py +14 -11
- arize/experiments/__init__.py +12 -0
- arize/experiments/client.py +11 -6
- arize/{models → ml}/batch_validation/errors.py +2 -2
- arize/{models → ml}/batch_validation/validator.py +5 -3
- arize/{models → ml}/casting.py +42 -78
- arize/{models → ml}/client.py +19 -17
- arize/{models → ml}/proto.py +2 -2
- arize/{models → ml}/stream_validation.py +1 -1
- arize/{models → ml}/surrogate_explainer/mimic.py +6 -2
- arize/{types.py → ml/types.py} +99 -234
- arize/pre_releases.py +2 -1
- arize/projects/client.py +11 -6
- arize/spans/client.py +89 -84
- arize/spans/conversion.py +11 -4
- arize/spans/validation/common/value_validation.py +1 -1
- arize/spans/validation/spans/dataframe_form_validation.py +1 -1
- arize/spans/validation/spans/value_validation.py +2 -1
- arize/utils/dataframe.py +1 -1
- arize/utils/online_tasks/dataframe_preprocessor.py +5 -6
- arize/utils/types.py +105 -0
- arize/version.py +1 -1
- {arize-8.0.0a23.dist-info → arize-8.0.0b0.dist-info}/METADATA +10 -4
- {arize-8.0.0a23.dist-info → arize-8.0.0b0.dist-info}/RECORD +37 -36
- /arize/{models → ml}/__init__.py +0 -0
- /arize/{models → ml}/batch_validation/__init__.py +0 -0
- /arize/{models → ml}/bounded_executor.py +0 -0
- /arize/{models → ml}/surrogate_explainer/__init__.py +0 -0
- {arize-8.0.0a23.dist-info → arize-8.0.0b0.dist-info}/WHEEL +0 -0
- {arize-8.0.0a23.dist-info → arize-8.0.0b0.dist-info}/licenses/LICENSE +0 -0
- {arize-8.0.0a23.dist-info → arize-8.0.0b0.dist-info}/licenses/NOTICE +0 -0
arize/__init__.py
CHANGED
|
@@ -44,18 +44,19 @@ def make_to_df(field_name: str) -> object:
|
|
|
44
44
|
- If an item is a mapping (dict-like), use it as-is.
|
|
45
45
|
- Otherwise, raise a ValueError (unsupported row type).
|
|
46
46
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
47
|
+
Args:
|
|
48
|
+
self (object): The object instance containing the field to convert.
|
|
49
|
+
by_alias (bool): Use field aliases when dumping Pydantic models.
|
|
50
|
+
exclude_none (str | bool): Control None/NaN column dropping.
|
|
51
|
+
- False: keep Nones as-is
|
|
52
|
+
- "all": drop columns where all values are None/NaN
|
|
53
|
+
- "any": drop columns where any value is None/NaN
|
|
54
|
+
- True: alias for "all"
|
|
55
|
+
json_normalize (bool): If True, flatten nested dicts via `pandas.json_normalize`.
|
|
56
|
+
convert_dtypes (bool): If True, call `DataFrame.convert_dtypes()` at the end.
|
|
56
57
|
|
|
57
58
|
Returns:
|
|
58
|
-
|
|
59
|
+
pandas.DataFrame: The converted DataFrame.
|
|
59
60
|
"""
|
|
60
61
|
import pandas as pd
|
|
61
62
|
|
arize/_exporter/client.py
CHANGED
|
@@ -17,7 +17,7 @@ from arize._exporter.validation import (
|
|
|
17
17
|
)
|
|
18
18
|
from arize._generated.protocol.flight import flight_pb2
|
|
19
19
|
from arize.logging import CtxAdapter
|
|
20
|
-
from arize.types import Environments, SimilaritySearchParams
|
|
20
|
+
from arize.ml.types import Environments, SimilaritySearchParams
|
|
21
21
|
from arize.utils.dataframe import reset_dataframe_index
|
|
22
22
|
|
|
23
23
|
logger = logging.getLogger(__name__)
|
arize/client.py
CHANGED
|
@@ -13,13 +13,21 @@ from arize.config import SDKConfiguration
|
|
|
13
13
|
if TYPE_CHECKING:
|
|
14
14
|
from arize.datasets.client import DatasetsClient
|
|
15
15
|
from arize.experiments.client import ExperimentsClient
|
|
16
|
-
from arize.
|
|
16
|
+
from arize.ml.client import MLModelsClient
|
|
17
17
|
from arize.projects.client import ProjectsClient
|
|
18
18
|
from arize.regions import Region
|
|
19
19
|
from arize.spans.client import SpansClient
|
|
20
20
|
|
|
21
21
|
logger = logging.getLogger(__name__)
|
|
22
22
|
|
|
23
|
+
# TODO(Kiko): Clean commented lines over the SDK
|
|
24
|
+
# TODO(Kiko): Implement https://github.com/Arize-ai/arize/pull/59917
|
|
25
|
+
|
|
26
|
+
# TODO(Kiko): Go private connect. Need a `base_domain`, such that we get:
|
|
27
|
+
# - api.<base_domain>
|
|
28
|
+
# - app.<base_domain>
|
|
29
|
+
# - flight.<base_domain>
|
|
30
|
+
# - otlp.<base_domain>
|
|
23
31
|
|
|
24
32
|
# TODO(Kiko): Enforce type checking, remove all type ignores
|
|
25
33
|
|
|
@@ -44,133 +52,35 @@ class ArizeClient(LazySubclientsMixin):
|
|
|
44
52
|
"""Root client for the Arize SDK.
|
|
45
53
|
|
|
46
54
|
The ArizeClient provides access to all Arize platform services including datasets,
|
|
47
|
-
experiments, models, projects, and spans. It uses SDKConfiguration internally to
|
|
55
|
+
experiments, ML models, projects, and spans. It uses SDKConfiguration internally to
|
|
48
56
|
manage configuration settings.
|
|
49
57
|
|
|
50
|
-
Configuration Precedence
|
|
51
|
-
------------------------
|
|
52
58
|
All parameters are optional (except api_key which must be provided via argument
|
|
53
59
|
or environment variable). For each parameter, values are resolved in this order:
|
|
54
|
-
1. Explicit value passed to constructor (highest priority)
|
|
55
|
-
2. Environment variable value (see SDKConfiguration for variable names)
|
|
56
|
-
3. Built-in default value (lowest priority)
|
|
57
|
-
|
|
58
|
-
Parameters
|
|
59
|
-
----------
|
|
60
|
-
api_key : str
|
|
61
|
-
Arize API key for authentication. Required - must be provided via this parameter
|
|
62
|
-
or the ARIZE_API_KEY environment variable.
|
|
63
|
-
Environment variable: ARIZE_API_KEY
|
|
64
|
-
Default: None (raises MissingAPIKeyError if not provided)
|
|
65
|
-
|
|
66
|
-
region : Region, optional
|
|
67
|
-
Arize region (e.g., Region.US_CENTRAL, Region.EU_WEST). When specified,
|
|
68
|
-
overrides individual host/port settings with region-specific endpoints.
|
|
69
|
-
Environment variable: ARIZE_REGION
|
|
70
|
-
Default: Region.UNSPECIFIED
|
|
71
|
-
|
|
72
|
-
api_host : str, optional
|
|
73
|
-
Custom API endpoint host override.
|
|
74
|
-
Environment variable: ARIZE_API_HOST
|
|
75
|
-
Default: "api.arize.com"
|
|
76
|
-
|
|
77
|
-
api_scheme : str, optional
|
|
78
|
-
API endpoint scheme (http/https).
|
|
79
|
-
Environment variable: ARIZE_API_SCHEME
|
|
80
|
-
Default: "https"
|
|
81
|
-
|
|
82
|
-
otlp_host : str, optional
|
|
83
|
-
OTLP (OpenTelemetry Protocol) endpoint host override.
|
|
84
|
-
Environment variable: ARIZE_OTLP_HOST
|
|
85
|
-
Default: "otlp.arize.com"
|
|
86
|
-
|
|
87
|
-
otlp_scheme : str, optional
|
|
88
|
-
OTLP endpoint scheme (http/https).
|
|
89
|
-
Environment variable: ARIZE_OTLP_SCHEME
|
|
90
|
-
Default: "https"
|
|
91
|
-
|
|
92
|
-
flight_host : str, optional
|
|
93
|
-
Apache Arrow Flight endpoint host override.
|
|
94
|
-
Environment variable: ARIZE_FLIGHT_HOST
|
|
95
|
-
Default: "flight.arize.com"
|
|
96
|
-
|
|
97
|
-
flight_port : int, optional
|
|
98
|
-
Apache Arrow Flight endpoint port (1-65535).
|
|
99
|
-
Environment variable: ARIZE_FLIGHT_PORT
|
|
100
|
-
Default: 443
|
|
101
|
-
|
|
102
|
-
flight_scheme : str, optional
|
|
103
|
-
Apache Arrow Flight endpoint scheme.
|
|
104
|
-
Environment variable: ARIZE_FLIGHT_SCHEME
|
|
105
|
-
Default: "grpc+tls"
|
|
106
|
-
|
|
107
|
-
pyarrow_max_chunksize : int, optional
|
|
108
|
-
Maximum chunk size for PyArrow operations (1 to MAX_CHUNKSIZE).
|
|
109
|
-
Environment variable: ARIZE_MAX_CHUNKSIZE
|
|
110
|
-
Default: 10_000
|
|
111
|
-
|
|
112
|
-
request_verify : bool, optional
|
|
113
|
-
Whether to verify SSL certificates for HTTP requests.
|
|
114
|
-
Environment variable: ARIZE_REQUEST_VERIFY
|
|
115
|
-
Default: True
|
|
116
|
-
|
|
117
|
-
stream_max_workers : int, optional
|
|
118
|
-
Maximum number of worker threads for streaming operations (minimum: 1).
|
|
119
|
-
Environment variable: ARIZE_STREAM_MAX_WORKERS
|
|
120
|
-
Default: 8
|
|
121
|
-
|
|
122
|
-
stream_max_queue_bound : int, optional
|
|
123
|
-
Maximum queue size for streaming operations (minimum: 1).
|
|
124
|
-
Environment variable: ARIZE_STREAM_MAX_QUEUE_BOUND
|
|
125
|
-
Default: 5000
|
|
126
|
-
|
|
127
|
-
max_http_payload_size_mb : float, optional
|
|
128
|
-
Maximum HTTP payload size in megabytes (minimum: 1).
|
|
129
|
-
Environment variable: ARIZE_MAX_HTTP_PAYLOAD_SIZE_MB
|
|
130
|
-
Default: 100
|
|
131
|
-
|
|
132
|
-
arize_directory : str, optional
|
|
133
|
-
Directory for Arize SDK files (cache, logs, etc.).
|
|
134
|
-
Environment variable: ARIZE_DIRECTORY
|
|
135
|
-
Default: "~/.arize"
|
|
136
|
-
|
|
137
|
-
enable_caching : bool, optional
|
|
138
|
-
Whether to enable local caching.
|
|
139
|
-
Environment variable: ARIZE_ENABLE_CACHING
|
|
140
|
-
Default: True
|
|
141
|
-
|
|
142
|
-
single_host : str, optional
|
|
143
|
-
Single host to use for all endpoints. Overrides individual host settings.
|
|
144
|
-
Environment variable: ARIZE_SINGLE_HOST
|
|
145
|
-
Default: None (not set)
|
|
146
|
-
|
|
147
|
-
single_port : int, optional
|
|
148
|
-
Single port to use for all endpoints. Overrides individual port settings (0-65535).
|
|
149
|
-
Environment variable: ARIZE_SINGLE_PORT
|
|
150
|
-
Default: 0 (not set)
|
|
151
60
|
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
61
|
+
1. Explicit value passed to constructor (highest priority)
|
|
62
|
+
2. Environment variable value (see SDKConfiguration for variable names)
|
|
63
|
+
3. Built-in default value (lowest priority)
|
|
155
64
|
|
|
156
|
-
|
|
65
|
+
Examples:
|
|
66
|
+
Initialize with API key only (other settings use defaults):
|
|
157
67
|
|
|
158
|
-
|
|
68
|
+
>>> client = ArizeClient(api_key="your-api-key")
|
|
159
69
|
|
|
160
|
-
|
|
161
|
-
... api_key="your-api-key",
|
|
162
|
-
... api_host="custom.api.com",
|
|
163
|
-
... flight_port=8443,
|
|
164
|
-
... )
|
|
70
|
+
Initialize with custom endpoints:
|
|
165
71
|
|
|
166
|
-
|
|
72
|
+
>>> client = ArizeClient(
|
|
73
|
+
... api_key="your-api-key",
|
|
74
|
+
... api_host="custom.api.com",
|
|
75
|
+
... flight_port=8443,
|
|
76
|
+
... )
|
|
167
77
|
|
|
168
|
-
|
|
169
|
-
>>> client = ArizeClient(api_key="your-api-key", region=Region.EU_WEST)
|
|
78
|
+
Initialize with region (overrides host/port settings):
|
|
170
79
|
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
80
|
+
>>> from arize.regions import Region
|
|
81
|
+
>>> client = ArizeClient(
|
|
82
|
+
... api_key="your-api-key", region=Region.EU_WEST
|
|
83
|
+
... )
|
|
174
84
|
"""
|
|
175
85
|
|
|
176
86
|
_SUBCLIENTS: ClassVar[dict[str, tuple[str, str]]] = {
|
|
@@ -186,8 +96,8 @@ class ArizeClient(LazySubclientsMixin):
|
|
|
186
96
|
"arize.projects.client",
|
|
187
97
|
"ProjectsClient",
|
|
188
98
|
),
|
|
189
|
-
"
|
|
190
|
-
"arize.
|
|
99
|
+
"ml": (
|
|
100
|
+
"arize.ml.client",
|
|
191
101
|
"MLModelsClient",
|
|
192
102
|
),
|
|
193
103
|
"spans": (
|
|
@@ -231,7 +141,7 @@ class ArizeClient(LazySubclientsMixin):
|
|
|
231
141
|
# Imports are gated in each method of the models client
|
|
232
142
|
# This is to allow for very lean package install if people only
|
|
233
143
|
# want to stream ML records
|
|
234
|
-
"
|
|
144
|
+
"ml": (None, ()),
|
|
235
145
|
}
|
|
236
146
|
|
|
237
147
|
def __init__(
|
|
@@ -299,8 +209,8 @@ class ArizeClient(LazySubclientsMixin):
|
|
|
299
209
|
MissingAPIKeyError: If api_key is not provided via argument or environment variable.
|
|
300
210
|
|
|
301
211
|
Notes:
|
|
302
|
-
Values provided
|
|
303
|
-
default values. See SDKConfiguration for detailed parameter documentation.
|
|
212
|
+
Values provided to this class override environment variables, which in turn
|
|
213
|
+
override default values. See SDKConfiguration for detailed parameter documentation.
|
|
304
214
|
"""
|
|
305
215
|
cfg_kwargs: dict = {}
|
|
306
216
|
if api_key is not None:
|
|
@@ -356,9 +266,9 @@ class ArizeClient(LazySubclientsMixin):
|
|
|
356
266
|
return self.__getattr__("experiments")
|
|
357
267
|
|
|
358
268
|
@property
|
|
359
|
-
def
|
|
360
|
-
"""Access the models client for ML model operations (lazy-loaded)."""
|
|
361
|
-
return self.__getattr__("
|
|
269
|
+
def ml(self) -> MLModelsClient:
|
|
270
|
+
"""Access the ML models client for ML model operations (lazy-loaded)."""
|
|
271
|
+
return self.__getattr__("ml")
|
|
362
272
|
|
|
363
273
|
@property
|
|
364
274
|
def projects(self) -> ProjectsClient:
|
|
@@ -385,7 +295,7 @@ class ArizeClient(LazySubclientsMixin):
|
|
|
385
295
|
# 'datasets': lazy,
|
|
386
296
|
# 'experiments': lazy,
|
|
387
297
|
# 'spans': lazy,
|
|
388
|
-
# '
|
|
298
|
+
# 'ml': lazy,
|
|
389
299
|
# }
|
|
390
300
|
# )
|
|
391
301
|
lines = [f"{self.__class__.__name__}("]
|
arize/config.py
CHANGED
|
@@ -167,112 +167,71 @@ def _parse_bool(val: bool | str | None) -> bool:
|
|
|
167
167
|
class SDKConfiguration:
|
|
168
168
|
"""Configuration for the Arize SDK with endpoint and authentication settings.
|
|
169
169
|
|
|
170
|
-
This class is used internally by ArizeClient to manage SDK configuration.
|
|
171
|
-
|
|
170
|
+
This class is used internally by ArizeClient to manage SDK configuration. It is not
|
|
171
|
+
recommended to use this class directly; users should interact with ArizeClient
|
|
172
|
+
instead.
|
|
172
173
|
|
|
173
|
-
Configuration Precedence
|
|
174
|
-
------------------------
|
|
175
174
|
Each configuration parameter follows this resolution order:
|
|
176
175
|
1. Explicit value passed to ArizeClient constructor (highest priority)
|
|
177
176
|
2. Environment variable value
|
|
178
177
|
3. Built-in default value (lowest priority)
|
|
179
178
|
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
stream_max_queue_bound : int
|
|
238
|
-
Maximum queue size for streaming operations (minimum: 1).
|
|
239
|
-
Environment variable: ARIZE_STREAM_MAX_QUEUE_BOUND
|
|
240
|
-
Default: 5000
|
|
241
|
-
|
|
242
|
-
max_http_payload_size_mb : float
|
|
243
|
-
Maximum HTTP payload size in megabytes (minimum: 1).
|
|
244
|
-
Environment variable: ARIZE_MAX_HTTP_PAYLOAD_SIZE_MB
|
|
245
|
-
Default: 100
|
|
246
|
-
|
|
247
|
-
arize_directory : str
|
|
248
|
-
Directory for Arize SDK files (cache, logs, etc.).
|
|
249
|
-
Environment variable: ARIZE_DIRECTORY
|
|
250
|
-
Default: "~/.arize"
|
|
251
|
-
|
|
252
|
-
enable_caching : bool
|
|
253
|
-
Whether to enable local caching.
|
|
254
|
-
Environment variable: ARIZE_ENABLE_CACHING
|
|
255
|
-
Default: True
|
|
256
|
-
|
|
257
|
-
region : Region
|
|
258
|
-
Arize region (e.g., US_CENTRAL, EU_WEST). When specified, overrides
|
|
259
|
-
individual host/port settings.
|
|
260
|
-
Environment variable: ARIZE_REGION
|
|
261
|
-
Default: Region.UNSPECIFIED
|
|
262
|
-
|
|
263
|
-
single_host : str
|
|
264
|
-
Single host to use for all endpoints. Overrides individual host settings.
|
|
265
|
-
Environment variable: ARIZE_SINGLE_HOST
|
|
266
|
-
Default: "" (not set)
|
|
267
|
-
|
|
268
|
-
single_port : int
|
|
269
|
-
Single port to use for all endpoints. Overrides individual port settings (0-65535).
|
|
270
|
-
Environment variable: ARIZE_SINGLE_PORT
|
|
271
|
-
Default: 0 (not set)
|
|
272
|
-
|
|
273
|
-
See Also:
|
|
274
|
-
--------
|
|
275
|
-
ArizeClient : Main client class that uses this configuration
|
|
179
|
+
Args:
|
|
180
|
+
api_key: Arize API key for authentication. Required.
|
|
181
|
+
Environment variable: ARIZE_API_KEY.
|
|
182
|
+
Default: None (must be provided via argument or environment variable).
|
|
183
|
+
api_host: API endpoint host.
|
|
184
|
+
Environment variable: ARIZE_API_HOST.
|
|
185
|
+
Default: "api.arize.com".
|
|
186
|
+
api_scheme: API endpoint scheme (http/https).
|
|
187
|
+
Environment variable: ARIZE_API_SCHEME.
|
|
188
|
+
Default: "https".
|
|
189
|
+
otlp_host: OTLP (OpenTelemetry Protocol) endpoint host.
|
|
190
|
+
Environment variable: ARIZE_OTLP_HOST.
|
|
191
|
+
Default: "otlp.arize.com".
|
|
192
|
+
otlp_scheme: OTLP endpoint scheme (http/https).
|
|
193
|
+
Environment variable: ARIZE_OTLP_SCHEME.
|
|
194
|
+
Default: "https".
|
|
195
|
+
flight_host: Apache Arrow Flight endpoint host.
|
|
196
|
+
Environment variable: ARIZE_FLIGHT_HOST.
|
|
197
|
+
Default: "flight.arize.com".
|
|
198
|
+
flight_port: Apache Arrow Flight endpoint port (1-65535).
|
|
199
|
+
Environment variable: ARIZE_FLIGHT_PORT.
|
|
200
|
+
Default: 443.
|
|
201
|
+
flight_scheme: Apache Arrow Flight endpoint scheme.
|
|
202
|
+
Environment variable: ARIZE_FLIGHT_SCHEME.
|
|
203
|
+
Default: "grpc+tls".
|
|
204
|
+
pyarrow_max_chunksize: Maximum chunk size for PyArrow operations (1 to MAX_CHUNKSIZE).
|
|
205
|
+
Environment variable: ARIZE_MAX_CHUNKSIZE.
|
|
206
|
+
Default: 10_000.
|
|
207
|
+
request_verify: Whether to verify SSL certificates for HTTP requests.
|
|
208
|
+
Environment variable: ARIZE_REQUEST_VERIFY.
|
|
209
|
+
Default: True.
|
|
210
|
+
stream_max_workers: Maximum number of worker threads for streaming operations (minimum: 1).
|
|
211
|
+
Environment variable: ARIZE_STREAM_MAX_WORKERS.
|
|
212
|
+
Default: 8.
|
|
213
|
+
stream_max_queue_bound: Maximum queue size for streaming operations (minimum: 1).
|
|
214
|
+
Environment variable: ARIZE_STREAM_MAX_QUEUE_BOUND.
|
|
215
|
+
Default: 5000.
|
|
216
|
+
max_http_payload_size_mb: Maximum HTTP payload size in megabytes (minimum: 1).
|
|
217
|
+
Environment variable: ARIZE_MAX_HTTP_PAYLOAD_SIZE_MB.
|
|
218
|
+
Default: 100.
|
|
219
|
+
arize_directory: Directory for Arize SDK files (cache, logs, etc.).
|
|
220
|
+
Environment variable: ARIZE_DIRECTORY.
|
|
221
|
+
Default: "~/.arize".
|
|
222
|
+
enable_caching: Whether to enable local caching.
|
|
223
|
+
Environment variable: ARIZE_ENABLE_CACHING.
|
|
224
|
+
Default: True.
|
|
225
|
+
region: Arize region (e.g., US_CENTRAL, EU_WEST). When specified, overrides
|
|
226
|
+
individual host/port settings.
|
|
227
|
+
Environment variable: ARIZE_REGION.
|
|
228
|
+
Default: Region.UNSPECIFIED.
|
|
229
|
+
single_host: Single host to use for all endpoints. Overrides individual host settings.
|
|
230
|
+
Environment variable: ARIZE_SINGLE_HOST.
|
|
231
|
+
Default: "" (not set).
|
|
232
|
+
single_port: Single port to use for all endpoints. Overrides individual port settings (0-65535).
|
|
233
|
+
Environment variable: ARIZE_SINGLE_PORT.
|
|
234
|
+
Default: 0 (not set).
|
|
276
235
|
"""
|
|
277
236
|
|
|
278
237
|
api_key: str = field(
|
arize/datasets/client.py
CHANGED
|
@@ -30,17 +30,22 @@ logger = logging.getLogger(__name__)
|
|
|
30
30
|
|
|
31
31
|
|
|
32
32
|
class DatasetsClient:
|
|
33
|
-
"""Client for managing datasets including creation, retrieval, and example management.
|
|
33
|
+
"""Client for managing datasets including creation, retrieval, and example management.
|
|
34
34
|
|
|
35
|
-
|
|
36
|
-
|
|
35
|
+
This class is primarily intended for internal use within the SDK. Users are
|
|
36
|
+
highly encouraged to access resource-specific functionality via
|
|
37
|
+
:class:`arize.ArizeClient`.
|
|
37
38
|
|
|
38
|
-
|
|
39
|
-
|
|
39
|
+
The datasets client is a thin wrapper around the generated REST API client,
|
|
40
|
+
using the shared generated API client owned by
|
|
41
|
+
:class:`arize.config.SDKConfiguration`.
|
|
42
|
+
"""
|
|
40
43
|
|
|
44
|
+
def __init__(self, *, sdk_config: SDKConfiguration) -> None:
|
|
45
|
+
"""
|
|
41
46
|
Args:
|
|
42
47
|
sdk_config: Resolved SDK configuration.
|
|
43
|
-
"""
|
|
48
|
+
""" # noqa: D205, D212
|
|
44
49
|
self._sdk_config = sdk_config
|
|
45
50
|
|
|
46
51
|
# Import at runtime so it's still lazy and extras-gated by the parent
|
|
@@ -49,10 +49,13 @@ class EmbeddingGeneratorForNLPSequenceClassification(NLPEmbeddingGenerator):
|
|
|
49
49
|
) -> pd.Series:
|
|
50
50
|
"""Obtain embedding vectors from your text data using pre-trained large language models.
|
|
51
51
|
|
|
52
|
-
:
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
52
|
+
Args:
|
|
53
|
+
text_col: A pandas Series containing the different pieces of text.
|
|
54
|
+
class_label_col: If this column is passed, the sentence "The classification label
|
|
55
|
+
is <class_label>" will be appended to the text in the `text_col`.
|
|
56
|
+
|
|
57
|
+
Returns:
|
|
58
|
+
A pandas Series containing the embedding vectors.
|
|
56
59
|
"""
|
|
57
60
|
if not isinstance(text_col, pd.Series):
|
|
58
61
|
raise TypeError("text_col must be a pandas Series")
|
|
@@ -110,8 +113,11 @@ class EmbeddingGeneratorForNLPSummarization(NLPEmbeddingGenerator):
|
|
|
110
113
|
) -> pd.Series:
|
|
111
114
|
"""Obtain embedding vectors from your text data using pre-trained large language models.
|
|
112
115
|
|
|
113
|
-
:
|
|
114
|
-
|
|
116
|
+
Args:
|
|
117
|
+
text_col: A pandas Series containing the different pieces of text.
|
|
118
|
+
|
|
119
|
+
Returns:
|
|
120
|
+
A pandas Series containing the embedding vectors.
|
|
115
121
|
"""
|
|
116
122
|
if not isinstance(text_col, pd.Series):
|
|
117
123
|
raise TypeError("text_col must be a pandas Series")
|
|
@@ -11,7 +11,7 @@ from arize.embeddings.constants import (
|
|
|
11
11
|
IMPORT_ERROR_MESSAGE,
|
|
12
12
|
)
|
|
13
13
|
from arize.embeddings.usecases import UseCases
|
|
14
|
-
from arize.types import is_list_of
|
|
14
|
+
from arize.utils.types import is_list_of
|
|
15
15
|
|
|
16
16
|
try:
|
|
17
17
|
from datasets import Dataset
|
|
@@ -79,16 +79,19 @@ class EmbeddingGeneratorForTabularFeatures(NLPEmbeddingGenerator):
|
|
|
79
79
|
Prompts are generated from your `selected_columns` and passed to a pre-trained
|
|
80
80
|
large language model for embedding vector computation.
|
|
81
81
|
|
|
82
|
-
:
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
82
|
+
Args:
|
|
83
|
+
df: Pandas DataFrame containing the tabular data. Not all columns will be
|
|
84
|
+
considered, see `selected_columns`.
|
|
85
|
+
selected_columns: Columns to be considered to construct the prompt to be passed to
|
|
86
|
+
the LLM.
|
|
87
|
+
col_name_map: Mapping between selected column names and a more verbose description of
|
|
88
|
+
the name. This helps the LLM understand the features better.
|
|
89
|
+
return_prompt_col: If set to True, an extra pandas Series will be returned
|
|
90
|
+
containing the constructed prompts. Defaults to False.
|
|
91
|
+
|
|
92
|
+
Returns:
|
|
93
|
+
A pandas Series containing the embedding vectors and, if `return_prompt_col` is
|
|
94
|
+
set to True, a pandas Series containing the prompts created from tabular features.
|
|
92
95
|
"""
|
|
93
96
|
if col_name_map is None:
|
|
94
97
|
col_name_map = {}
|
arize/experiments/__init__.py
CHANGED
|
@@ -1 +1,13 @@
|
|
|
1
1
|
"""Experiment tracking and evaluation functionality for the Arize SDK."""
|
|
2
|
+
|
|
3
|
+
from arize.experiments.evaluators.types import (
|
|
4
|
+
EvaluationResult,
|
|
5
|
+
EvaluationResultFieldNames,
|
|
6
|
+
)
|
|
7
|
+
from arize.experiments.types import ExperimentTaskResultFieldNames
|
|
8
|
+
|
|
9
|
+
__all__ = [
|
|
10
|
+
"EvaluationResult",
|
|
11
|
+
"EvaluationResultFieldNames",
|
|
12
|
+
"ExperimentTaskResultFieldNames",
|
|
13
|
+
]
|
arize/experiments/client.py
CHANGED
|
@@ -50,17 +50,22 @@ logger = logging.getLogger(__name__)
|
|
|
50
50
|
|
|
51
51
|
|
|
52
52
|
class ExperimentsClient:
|
|
53
|
-
"""Client for managing experiments including creation, execution, and result tracking.
|
|
53
|
+
"""Client for managing experiments including creation, execution, and result tracking.
|
|
54
54
|
|
|
55
|
-
|
|
56
|
-
|
|
55
|
+
This class is primarily intended for internal use within the SDK. Users are
|
|
56
|
+
highly encouraged to access resource-specific functionality via
|
|
57
|
+
:class:`arize.ArizeClient`.
|
|
57
58
|
|
|
58
|
-
|
|
59
|
-
|
|
59
|
+
The experiments client is a thin wrapper around the generated REST API client,
|
|
60
|
+
using the shared generated API client owned by
|
|
61
|
+
:class:`arize.config.SDKConfiguration`.
|
|
62
|
+
"""
|
|
60
63
|
|
|
64
|
+
def __init__(self, *, sdk_config: SDKConfiguration) -> None:
|
|
65
|
+
"""
|
|
61
66
|
Args:
|
|
62
67
|
sdk_config: Resolved SDK configuration.
|
|
63
|
-
"""
|
|
68
|
+
""" # noqa: D205, D212
|
|
64
69
|
self._sdk_config = sdk_config
|
|
65
70
|
from arize._generated import api_client as gen
|
|
66
71
|
|
|
@@ -16,12 +16,12 @@ from arize.constants.ml import (
|
|
|
16
16
|
MAX_TAG_LENGTH,
|
|
17
17
|
)
|
|
18
18
|
from arize.logging import log_a_list
|
|
19
|
-
from arize.types import Environments, ModelTypes
|
|
19
|
+
from arize.ml.types import Environments, ModelTypes
|
|
20
20
|
|
|
21
21
|
if TYPE_CHECKING:
|
|
22
22
|
from collections.abc import Iterable
|
|
23
23
|
|
|
24
|
-
from arize.types import Metrics
|
|
24
|
+
from arize.ml.types import Metrics
|
|
25
25
|
|
|
26
26
|
|
|
27
27
|
class ValidationError(Exception, ABC):
|
|
@@ -40,8 +40,8 @@ from arize.constants.ml import (
|
|
|
40
40
|
MODEL_MAPPING_CONFIG,
|
|
41
41
|
)
|
|
42
42
|
from arize.logging import get_truncation_warning_message
|
|
43
|
-
from arize.
|
|
44
|
-
from arize.types import (
|
|
43
|
+
from arize.ml.batch_validation import errors as err
|
|
44
|
+
from arize.ml.types import (
|
|
45
45
|
CATEGORICAL_MODEL_TYPES,
|
|
46
46
|
NUMERIC_MODEL_TYPES,
|
|
47
47
|
BaseSchema,
|
|
@@ -53,9 +53,11 @@ from arize.types import (
|
|
|
53
53
|
ModelTypes,
|
|
54
54
|
PromptTemplateColumnNames,
|
|
55
55
|
Schema,
|
|
56
|
+
segments_intersect,
|
|
57
|
+
)
|
|
58
|
+
from arize.utils.types import (
|
|
56
59
|
is_dict_of,
|
|
57
60
|
is_iterable_of,
|
|
58
|
-
segments_intersect,
|
|
59
61
|
)
|
|
60
62
|
|
|
61
63
|
logger = logging.getLogger(__name__)
|