arize 8.0.0b1__py3-none-any.whl → 8.0.0b4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. arize/__init__.py +9 -2
  2. arize/_client_factory.py +50 -0
  3. arize/_exporter/client.py +18 -17
  4. arize/_exporter/parsers/tracing_data_parser.py +9 -4
  5. arize/_exporter/validation.py +1 -1
  6. arize/_flight/client.py +37 -17
  7. arize/_generated/api_client/api/datasets_api.py +6 -6
  8. arize/_generated/api_client/api/experiments_api.py +6 -6
  9. arize/_generated/api_client/api/projects_api.py +3 -3
  10. arize/_lazy.py +61 -10
  11. arize/client.py +66 -50
  12. arize/config.py +175 -48
  13. arize/constants/config.py +1 -0
  14. arize/constants/ml.py +9 -16
  15. arize/constants/spans.py +5 -10
  16. arize/datasets/client.py +45 -28
  17. arize/datasets/errors.py +1 -1
  18. arize/datasets/validation.py +2 -2
  19. arize/embeddings/auto_generator.py +16 -9
  20. arize/embeddings/base_generators.py +15 -9
  21. arize/embeddings/cv_generators.py +2 -2
  22. arize/embeddings/errors.py +2 -2
  23. arize/embeddings/nlp_generators.py +8 -8
  24. arize/embeddings/tabular_generators.py +6 -6
  25. arize/exceptions/base.py +0 -52
  26. arize/exceptions/config.py +22 -0
  27. arize/exceptions/parameters.py +1 -330
  28. arize/exceptions/values.py +8 -5
  29. arize/experiments/__init__.py +4 -0
  30. arize/experiments/client.py +31 -18
  31. arize/experiments/evaluators/base.py +12 -9
  32. arize/experiments/evaluators/executors.py +16 -7
  33. arize/experiments/evaluators/rate_limiters.py +3 -1
  34. arize/experiments/evaluators/types.py +9 -7
  35. arize/experiments/evaluators/utils.py +7 -5
  36. arize/experiments/functions.py +128 -58
  37. arize/experiments/tracing.py +4 -1
  38. arize/experiments/types.py +34 -31
  39. arize/logging.py +54 -33
  40. arize/ml/batch_validation/errors.py +10 -1004
  41. arize/ml/batch_validation/validator.py +351 -291
  42. arize/ml/bounded_executor.py +25 -6
  43. arize/ml/casting.py +51 -33
  44. arize/ml/client.py +43 -35
  45. arize/ml/proto.py +21 -22
  46. arize/ml/stream_validation.py +64 -27
  47. arize/ml/surrogate_explainer/mimic.py +18 -10
  48. arize/ml/types.py +27 -67
  49. arize/pre_releases.py +10 -6
  50. arize/projects/client.py +9 -4
  51. arize/py.typed +0 -0
  52. arize/regions.py +11 -11
  53. arize/spans/client.py +125 -31
  54. arize/spans/columns.py +32 -36
  55. arize/spans/conversion.py +12 -11
  56. arize/spans/validation/annotations/dataframe_form_validation.py +1 -1
  57. arize/spans/validation/annotations/value_validation.py +11 -14
  58. arize/spans/validation/common/argument_validation.py +3 -3
  59. arize/spans/validation/common/dataframe_form_validation.py +7 -7
  60. arize/spans/validation/common/value_validation.py +11 -14
  61. arize/spans/validation/evals/dataframe_form_validation.py +4 -4
  62. arize/spans/validation/evals/evals_validation.py +6 -6
  63. arize/spans/validation/evals/value_validation.py +1 -1
  64. arize/spans/validation/metadata/argument_validation.py +1 -1
  65. arize/spans/validation/metadata/dataframe_form_validation.py +2 -2
  66. arize/spans/validation/metadata/value_validation.py +23 -1
  67. arize/spans/validation/spans/dataframe_form_validation.py +2 -2
  68. arize/spans/validation/spans/spans_validation.py +6 -6
  69. arize/utils/arrow.py +38 -2
  70. arize/utils/cache.py +2 -2
  71. arize/utils/dataframe.py +4 -4
  72. arize/utils/online_tasks/dataframe_preprocessor.py +15 -11
  73. arize/utils/openinference_conversion.py +10 -10
  74. arize/utils/proto.py +0 -1
  75. arize/utils/types.py +6 -6
  76. arize/version.py +1 -1
  77. {arize-8.0.0b1.dist-info → arize-8.0.0b4.dist-info}/METADATA +32 -7
  78. {arize-8.0.0b1.dist-info → arize-8.0.0b4.dist-info}/RECORD +81 -78
  79. {arize-8.0.0b1.dist-info → arize-8.0.0b4.dist-info}/WHEEL +0 -0
  80. {arize-8.0.0b1.dist-info → arize-8.0.0b4.dist-info}/licenses/LICENSE +0 -0
  81. {arize-8.0.0b1.dist-info → arize-8.0.0b4.dist-info}/licenses/NOTICE +0 -0
arize/client.py CHANGED
@@ -5,7 +5,7 @@ from __future__ import annotations
5
5
  import logging
6
6
  import shutil
7
7
  from pathlib import Path
8
- from typing import TYPE_CHECKING, ClassVar
8
+ from typing import TYPE_CHECKING, ClassVar, cast
9
9
 
10
10
  from arize._lazy import LazySubclientsMixin
11
11
  from arize.config import SDKConfiguration
@@ -20,42 +20,22 @@ if TYPE_CHECKING:
20
20
 
21
21
  logger = logging.getLogger(__name__)
22
22
 
23
- # TODO(Kiko): InvalidAdditionalHeadersError is unused. Have we handled extra headers?
24
-
25
- # TODO(Kiko): Clean commented lines over the SDK
26
- # TODO(Kiko): Implement https://github.com/Arize-ai/arize/pull/59917
27
-
28
- # TODO(Kiko): Go private connect. Need a `base_domain`, such that we get:
29
- # - api.<base_domain>
30
- # - app.<base_domain>
31
- # - flight.<base_domain>
32
- # - otlp.<base_domain>
33
-
34
- # TODO(Kiko): Enforce type checking, remove all type ignores
35
-
36
- # TODO(Kiko): Go over docstrings
37
- # TODO(Kiko): Missing parameter descriptions in some docstrings
38
- # TODO(Kiko): Missing return descriptions in some docstrings
39
-
40
23
  # TODO(Kiko): Go over headers on each logging call
24
+ # TODO(Kiko): InvalidAdditionalHeadersError is unused. Have we handled extra headers?
41
25
 
42
26
  # TODO(Kiko): Need to implement 'Update existing examples in a dataset'
43
27
 
44
- # TODO(Kiko): why logs don't show on scripts, only on jupyter notebooks
45
- # TODO(Kiko): test caching in colab environment
46
28
  # TODO(Kiko): Protobuf versioning is too old
47
29
  # TODO(Kiko): Go through main APIs and add CtxAdapter where missing
48
30
  # TODO(Kiko): Search and handle other TODOs
49
- # TODO(Kiko): Go over **every file** and do not import anything at runtime, use `if TYPE_CHECKING`
50
- # with `from __future__ import annotations` (must include for Python < 3.11)
51
31
 
52
32
 
53
33
  class ArizeClient(LazySubclientsMixin):
54
34
  """Root client for the Arize SDK.
55
35
 
56
36
  The ArizeClient provides access to all Arize platform services including datasets,
57
- experiments, ML models, projects, and spans. It uses SDKConfiguration internally to
58
- manage configuration settings.
37
+ experiments, ML models, projects, and spans. It uses :class:`arize.config.SDKConfiguration`
38
+ internally to manage configuration settings.
59
39
 
60
40
  All parameters are optional (except api_key which must be provided via argument
61
41
  or environment variable). For each parameter, values are resolved in this order:
@@ -139,6 +119,7 @@ class ArizeClient(LazySubclientsMixin):
139
119
  enable_caching: bool | None = None,
140
120
  single_host: str | None = None,
141
121
  single_port: int | None = None,
122
+ base_domain: str | None = None,
142
123
  ) -> None:
143
124
  """Initialize the Arize client with configuration parameters.
144
125
 
@@ -147,44 +128,77 @@ class ArizeClient(LazySubclientsMixin):
147
128
 
148
129
  Args:
149
130
  api_key: Arize API key for authentication. Required - must be provided here
150
- or via ARIZE_API_KEY environment variable. Raises MissingAPIKeyError if not set.
131
+ or via ARIZE_API_KEY environment variable.
132
+ Raises MissingAPIKeyError if not set.
151
133
  region: Arize region (e.g., Region.US_CENTRAL, Region.EU_WEST). When specified,
152
- overrides individual host/port settings. ENV: ARIZE_REGION. Default: Region.UNSPECIFIED.
153
- api_host: Custom API endpoint host. ENV: ARIZE_API_HOST. Default: "api.arize.com".
154
- api_scheme: API endpoint scheme (http/https). ENV: ARIZE_API_SCHEME. Default: "https".
155
- otlp_host: OTLP endpoint host. ENV: ARIZE_OTLP_HOST. Default: "otlp.arize.com".
156
- otlp_scheme: OTLP endpoint scheme (http/https). ENV: ARIZE_OTLP_SCHEME. Default: "https".
157
- flight_host: Apache Arrow Flight endpoint host. ENV: ARIZE_FLIGHT_HOST.
134
+ overrides individual host/port settings.
135
+ ENV: ARIZE_REGION.
136
+ Default: Region.UNSET.
137
+ api_host: Custom API endpoint host.
138
+ ENV: ARIZE_API_HOST.
139
+ Default: "api.arize.com".
140
+ api_scheme: API endpoint scheme (http/https).
141
+ ENV: ARIZE_API_SCHEME.
142
+ Default: "https".
143
+ otlp_host: OTLP endpoint host.
144
+ ENV: ARIZE_OTLP_HOST.
145
+ Default: "otlp.arize.com".
146
+ otlp_scheme: OTLP endpoint scheme (http/https).
147
+ ENV: ARIZE_OTLP_SCHEME.
148
+ Default: "https".
149
+ flight_host: Apache Arrow Flight endpoint host.
150
+ ENV: ARIZE_FLIGHT_HOST.
158
151
  Default: "flight.arize.com".
159
- flight_port: Apache Arrow Flight endpoint port (1-65535). ENV: ARIZE_FLIGHT_PORT.
152
+ flight_port: Apache Arrow Flight endpoint port (1-65535).
153
+ ENV: ARIZE_FLIGHT_PORT.
160
154
  Default: 443.
161
- flight_scheme: Apache Arrow Flight endpoint scheme. ENV: ARIZE_FLIGHT_SCHEME.
155
+ flight_scheme: Apache Arrow Flight endpoint scheme.
156
+ ENV: ARIZE_FLIGHT_SCHEME.
162
157
  Default: "grpc+tls".
163
158
  pyarrow_max_chunksize: Maximum PyArrow chunk size (1 to MAX_CHUNKSIZE).
164
- ENV: ARIZE_MAX_CHUNKSIZE. Default: 10_000.
165
- request_verify: Whether to verify SSL certificates. ENV: ARIZE_REQUEST_VERIFY.
159
+ ENV: ARIZE_MAX_CHUNKSIZE.
160
+ Default: 10_000.
161
+ request_verify: Whether to verify SSL certificates.
162
+ ENV: ARIZE_REQUEST_VERIFY.
166
163
  Default: True.
167
164
  stream_max_workers: Maximum worker threads for streaming (minimum: 1).
168
- ENV: ARIZE_STREAM_MAX_WORKERS. Default: 8.
165
+ ENV: ARIZE_STREAM_MAX_WORKERS.
166
+ Default: 8.
169
167
  stream_max_queue_bound: Maximum queue size for streaming (minimum: 1).
170
- ENV: ARIZE_STREAM_MAX_QUEUE_BOUND. Default: 5000.
168
+ ENV: ARIZE_STREAM_MAX_QUEUE_BOUND.
169
+ Default: 5000.
171
170
  max_http_payload_size_mb: Maximum HTTP payload size in MB (minimum: 1).
172
- ENV: ARIZE_MAX_HTTP_PAYLOAD_SIZE_MB. Default: 100.
171
+ ENV: ARIZE_MAX_HTTP_PAYLOAD_SIZE_MB.
172
+ Default: 100.
173
173
  arize_directory: Directory for SDK files (cache, logs, etc.).
174
- ENV: ARIZE_DIRECTORY. Default: "~/.arize".
175
- enable_caching: Whether to enable local caching. ENV: ARIZE_ENABLE_CACHING.
174
+ ENV: ARIZE_DIRECTORY.
175
+ Default: "~/.arize".
176
+ enable_caching: Whether to enable local caching.
177
+ ENV: ARIZE_ENABLE_CACHING.
176
178
  Default: True.
177
- single_host: Single host for all endpoints (overrides individual hosts).
178
- ENV: ARIZE_SINGLE_HOST. Default: None.
179
- single_port: Single port for all endpoints (overrides individual ports).
180
- ENV: ARIZE_SINGLE_PORT. Default: 0 (not set).
179
+ single_host: Single host for all endpoints. When specified, overrides
180
+ individual hosts.
181
+ ENV: ARIZE_SINGLE_HOST.
182
+ Default: None.
183
+ single_port: Single port for all endpoints. When specified, overrides
184
+ individual ports.
185
+ ENV: ARIZE_SINGLE_PORT.
186
+ Default: 0 (not set).
187
+ base_domain: Base domain for generating endpoint hosts as api.<base_domain>,
188
+ otlp.<base_domain>, flight.<base_domain>. Intended for Private Connect setups.
189
+ When specified, overrides individual hosts.
190
+ ENV: ARIZE_BASE_DOMAIN.
191
+ Default: None.
181
192
 
182
193
  Raises:
183
194
  MissingAPIKeyError: If api_key is not provided via argument or environment variable.
195
+ MultipleEndpointOverridesError: If multiple endpoint override options (region,
196
+ single_host/single_port, base_domain) are provided.
184
197
 
185
198
  Notes:
186
199
  Values provided to this class override environment variables, which in turn
187
- override default values. See SDKConfiguration for detailed parameter documentation.
200
+ override default values. See :class:`arize.config.SDKConfiguration`
201
+ for detailed parameter documentation.
188
202
  """
189
203
  cfg_kwargs: dict = {}
190
204
  if api_key is not None:
@@ -223,6 +237,8 @@ class ArizeClient(LazySubclientsMixin):
223
237
  cfg_kwargs["single_host"] = single_host
224
238
  if single_port is not None:
225
239
  cfg_kwargs["single_port"] = single_port
240
+ if base_domain is not None:
241
+ cfg_kwargs["base_domain"] = base_domain
226
242
 
227
243
  # Only the explicitly provided fields are passed; the rest use
228
244
  # SDKConfiguration's default factories / defaults.
@@ -232,27 +248,27 @@ class ArizeClient(LazySubclientsMixin):
232
248
  @property
233
249
  def datasets(self) -> DatasetsClient:
234
250
  """Access the datasets client for dataset operations (lazy-loaded)."""
235
- return self.__getattr__("datasets")
251
+ return cast("DatasetsClient", self.__getattr__("datasets"))
236
252
 
237
253
  @property
238
254
  def experiments(self) -> ExperimentsClient:
239
255
  """Access the experiments client for experiment operations (lazy-loaded)."""
240
- return self.__getattr__("experiments")
256
+ return cast("ExperimentsClient", self.__getattr__("experiments"))
241
257
 
242
258
  @property
243
259
  def ml(self) -> MLModelsClient:
244
260
  """Access the ML models client for ML model operations (lazy-loaded)."""
245
- return self.__getattr__("ml")
261
+ return cast("MLModelsClient", self.__getattr__("ml"))
246
262
 
247
263
  @property
248
264
  def projects(self) -> ProjectsClient:
249
265
  """Access the projects client for project operations (lazy-loaded)."""
250
- return self.__getattr__("projects")
266
+ return cast("ProjectsClient", self.__getattr__("projects"))
251
267
 
252
268
  @property
253
269
  def spans(self) -> SpansClient:
254
270
  """Access the spans client for tracing and span operations (lazy-loaded)."""
255
- return self.__getattr__("spans")
271
+ return cast("SpansClient", self.__getattr__("spans"))
256
272
 
257
273
  def __repr__(self) -> str:
258
274
  """Return a string representation of the Arize client configuration."""
arize/config.py CHANGED
@@ -3,10 +3,8 @@
3
3
  import logging
4
4
  import os
5
5
  import sys
6
- import threading
7
6
  from dataclasses import dataclass, field, fields
8
7
  from pathlib import Path
9
- from typing import Any
10
8
 
11
9
  from arize.constants.config import (
12
10
  DEFAULT_API_HOST,
@@ -27,6 +25,7 @@ from arize.constants.config import (
27
25
  ENV_API_KEY,
28
26
  ENV_API_SCHEME,
29
27
  ENV_ARIZE_DIRECTORY,
28
+ ENV_BASE_DOMAIN,
30
29
  ENV_ENABLE_CACHING,
31
30
  ENV_FLIGHT_HOST,
32
31
  ENV_FLIGHT_PORT,
@@ -44,6 +43,7 @@ from arize.constants.config import (
44
43
  )
45
44
  from arize.constants.pyarrow import MAX_CHUNKSIZE
46
45
  from arize.exceptions.auth import MissingAPIKeyError
46
+ from arize.exceptions.config import MultipleEndpointOverridesError
47
47
  from arize.regions import REGION_ENDPOINTS, Region
48
48
  from arize.version import __version__
49
49
 
@@ -55,18 +55,44 @@ ALLOWED_HTTP_SCHEMES = {"http", "https"}
55
55
 
56
56
 
57
57
  def _is_sensitive_field(name: str) -> bool:
58
+ """Check if a field name contains sensitive information markers.
59
+
60
+ Args:
61
+ name: The field name to check.
62
+
63
+ Returns:
64
+ bool: True if the field name contains 'key', 'token', or 'secret' (case-insensitive).
65
+ """
58
66
  n = name.lower()
59
67
  return bool(any(k in n for k in SENSITIVE_FIELD_MARKERS))
60
68
 
61
69
 
62
70
  def _mask_secret(secret: str, N: int = 4) -> str:
63
- """Show first N chars then '***'; empty string if empty."""
71
+ """Mask a secret string by showing only the first N characters.
72
+
73
+ Args:
74
+ secret: The secret string to mask.
75
+ N: Number of characters to show before masking. Defaults to 4.
76
+
77
+ Returns:
78
+ str: The masked string (first N chars + '***'), or empty string if input is empty.
79
+ """
64
80
  if len(secret) == 0:
65
81
  return ""
66
82
  return f"{secret[:N]}***"
67
83
 
68
84
 
69
85
  def _endpoint(scheme: str, base: str, path: str = "") -> str:
86
+ """Construct a full endpoint URL from scheme, base, and optional path.
87
+
88
+ Args:
89
+ scheme: The URL scheme (e.g., "http", "https").
90
+ base: The base URL or hostname.
91
+ path: Optional path to append to the base URL. Defaults to empty string.
92
+
93
+ Returns:
94
+ str: The fully constructed endpoint URL.
95
+ """
70
96
  endpoint = scheme + "://" + base.rstrip("/")
71
97
  if path:
72
98
  endpoint += "/" + path.lstrip("/")
@@ -74,6 +100,18 @@ def _endpoint(scheme: str, base: str, path: str = "") -> str:
74
100
 
75
101
 
76
102
  def _env_http_scheme(name: str, default: str) -> str:
103
+ """Get an HTTP scheme from environment variable with validation.
104
+
105
+ Args:
106
+ name: The environment variable name.
107
+ default: The default value if the environment variable is not set.
108
+
109
+ Returns:
110
+ str: The validated HTTP scheme ('http' or 'https').
111
+
112
+ Raises:
113
+ ValueError: If the scheme is not 'http' or 'https'.
114
+ """
77
115
  v = _env_str(name, default).lower()
78
116
  if v not in ALLOWED_HTTP_SCHEMES:
79
117
  raise ValueError(
@@ -88,6 +126,20 @@ def _env_str(
88
126
  min_len: int | None = None,
89
127
  max_len: int | None = None,
90
128
  ) -> str:
129
+ """Get a string value from environment variable with length validation.
130
+
131
+ Args:
132
+ name: The environment variable name.
133
+ default: The default value if the environment variable is not set.
134
+ min_len: Optional minimum length constraint for the string.
135
+ max_len: Optional maximum length constraint for the string.
136
+
137
+ Returns:
138
+ str: The validated string value (stripped of whitespace).
139
+
140
+ Raises:
141
+ ValueError: If the string length violates min_len or max_len constraints.
142
+ """
91
143
  val = os.getenv(name, default).strip()
92
144
 
93
145
  if min_len is not None and len(val) < min_len:
@@ -109,6 +161,20 @@ def _env_int(
109
161
  min_val: int | None = None,
110
162
  max_val: int | None = None,
111
163
  ) -> int:
164
+ """Get an integer value from environment variable with range validation.
165
+
166
+ Args:
167
+ name: The environment variable name.
168
+ default: The default value if the environment variable is not set.
169
+ min_val: Optional minimum value constraint for the integer.
170
+ max_val: Optional maximum value constraint for the integer.
171
+
172
+ Returns:
173
+ int: The validated integer value.
174
+
175
+ Raises:
176
+ ValueError: If the value cannot be parsed as an integer or violates min_val/max_val constraints.
177
+ """
112
178
  raw = os.getenv(name, default)
113
179
  try:
114
180
  val = int(raw)
@@ -134,6 +200,20 @@ def _env_float(
134
200
  min_val: float | None = None,
135
201
  max_val: float | None = None,
136
202
  ) -> float:
203
+ """Get a float value from environment variable with range validation.
204
+
205
+ Args:
206
+ name: The environment variable name.
207
+ default: The default value if the environment variable is not set.
208
+ min_val: Optional minimum value constraint for the float.
209
+ max_val: Optional maximum value constraint for the float.
210
+
211
+ Returns:
212
+ float: The validated float value.
213
+
214
+ Raises:
215
+ ValueError: If the value cannot be parsed as a float or violates min_val/max_val constraints.
216
+ """
137
217
  raw = os.getenv(name, default)
138
218
  try:
139
219
  val = float(raw)
@@ -154,10 +234,28 @@ def _env_float(
154
234
 
155
235
 
156
236
  def _env_bool(name: str, default: bool) -> bool:
237
+ """Get a boolean value from environment variable.
238
+
239
+ Args:
240
+ name: The environment variable name.
241
+ default: The default boolean value if the environment variable is not set.
242
+
243
+ Returns:
244
+ bool: The parsed boolean value.
245
+ """
157
246
  return _parse_bool(os.getenv(name, str(default)))
158
247
 
159
248
 
160
249
  def _parse_bool(val: bool | str | None) -> bool:
250
+ """Parse a boolean value from various input types.
251
+
252
+ Args:
253
+ val: The value to parse. Can be a bool, string, or None.
254
+
255
+ Returns:
256
+ bool: True if the value is already True or matches one of the truthy strings
257
+ ('1', 'true', 'yes', 'on', case-insensitive). False otherwise.
258
+ """
161
259
  if isinstance(val, bool):
162
260
  return val
163
261
  return (val or "").strip().lower() in {"1", "true", "yes", "on"}
@@ -167,6 +265,9 @@ def _parse_bool(val: bool | str | None) -> bool:
167
265
  class SDKConfiguration:
168
266
  """Configuration for the Arize SDK with endpoint and authentication settings.
169
267
 
268
+ This class holds pure configuration data and does not manage client lifecycle.
269
+ Client creation and caching are handled by :class:`arize.ArizeClient`.
270
+
170
271
  This class is used internally by ArizeClient to manage SDK configuration. It is not
171
272
  recommended to use this class directly; users should interact with ArizeClient
172
273
  instead.
@@ -225,13 +326,28 @@ class SDKConfiguration:
225
326
  region: Arize region (e.g., US_CENTRAL, EU_WEST). When specified, overrides
226
327
  individual host/port settings.
227
328
  Environment variable: ARIZE_REGION.
228
- Default: Region.UNSPECIFIED.
229
- single_host: Single host to use for all endpoints. Overrides individual host settings.
329
+ Default: Region.UNSET.
330
+ single_host: Single host to use for all endpoints. When specified, overrides
331
+ individual host settings.
230
332
  Environment variable: ARIZE_SINGLE_HOST.
231
333
  Default: "" (not set).
232
- single_port: Single port to use for all endpoints. Overrides individual port settings (0-65535).
334
+ single_port: Single port to use for all endpoints. When specified, overrides
335
+ individual port settings (0-65535).
233
336
  Environment variable: ARIZE_SINGLE_PORT.
234
337
  Default: 0 (not set).
338
+ base_domain: Base domain for generating all endpoint hosts. Intended for Private Connect
339
+ setups. When specified, generates hosts as api.<base_domain>, otlp.<base_domain>,
340
+ flight.<base_domain>. When specified, overrides individual host settings.
341
+ Environment variable: ARIZE_BASE_DOMAIN.
342
+ Default: "" (not set).
343
+
344
+ Note:
345
+ The endpoint override options (region, single_host/single_port, base_domain) are
346
+ mutually exclusive. Specifying more than one will raise MultipleEndpointOverridesError.
347
+
348
+ Raises:
349
+ MissingAPIKeyError: If api_key is not provided via argument or environment variable.
350
+ MultipleEndpointOverridesError: If multiple endpoint override options are provided.
235
351
  """
236
352
 
237
353
  api_key: str = field(
@@ -322,33 +438,73 @@ class SDKConfiguration:
322
438
  ENV_SINGLE_PORT, 0, min_val=0, max_val=65535
323
439
  )
324
440
  )
325
-
326
- # Private, excluded from comparisons & repr
327
- _gen_client: Any = field(default=None, repr=False, compare=False)
328
- _gen_lock: threading.Lock = field(
329
- default_factory=threading.Lock, repr=False, compare=False
441
+ base_domain: str = field(
442
+ default_factory=lambda: _env_str(ENV_BASE_DOMAIN, "")
330
443
  )
331
444
 
332
445
  def __post_init__(self) -> None:
333
446
  """Validate and configure SDK endpoints after initialization.
334
447
 
448
+ Endpoint override options are mutually exclusive. Only one of the following
449
+ can be specified:
450
+ 1. region - Overrides all via REGION_ENDPOINTS mapping
451
+ 2. single_host/single_port - Overrides individual hosts/ports
452
+ 3. base_domain - Generates hosts from base domain
453
+
454
+ If none are specified, per-endpoint host/port settings are used.
455
+
335
456
  Raises:
336
- MissingAPIKeyError: If API key is not provided.
457
+ MissingAPIKeyError: If api_key is not provided.
458
+ MultipleEndpointOverridesError: If multiple endpoint override options are provided.
337
459
  """
338
- # Validate Configuration
460
+ # Validate configuration
339
461
  if not self.api_key:
340
462
  raise MissingAPIKeyError()
341
463
 
464
+ # Check which override options are set
465
+ has_base_domain = bool(self.base_domain)
342
466
  has_single_host = bool(self.single_host)
343
467
  has_single_port = self.single_port != 0
344
- has_region = self.region is not Region.UNSPECIFIED
345
- if (has_single_host or has_single_port) and has_region:
468
+ has_region = self.region is not Region.UNSET
469
+
470
+ # Ensure only one override method is used (mutually exclusive)
471
+ override_count = sum(
472
+ [has_base_domain, has_single_host or has_single_port, has_region]
473
+ )
474
+ if override_count > 1:
475
+ # Determine which overrides were provided
476
+ provided_overrides = []
477
+ if has_region:
478
+ provided_overrides.append(f"region={self.region.value}")
479
+ if has_single_host or has_single_port:
480
+ if has_single_host:
481
+ provided_overrides.append(
482
+ f"single_host={self.single_host!r}"
483
+ )
484
+ if has_single_port:
485
+ provided_overrides.append(f"single_port={self.single_port}")
486
+ if has_base_domain:
487
+ provided_overrides.append(f"base_domain={self.base_domain!r}")
488
+
489
+ error_message = (
490
+ f"Multiple endpoint override options provided: {', '.join(provided_overrides)}. "
491
+ "Only one of the following can be specified: 'region', "
492
+ "'single_host'/'single_port', or 'base_domain'."
493
+ )
494
+ logger.error(error_message)
495
+ raise MultipleEndpointOverridesError(error_message)
496
+
497
+ if has_base_domain:
346
498
  logger.info(
347
- "Multiple endpoint override options provided. Preference order is: "
348
- "region > single_host/single_port > per-endpoint host/port."
499
+ "Base domain %r provided; generating hosts from base domain.",
500
+ self.base_domain,
501
+ )
502
+ object.__setattr__(self, "api_host", f"api.{self.base_domain}")
503
+ object.__setattr__(self, "otlp_host", f"otlp.{self.base_domain}")
504
+ object.__setattr__(
505
+ self, "flight_host", f"flight.{self.base_domain}"
349
506
  )
350
507
 
351
- # Single host override: if single_host is set, it overrides hosts
352
508
  if has_single_host:
353
509
  logger.info(
354
510
  "Single host %r provided; overriding hosts configuration with single host.",
@@ -358,7 +514,6 @@ class SDKConfiguration:
358
514
  object.__setattr__(self, "otlp_host", self.single_host)
359
515
  object.__setattr__(self, "flight_host", self.single_host)
360
516
 
361
- # Single port override: if single_port is set, it overrides ports
362
517
  if has_single_port:
363
518
  logger.info(
364
519
  "Single port %s provided; overriding ports configuration with single port.",
@@ -366,15 +521,12 @@ class SDKConfiguration:
366
521
  )
367
522
  object.__setattr__(self, "flight_port", self.single_port)
368
523
 
369
- # Region override: if region is set, it *always* wins over host/port fields
370
524
  if has_region:
371
- endpoints = REGION_ENDPOINTS[self.region]
372
-
373
- # Override config (region trumps everything)
374
525
  logger.info(
375
526
  "Region %s provided; overriding hosts & ports configuration with region defaults.",
376
527
  self.region.value,
377
528
  )
529
+ endpoints = REGION_ENDPOINTS[self.region]
378
530
  object.__setattr__(self, "api_host", endpoints.api_host)
379
531
  object.__setattr__(self, "otlp_host", endpoints.otlp_host)
380
532
  object.__setattr__(self, "flight_host", endpoints.flight_host)
@@ -444,28 +596,3 @@ class SDKConfiguration:
444
596
  lines.append(f" {f.name}={val!r},")
445
597
  lines.append(")")
446
598
  return "\n".join(lines)
447
-
448
- # TODO(Kiko): This may not be well placed in this class
449
- def get_generated_client(self) -> object:
450
- """Get or create the generated OpenAPI client instance."""
451
- # If already cached, return immediately
452
- if self._gen_client is not None:
453
- return self._gen_client
454
-
455
- # Thread-safe initialization
456
- with self._gen_lock:
457
- if self._gen_client is not None:
458
- return self._gen_client
459
-
460
- # Import lazily so extra dependencies can be
461
- # enforced outside the configuration class
462
- from arize._generated import api_client as gen
463
-
464
- cfg = gen.Configuration(host=self.api_url)
465
- if self.api_key:
466
- cfg.access_token = self.api_key
467
- client = gen.ApiClient(cfg)
468
-
469
- # Bypass frozen to set the cache once
470
- object.__setattr__(self, "_gen_client", client)
471
- return client
arize/constants/config.py CHANGED
@@ -14,6 +14,7 @@ ENV_FLIGHT_PORT = "ARIZE_FLIGHT_PORT"
14
14
  ENV_FLIGHT_SCHEME = "ARIZE_FLIGHT_SCHEME"
15
15
  ENV_SINGLE_HOST = "ARIZE_SINGLE_HOST"
16
16
  ENV_SINGLE_PORT = "ARIZE_SINGLE_PORT"
17
+ ENV_BASE_DOMAIN = "ARIZE_BASE_DOMAIN"
17
18
  ENV_PYARROW_MAX_CHUNKSIZE = "ARIZE_MAX_CHUNKSIZE"
18
19
  ENV_REQUEST_VERIFY = "ARIZE_REQUEST_VERIFY"
19
20
  ENV_MAX_HTTP_PAYLOAD_SIZE_MB = "ARIZE_MAX_HTTP_PAYLOAD_SIZE_MB"
arize/constants/ml.py CHANGED
@@ -3,50 +3,43 @@
3
3
  import json
4
4
  from pathlib import Path
5
5
 
6
- # MAX_BYTES_PER_BULK_RECORD = 100000
7
- # MAX_DAYS_WITHIN_RANGE = 365
8
6
  MIN_PREDICTION_ID_LEN = 1
9
7
  MAX_PREDICTION_ID_LEN = 512
10
8
  MIN_DOCUMENT_ID_LEN = 1
11
9
  MAX_DOCUMENT_ID_LEN = 128
12
- # # The maximum number of character for tag values
10
+ # The maximum number of characters for tag values
13
11
  MAX_TAG_LENGTH = 20_000
14
12
  MAX_TAG_LENGTH_TRUNCATION = 1_000
15
- # # The maximum number of character for embedding raw data
13
+ # The maximum number of characters for embedding raw data
16
14
  MAX_RAW_DATA_CHARACTERS = 2_000_000
17
15
  MAX_RAW_DATA_CHARACTERS_TRUNCATION = 5_000
18
16
  # The maximum number of acceptable years in the past from current time for prediction_timestamps
19
17
  MAX_PAST_YEARS_FROM_CURRENT_TIME = 5
20
18
  # The maximum number of acceptable years in the future from current time for prediction_timestamps
21
19
  MAX_FUTURE_YEARS_FROM_CURRENT_TIME = 1
22
- # # The maximum number of character for llm model name
20
+ # The maximum number of characters for llm model name
23
21
  MAX_LLM_MODEL_NAME_LENGTH = 20_000
24
22
  MAX_LLM_MODEL_NAME_LENGTH_TRUNCATION = 50
25
- # # The maximum number of character for prompt template
23
+ # The maximum number of characters for prompt template
26
24
  MAX_PROMPT_TEMPLATE_LENGTH = 50_000
27
25
  MAX_PROMPT_TEMPLATE_LENGTH_TRUNCATION = 5_000
28
- # # The maximum number of character for prompt template version
26
+ # The maximum number of characters for prompt template version
29
27
  MAX_PROMPT_TEMPLATE_VERSION_LENGTH = 20_000
30
28
  MAX_PROMPT_TEMPLATE_VERSION_LENGTH_TRUNCATION = 50
31
- # # The maximum number of embeddings
29
+ # The maximum number of embeddings
32
30
  MAX_NUMBER_OF_EMBEDDINGS = 30
33
31
  MAX_EMBEDDING_DIMENSIONALITY = 20_000
34
- # # The maximum number of classes for multi class
32
+ # The maximum number of classes for multi class
35
33
  MAX_NUMBER_OF_MULTI_CLASS_CLASSES = 500
36
34
  MAX_MULTI_CLASS_NAME_LENGTH = 100
37
35
  # The maximum number of references in embedding similarity search params
38
36
  MAX_NUMBER_OF_SIMILARITY_REFERENCES = 10
39
- #
40
- # # Arize generated columns
41
- # GENERATED_PREDICTION_LABEL_COL = "arize_generated_prediction_label"
42
- # GENERATED_LLM_PARAMS_JSON_COL = "arize_generated_llm_params_json"
43
- #
44
- # # reserved columns for LLM run metadata
37
+ # reserved columns for LLM run metadata
45
38
  LLM_RUN_METADATA_TOTAL_TOKEN_COUNT_TAG_NAME = "total_token_count" # noqa: S105
46
39
  LLM_RUN_METADATA_PROMPT_TOKEN_COUNT_TAG_NAME = "prompt_token_count" # noqa: S105
47
40
  LLM_RUN_METADATA_RESPONSE_TOKEN_COUNT_TAG_NAME = "response_token_count" # noqa: S105
48
41
  LLM_RUN_METADATA_RESPONSE_LATENCY_MS_TAG_NAME = "response_latency_ms"
49
- #
42
+
50
43
  # all reserved tags
51
44
  RESERVED_TAG_COLS = [
52
45
  LLM_RUN_METADATA_TOTAL_TOKEN_COUNT_TAG_NAME,
arize/constants/spans.py CHANGED
@@ -5,19 +5,15 @@ DEFAULT_DATETIME_FMT = "%Y-%m-%dT%H:%M:%S.%f+00:00"
5
5
  # Minimum/Maximum number of characters for span/trace/parent ids in spans
6
6
  SPAN_ID_MIN_STR_LENGTH = 12
7
7
  SPAN_ID_MAX_STR_LENGTH = 128
8
- # # Minumum/Maximum number of characters for span name
8
+ # Minimum/Maximum number of characters for span name
9
9
  SPAN_NAME_MIN_STR_LENGTH = 0
10
10
  SPAN_NAME_MAX_STR_LENGTH = 50
11
- # # Minumum/Maximum number of characters for span status message
11
+ # Minimum/Maximum number of characters for span status message
12
12
  SPAN_STATUS_MSG_MIN_STR_LENGTH = 0
13
13
  SPAN_STATUS_MSG_MAX_STR_LENGTH = 10_000
14
- # # Minumum/Maximum number of characters for span event name
14
+ # Minimum/Maximum number of characters for span event name
15
15
  SPAN_EVENT_NAME_MAX_STR_LENGTH = 100
16
- # # Minumum/Maximum number of characters for span event attributes
17
- # SPAN_EVENT_ATTRS_MAX_STR_LENGTH = 10_000
18
- # # Maximum number of characters for span kind
19
- # SPAN_KIND_MAX_STR_LENGTH = 100
20
- # SPAN_EXCEPTION_TYPE_MAX_STR_LENGTH = 100
16
+ # Minimum/Maximum number of characters for span event attributes
21
17
  SPAN_EXCEPTION_MESSAGE_MAX_STR_LENGTH = 100
22
18
  SPAN_EXCEPTION_STACK_TRACE_MAX_STR_LENGTH = 10_000
23
19
  SPAN_IO_VALUE_MAX_STR_LENGTH = 4_000_000
@@ -29,7 +25,6 @@ SPAN_LLM_MESSAGE_ROLE_MAX_STR_LENGTH = 100
29
25
  SPAN_LLM_MESSAGE_CONTENT_MAX_STR_LENGTH = 4_000_000
30
26
  SPAN_LLM_TOOL_CALL_FUNCTION_NAME_MAX_STR_LENGTH = 500
31
27
  SPAN_LLM_PROMPT_TEMPLATE_MAX_STR_LENGTH = 4_000_000
32
- # SPAN_LLM_PROMPT_TEMPLATE_VARIABLES_MAX_STR_LENGTH = 10_000
33
28
  SPAN_LLM_PROMPT_TEMPLATE_VERSION_MAX_STR_LENGTH = 100
34
29
  SPAN_TOOL_NAME_MAX_STR_LENGTH = 100
35
30
  SPAN_TOOL_DESCRIPTION_MAX_STR_LENGTH = 1_000
@@ -43,7 +38,7 @@ JSON_STRING_MAX_STR_LENGTH = 4_000_000
43
38
  EVAL_LABEL_MIN_STR_LENGTH = 1 # we do not accept empty strings
44
39
  EVAL_LABEL_MAX_STR_LENGTH = 100
45
40
  EVAL_EXPLANATION_MAX_STR_LENGTH = 10_000
46
- #
41
+
47
42
  # Annotation related constants
48
43
  ANNOTATION_LABEL_MIN_STR_LENGTH = 1
49
44
  ANNOTATION_LABEL_MAX_STR_LENGTH = 100 # Max length for annotation label string