arize 8.0.0a22__py3-none-any.whl → 8.0.0a23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166)
  1. arize/__init__.py +17 -9
  2. arize/_exporter/client.py +55 -36
  3. arize/_exporter/parsers/tracing_data_parser.py +41 -30
  4. arize/_exporter/validation.py +3 -3
  5. arize/_flight/client.py +207 -76
  6. arize/_generated/api_client/__init__.py +30 -6
  7. arize/_generated/api_client/api/__init__.py +1 -0
  8. arize/_generated/api_client/api/datasets_api.py +864 -190
  9. arize/_generated/api_client/api/experiments_api.py +167 -131
  10. arize/_generated/api_client/api/projects_api.py +1197 -0
  11. arize/_generated/api_client/api_client.py +2 -2
  12. arize/_generated/api_client/configuration.py +42 -34
  13. arize/_generated/api_client/exceptions.py +2 -2
  14. arize/_generated/api_client/models/__init__.py +15 -4
  15. arize/_generated/api_client/models/dataset.py +10 -10
  16. arize/_generated/api_client/models/dataset_example.py +111 -0
  17. arize/_generated/api_client/models/dataset_example_update.py +100 -0
  18. arize/_generated/api_client/models/dataset_version.py +13 -13
  19. arize/_generated/api_client/models/datasets_create_request.py +16 -8
  20. arize/_generated/api_client/models/datasets_examples_insert_request.py +100 -0
  21. arize/_generated/api_client/models/datasets_examples_list200_response.py +106 -0
  22. arize/_generated/api_client/models/datasets_examples_update_request.py +102 -0
  23. arize/_generated/api_client/models/datasets_list200_response.py +10 -4
  24. arize/_generated/api_client/models/experiment.py +14 -16
  25. arize/_generated/api_client/models/experiment_run.py +108 -0
  26. arize/_generated/api_client/models/experiment_run_create.py +102 -0
  27. arize/_generated/api_client/models/experiments_create_request.py +16 -10
  28. arize/_generated/api_client/models/experiments_list200_response.py +10 -4
  29. arize/_generated/api_client/models/experiments_runs_list200_response.py +19 -5
  30. arize/_generated/api_client/models/{error.py → pagination_metadata.py} +13 -11
  31. arize/_generated/api_client/models/primitive_value.py +172 -0
  32. arize/_generated/api_client/models/problem.py +100 -0
  33. arize/_generated/api_client/models/project.py +99 -0
  34. arize/_generated/api_client/models/{datasets_list_examples200_response.py → projects_create_request.py} +13 -11
  35. arize/_generated/api_client/models/projects_list200_response.py +106 -0
  36. arize/_generated/api_client/rest.py +2 -2
  37. arize/_generated/api_client/test/test_dataset.py +4 -2
  38. arize/_generated/api_client/test/test_dataset_example.py +56 -0
  39. arize/_generated/api_client/test/test_dataset_example_update.py +52 -0
  40. arize/_generated/api_client/test/test_dataset_version.py +7 -2
  41. arize/_generated/api_client/test/test_datasets_api.py +27 -13
  42. arize/_generated/api_client/test/test_datasets_create_request.py +8 -4
  43. arize/_generated/api_client/test/{test_datasets_list_examples200_response.py → test_datasets_examples_insert_request.py} +19 -15
  44. arize/_generated/api_client/test/test_datasets_examples_list200_response.py +66 -0
  45. arize/_generated/api_client/test/test_datasets_examples_update_request.py +61 -0
  46. arize/_generated/api_client/test/test_datasets_list200_response.py +9 -3
  47. arize/_generated/api_client/test/test_experiment.py +2 -4
  48. arize/_generated/api_client/test/test_experiment_run.py +56 -0
  49. arize/_generated/api_client/test/test_experiment_run_create.py +54 -0
  50. arize/_generated/api_client/test/test_experiments_api.py +6 -6
  51. arize/_generated/api_client/test/test_experiments_create_request.py +9 -6
  52. arize/_generated/api_client/test/test_experiments_list200_response.py +9 -5
  53. arize/_generated/api_client/test/test_experiments_runs_list200_response.py +15 -5
  54. arize/_generated/api_client/test/test_pagination_metadata.py +53 -0
  55. arize/_generated/api_client/test/{test_error.py → test_primitive_value.py} +13 -14
  56. arize/_generated/api_client/test/test_problem.py +57 -0
  57. arize/_generated/api_client/test/test_project.py +58 -0
  58. arize/_generated/api_client/test/test_projects_api.py +59 -0
  59. arize/_generated/api_client/test/test_projects_create_request.py +54 -0
  60. arize/_generated/api_client/test/test_projects_list200_response.py +70 -0
  61. arize/_generated/api_client_README.md +43 -29
  62. arize/_generated/protocol/flight/flight_pb2.py +400 -0
  63. arize/_lazy.py +27 -19
  64. arize/client.py +268 -55
  65. arize/config.py +365 -116
  66. arize/constants/__init__.py +1 -0
  67. arize/constants/config.py +11 -4
  68. arize/constants/ml.py +6 -4
  69. arize/constants/openinference.py +2 -0
  70. arize/constants/pyarrow.py +2 -0
  71. arize/constants/spans.py +3 -1
  72. arize/datasets/__init__.py +1 -0
  73. arize/datasets/client.py +299 -84
  74. arize/datasets/errors.py +32 -2
  75. arize/datasets/validation.py +18 -8
  76. arize/embeddings/__init__.py +2 -0
  77. arize/embeddings/auto_generator.py +23 -19
  78. arize/embeddings/base_generators.py +89 -36
  79. arize/embeddings/constants.py +2 -0
  80. arize/embeddings/cv_generators.py +26 -4
  81. arize/embeddings/errors.py +27 -5
  82. arize/embeddings/nlp_generators.py +31 -12
  83. arize/embeddings/tabular_generators.py +32 -20
  84. arize/embeddings/usecases.py +12 -2
  85. arize/exceptions/__init__.py +1 -0
  86. arize/exceptions/auth.py +11 -1
  87. arize/exceptions/base.py +29 -4
  88. arize/exceptions/models.py +21 -2
  89. arize/exceptions/parameters.py +31 -0
  90. arize/exceptions/spaces.py +12 -1
  91. arize/exceptions/types.py +86 -7
  92. arize/exceptions/values.py +220 -20
  93. arize/experiments/__init__.py +1 -0
  94. arize/experiments/client.py +389 -285
  95. arize/experiments/evaluators/__init__.py +1 -0
  96. arize/experiments/evaluators/base.py +74 -41
  97. arize/experiments/evaluators/exceptions.py +6 -3
  98. arize/experiments/evaluators/executors.py +121 -73
  99. arize/experiments/evaluators/rate_limiters.py +106 -57
  100. arize/experiments/evaluators/types.py +34 -7
  101. arize/experiments/evaluators/utils.py +65 -27
  102. arize/experiments/functions.py +103 -101
  103. arize/experiments/tracing.py +52 -44
  104. arize/experiments/types.py +56 -31
  105. arize/logging.py +54 -22
  106. arize/models/__init__.py +1 -0
  107. arize/models/batch_validation/__init__.py +1 -0
  108. arize/models/batch_validation/errors.py +543 -65
  109. arize/models/batch_validation/validator.py +339 -300
  110. arize/models/bounded_executor.py +20 -7
  111. arize/models/casting.py +75 -29
  112. arize/models/client.py +326 -107
  113. arize/models/proto.py +95 -40
  114. arize/models/stream_validation.py +42 -14
  115. arize/models/surrogate_explainer/__init__.py +1 -0
  116. arize/models/surrogate_explainer/mimic.py +24 -13
  117. arize/pre_releases.py +43 -0
  118. arize/projects/__init__.py +1 -0
  119. arize/projects/client.py +129 -0
  120. arize/regions.py +40 -0
  121. arize/spans/__init__.py +1 -0
  122. arize/spans/client.py +130 -106
  123. arize/spans/columns.py +13 -0
  124. arize/spans/conversion.py +54 -38
  125. arize/spans/validation/__init__.py +1 -0
  126. arize/spans/validation/annotations/__init__.py +1 -0
  127. arize/spans/validation/annotations/annotations_validation.py +6 -4
  128. arize/spans/validation/annotations/dataframe_form_validation.py +13 -11
  129. arize/spans/validation/annotations/value_validation.py +35 -11
  130. arize/spans/validation/common/__init__.py +1 -0
  131. arize/spans/validation/common/argument_validation.py +33 -8
  132. arize/spans/validation/common/dataframe_form_validation.py +35 -9
  133. arize/spans/validation/common/errors.py +211 -11
  134. arize/spans/validation/common/value_validation.py +80 -13
  135. arize/spans/validation/evals/__init__.py +1 -0
  136. arize/spans/validation/evals/dataframe_form_validation.py +28 -8
  137. arize/spans/validation/evals/evals_validation.py +34 -4
  138. arize/spans/validation/evals/value_validation.py +26 -3
  139. arize/spans/validation/metadata/__init__.py +1 -1
  140. arize/spans/validation/metadata/argument_validation.py +14 -5
  141. arize/spans/validation/metadata/dataframe_form_validation.py +26 -10
  142. arize/spans/validation/metadata/value_validation.py +24 -10
  143. arize/spans/validation/spans/__init__.py +1 -0
  144. arize/spans/validation/spans/dataframe_form_validation.py +34 -13
  145. arize/spans/validation/spans/spans_validation.py +35 -4
  146. arize/spans/validation/spans/value_validation.py +76 -7
  147. arize/types.py +293 -157
  148. arize/utils/__init__.py +1 -0
  149. arize/utils/arrow.py +31 -15
  150. arize/utils/cache.py +34 -6
  151. arize/utils/dataframe.py +19 -2
  152. arize/utils/online_tasks/__init__.py +2 -0
  153. arize/utils/online_tasks/dataframe_preprocessor.py +53 -41
  154. arize/utils/openinference_conversion.py +44 -5
  155. arize/utils/proto.py +10 -0
  156. arize/utils/size.py +5 -3
  157. arize/version.py +3 -1
  158. {arize-8.0.0a22.dist-info → arize-8.0.0a23.dist-info}/METADATA +4 -3
  159. arize-8.0.0a23.dist-info/RECORD +174 -0
  160. {arize-8.0.0a22.dist-info → arize-8.0.0a23.dist-info}/WHEEL +1 -1
  161. arize-8.0.0a23.dist-info/licenses/LICENSE +176 -0
  162. arize-8.0.0a23.dist-info/licenses/NOTICE +13 -0
  163. arize/_generated/protocol/flight/export_pb2.py +0 -61
  164. arize/_generated/protocol/flight/ingest_pb2.py +0 -365
  165. arize-8.0.0a22.dist-info/RECORD +0 -146
  166. arize-8.0.0a22.dist-info/licenses/LICENSE.md +0 -12
arize/config.py CHANGED
@@ -1,207 +1,459 @@
1
+ """SDK configuration and settings management for the Arize client."""
2
+
1
3
  import logging
2
4
  import os
3
5
  import sys
4
6
  import threading
5
7
  from dataclasses import dataclass, field, fields
6
8
  from pathlib import Path
7
- from typing import Any, Dict
9
+ from typing import Any
8
10
 
9
11
  from arize.constants.config import (
10
12
  DEFAULT_API_HOST,
13
+ DEFAULT_API_SCHEME,
11
14
  DEFAULT_ARIZE_DIRECTORY,
12
15
  DEFAULT_ENABLE_CACHING,
13
16
  DEFAULT_FLIGHT_HOST,
14
17
  DEFAULT_FLIGHT_PORT,
15
- DEFAULT_FLIGHT_TRANSPORT_SCHEME,
16
- DEFAULT_INSECURE,
18
+ DEFAULT_FLIGHT_SCHEME,
17
19
  DEFAULT_MAX_HTTP_PAYLOAD_SIZE_MB,
18
20
  DEFAULT_OTLP_HOST,
21
+ DEFAULT_OTLP_SCHEME,
19
22
  DEFAULT_PYARROW_MAX_CHUNKSIZE,
20
23
  DEFAULT_REQUEST_VERIFY,
21
24
  DEFAULT_STREAM_MAX_QUEUE_BOUND,
22
25
  DEFAULT_STREAM_MAX_WORKERS,
23
26
  ENV_API_HOST,
24
27
  ENV_API_KEY,
28
+ ENV_API_SCHEME,
25
29
  ENV_ARIZE_DIRECTORY,
26
30
  ENV_ENABLE_CACHING,
27
31
  ENV_FLIGHT_HOST,
28
32
  ENV_FLIGHT_PORT,
29
- ENV_FLIGHT_TRANSPORT_SCHEME,
30
- ENV_INSECURE,
33
+ ENV_FLIGHT_SCHEME,
31
34
  ENV_MAX_HTTP_PAYLOAD_SIZE_MB,
32
35
  ENV_OTLP_HOST,
36
+ ENV_OTLP_SCHEME,
33
37
  ENV_PYARROW_MAX_CHUNKSIZE,
38
+ ENV_REGION,
34
39
  ENV_REQUEST_VERIFY,
40
+ ENV_SINGLE_HOST,
41
+ ENV_SINGLE_PORT,
35
42
  ENV_STREAM_MAX_QUEUE_BOUND,
36
43
  ENV_STREAM_MAX_WORKERS,
37
44
  )
38
45
  from arize.constants.pyarrow import MAX_CHUNKSIZE
39
46
  from arize.exceptions.auth import MissingAPIKeyError
47
+ from arize.regions import REGION_ENDPOINTS, Region
40
48
  from arize.version import __version__
41
49
 
42
50
  logger = logging.getLogger(__name__)
43
51
 
44
-
45
- def _parse_bool(val: bool | str | None) -> bool:
46
- if isinstance(val, bool):
47
- return val
48
- return (val or "").strip().lower() in {"1", "true", "yes", "on"}
49
-
50
-
51
- def _api_key_factory() -> str:
52
- return os.getenv(ENV_API_KEY, "")
53
-
54
-
55
- def _api_host_factory() -> str:
56
- return os.getenv(ENV_API_HOST, DEFAULT_API_HOST)
57
-
58
-
59
- def _api_scheme_factory() -> str:
60
- insecure = os.getenv(ENV_INSECURE, DEFAULT_INSECURE)
61
- if insecure:
62
- return "http"
63
- return "https"
52
+ PYTHON_VERSION = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"
53
+ SENSITIVE_FIELD_MARKERS = ("key", "token", "secret")
54
+ ALLOWED_HTTP_SCHEMES = {"http", "https"}
64
55
 
65
56
 
66
- def _flight_host_factory() -> str:
67
- return os.getenv(ENV_FLIGHT_HOST, DEFAULT_FLIGHT_HOST)
57
+ def _is_sensitive_field(name: str) -> bool:
58
+ n = name.lower()
59
+ return bool(any(k in n for k in SENSITIVE_FIELD_MARKERS))
68
60
 
69
61
 
70
- def _flight_port_factory() -> int:
71
- return int(os.getenv(ENV_FLIGHT_PORT, DEFAULT_FLIGHT_PORT))
62
+ def _mask_secret(secret: str, N: int = 4) -> str:
63
+ """Show first N chars then '***'; empty string if empty."""
64
+ if len(secret) == 0:
65
+ return ""
66
+ return f"{secret[:N]}***"
72
67
 
73
68
 
74
- def _flight_scheme_factory() -> str:
75
- return os.getenv(
76
- ENV_FLIGHT_TRANSPORT_SCHEME, DEFAULT_FLIGHT_TRANSPORT_SCHEME
77
- )
69
+ def _endpoint(scheme: str, base: str, path: str = "") -> str:
70
+ endpoint = scheme + "://" + base.rstrip("/")
71
+ if path:
72
+ endpoint += "/" + path.lstrip("/")
73
+ return endpoint
78
74
 
79
75
 
80
- def _pyarrow_max_chunksize() -> int:
81
- max_chunksize = int(
82
- os.getenv(ENV_PYARROW_MAX_CHUNKSIZE, DEFAULT_PYARROW_MAX_CHUNKSIZE)
83
- )
84
- if max_chunksize <= 0 or max_chunksize > MAX_CHUNKSIZE:
76
+ def _env_http_scheme(name: str, default: str) -> str:
77
+ v = _env_str(name, default).lower()
78
+ if v not in ALLOWED_HTTP_SCHEMES:
85
79
  raise ValueError(
86
- f"Pyarrow max_chunksize must be between 1 and {MAX_CHUNKSIZE}, got {max_chunksize}"
80
+ f"{name} must be one of {sorted(ALLOWED_HTTP_SCHEMES)}. Found {v!r}"
87
81
  )
88
- return max_chunksize
89
-
82
+ return v
90
83
 
91
- def _verify_factory() -> bool:
92
- return _parse_bool(os.getenv(ENV_REQUEST_VERIFY, DEFAULT_REQUEST_VERIFY))
93
84
 
85
+ def _env_str(
86
+ name: str,
87
+ default: str,
88
+ min_len: int | None = None,
89
+ max_len: int | None = None,
90
+ ) -> str:
91
+ val = os.getenv(name, default).strip()
94
92
 
95
- def _stream_max_workers_factory() -> int:
96
- return int(os.getenv(ENV_STREAM_MAX_WORKERS, DEFAULT_STREAM_MAX_WORKERS))
97
-
98
-
99
- def _stream_max_queue_bound_factory() -> int:
100
- return int(
101
- os.getenv(ENV_STREAM_MAX_QUEUE_BOUND, DEFAULT_STREAM_MAX_QUEUE_BOUND)
102
- )
103
-
104
-
105
- def _otlp_scheme_factory() -> str:
106
- insecure = os.getenv(ENV_INSECURE, DEFAULT_INSECURE)
107
- if insecure:
108
- return "http"
109
- return "https"
110
-
111
-
112
- def _otlp_host_factory() -> str:
113
- return os.getenv(ENV_OTLP_HOST, DEFAULT_OTLP_HOST)
114
-
115
-
116
- def _max_http_payload_size_mb_factory() -> float:
117
- return float(
118
- os.getenv(
119
- ENV_MAX_HTTP_PAYLOAD_SIZE_MB, DEFAULT_MAX_HTTP_PAYLOAD_SIZE_MB
93
+ if min_len is not None and len(val) < min_len:
94
+ raise ValueError(
95
+ f"The value of environment variable {name} must be at least {min_len} "
96
+ f"characters long. Found {len(val)} characters."
120
97
  )
121
- )
122
-
123
-
124
- def _arize_dir_factory() -> str:
125
- return os.getenv(ENV_ARIZE_DIRECTORY, DEFAULT_ARIZE_DIRECTORY)
98
+ if max_len is not None and len(val) > max_len:
99
+ raise ValueError(
100
+ f"The value of environment variable {name} must be at most {max_len} "
101
+ f"characters long. Found {len(val)} characters."
102
+ )
103
+ return val
104
+
105
+
106
+ def _env_int(
107
+ name: str,
108
+ default: int,
109
+ min_val: int | None = None,
110
+ max_val: int | None = None,
111
+ ) -> int:
112
+ raw = os.getenv(name, default)
113
+ try:
114
+ val = int(raw)
115
+ except Exception as e:
116
+ raise ValueError(
117
+ f"Environment variable {name} must be an int. Found: {raw!r}"
118
+ ) from e
126
119
 
120
+ if min_val is not None and val < min_val:
121
+ raise ValueError(
122
+ f"The value of environment variable {name} must be at least {min_val}. Found {val}."
123
+ )
124
+ if max_val is not None and val > max_val:
125
+ raise ValueError(
126
+ f"The value of environment variable {name} must be at most {max_val}. Found {val}."
127
+ )
128
+ return val
129
+
130
+
131
+ def _env_float(
132
+ name: str,
133
+ default: float,
134
+ min_val: float | None = None,
135
+ max_val: float | None = None,
136
+ ) -> float:
137
+ raw = os.getenv(name, default)
138
+ try:
139
+ val = float(raw)
140
+ except Exception as e:
141
+ raise ValueError(
142
+ f"Environment variable {name} must be a float. Found: {raw!r}"
143
+ ) from e
127
144
 
128
- def _enable_cache_factory() -> bool:
129
- return _parse_bool(os.getenv(ENV_ENABLE_CACHING, DEFAULT_ENABLE_CACHING))
145
+ if min_val is not None and val < min_val:
146
+ raise ValueError(
147
+ f"The value of environment variable {name} must be at least {min_val}. Found {val}."
148
+ )
149
+ if max_val is not None and val > max_val:
150
+ raise ValueError(
151
+ f"The value of environment variable {name} must be at most {max_val}. Found {val}."
152
+ )
153
+ return val
130
154
 
131
155
 
132
- def _mask_secret(secret: str, N: int = 4) -> str:
133
- """Show first N chars then '***'; empty string if empty."""
134
- return f"{secret[:N]}***"
156
+ def _env_bool(name: str, default: bool) -> bool:
157
+ return _parse_bool(os.getenv(name, str(default)))
135
158
 
136
159
 
137
- def _endpoint(scheme: str, base: str, path: str = "") -> str:
138
- endpoint = scheme + "://" + base.rstrip("/")
139
- if path:
140
- endpoint += "/" + path.lstrip("/")
141
- return endpoint
160
+ def _parse_bool(val: bool | str | None) -> bool:
161
+ if isinstance(val, bool):
162
+ return val
163
+ return (val or "").strip().lower() in {"1", "true", "yes", "on"}
142
164
 
143
165
 
144
166
  @dataclass(frozen=True)
145
167
  class SDKConfiguration:
146
- api_key: str = field(default_factory=_api_key_factory)
147
- api_host: str = field(default_factory=_api_host_factory)
148
- api_scheme: str = field(default_factory=_api_scheme_factory)
149
- otlp_host: str = field(default_factory=_otlp_host_factory)
150
- otlp_scheme: str = field(default_factory=_otlp_scheme_factory)
151
- flight_server_host: str = field(default_factory=_flight_host_factory)
152
- flight_server_port: int = field(default_factory=_flight_port_factory)
153
- flight_scheme: str = field(default_factory=_flight_scheme_factory)
154
- pyarrow_max_chunksize: int = field(default_factory=_pyarrow_max_chunksize)
155
- request_verify: bool = field(default_factory=_verify_factory)
156
- stream_max_workers: int = field(default_factory=_stream_max_workers_factory)
168
+ """Configuration for the Arize SDK with endpoint and authentication settings.
169
+
170
+ This class is used internally by ArizeClient to manage SDK configuration. Users
171
+ typically interact with ArizeClient rather than instantiating this class directly.
172
+
173
+ Configuration Precedence
174
+ ------------------------
175
+ Each configuration parameter follows this resolution order:
176
+ 1. Explicit value passed to ArizeClient constructor (highest priority)
177
+ 2. Environment variable value
178
+ 3. Built-in default value (lowest priority)
179
+
180
+ Parameters
181
+ ----------
182
+ api_key : str
183
+ Arize API key for authentication. Required.
184
+ Environment variable: ARIZE_API_KEY
185
+ Default: None (must be provided via argument or environment variable)
186
+
187
+ api_host : str
188
+ API endpoint host.
189
+ Environment variable: ARIZE_API_HOST
190
+ Default: "api.arize.com"
191
+
192
+ api_scheme : str
193
+ API endpoint scheme (http/https).
194
+ Environment variable: ARIZE_API_SCHEME
195
+ Default: "https"
196
+
197
+ otlp_host : str
198
+ OTLP (OpenTelemetry Protocol) endpoint host.
199
+ Environment variable: ARIZE_OTLP_HOST
200
+ Default: "otlp.arize.com"
201
+
202
+ otlp_scheme : str
203
+ OTLP endpoint scheme (http/https).
204
+ Environment variable: ARIZE_OTLP_SCHEME
205
+ Default: "https"
206
+
207
+ flight_host : str
208
+ Apache Arrow Flight endpoint host.
209
+ Environment variable: ARIZE_FLIGHT_HOST
210
+ Default: "flight.arize.com"
211
+
212
+ flight_port : int
213
+ Apache Arrow Flight endpoint port (1-65535).
214
+ Environment variable: ARIZE_FLIGHT_PORT
215
+ Default: 443
216
+
217
+ flight_scheme : str
218
+ Apache Arrow Flight endpoint scheme.
219
+ Environment variable: ARIZE_FLIGHT_SCHEME
220
+ Default: "grpc+tls"
221
+
222
+ pyarrow_max_chunksize : int
223
+ Maximum chunk size for PyArrow operations (1 to MAX_CHUNKSIZE).
224
+ Environment variable: ARIZE_MAX_CHUNKSIZE
225
+ Default: 10_000
226
+
227
+ request_verify : bool
228
+ Whether to verify SSL certificates for HTTP requests.
229
+ Environment variable: ARIZE_REQUEST_VERIFY
230
+ Default: True
231
+
232
+ stream_max_workers : int
233
+ Maximum number of worker threads for streaming operations (minimum: 1).
234
+ Environment variable: ARIZE_STREAM_MAX_WORKERS
235
+ Default: 8
236
+
237
+ stream_max_queue_bound : int
238
+ Maximum queue size for streaming operations (minimum: 1).
239
+ Environment variable: ARIZE_STREAM_MAX_QUEUE_BOUND
240
+ Default: 5000
241
+
242
+ max_http_payload_size_mb : float
243
+ Maximum HTTP payload size in megabytes (minimum: 1).
244
+ Environment variable: ARIZE_MAX_HTTP_PAYLOAD_SIZE_MB
245
+ Default: 100
246
+
247
+ arize_directory : str
248
+ Directory for Arize SDK files (cache, logs, etc.).
249
+ Environment variable: ARIZE_DIRECTORY
250
+ Default: "~/.arize"
251
+
252
+ enable_caching : bool
253
+ Whether to enable local caching.
254
+ Environment variable: ARIZE_ENABLE_CACHING
255
+ Default: True
256
+
257
+ region : Region
258
+ Arize region (e.g., US_CENTRAL, EU_WEST). When specified, overrides
259
+ individual host/port settings.
260
+ Environment variable: ARIZE_REGION
261
+ Default: Region.UNSPECIFIED
262
+
263
+ single_host : str
264
+ Single host to use for all endpoints. Overrides individual host settings.
265
+ Environment variable: ARIZE_SINGLE_HOST
266
+ Default: "" (not set)
267
+
268
+ single_port : int
269
+ Single port to use for all endpoints. Overrides individual port settings (0-65535).
270
+ Environment variable: ARIZE_SINGLE_PORT
271
+ Default: 0 (not set)
272
+
273
+ See Also:
274
+ --------
275
+ ArizeClient : Main client class that uses this configuration
276
+ """
277
+
278
+ api_key: str = field(
279
+ default_factory=lambda: _env_str(ENV_API_KEY, ""),
280
+ )
281
+ api_host: str = field(
282
+ default_factory=lambda: _env_str(ENV_API_HOST, DEFAULT_API_HOST)
283
+ )
284
+ api_scheme: str = field(
285
+ default_factory=lambda: _env_http_scheme(
286
+ ENV_API_SCHEME,
287
+ DEFAULT_API_SCHEME,
288
+ ),
289
+ )
290
+ otlp_host: str = field(
291
+ default_factory=lambda: _env_str(ENV_OTLP_HOST, DEFAULT_OTLP_HOST)
292
+ )
293
+ otlp_scheme: str = field(
294
+ default_factory=lambda: _env_http_scheme(
295
+ ENV_OTLP_SCHEME,
296
+ DEFAULT_OTLP_SCHEME,
297
+ ),
298
+ )
299
+ flight_host: str = field(
300
+ default_factory=lambda: _env_str(ENV_FLIGHT_HOST, DEFAULT_FLIGHT_HOST)
301
+ )
302
+ flight_port: int = field(
303
+ default_factory=lambda: _env_int(
304
+ ENV_FLIGHT_PORT, DEFAULT_FLIGHT_PORT, min_val=1, max_val=65535
305
+ )
306
+ )
307
+ flight_scheme: str = field(
308
+ default_factory=lambda: _env_str(
309
+ ENV_FLIGHT_SCHEME,
310
+ DEFAULT_FLIGHT_SCHEME,
311
+ ),
312
+ )
313
+ pyarrow_max_chunksize: int = field(
314
+ default_factory=lambda: _env_int(
315
+ ENV_PYARROW_MAX_CHUNKSIZE,
316
+ DEFAULT_PYARROW_MAX_CHUNKSIZE,
317
+ min_val=1,
318
+ max_val=MAX_CHUNKSIZE,
319
+ )
320
+ )
321
+ request_verify: bool = field(
322
+ default_factory=lambda: _env_bool(
323
+ ENV_REQUEST_VERIFY, DEFAULT_REQUEST_VERIFY
324
+ )
325
+ )
326
+ stream_max_workers: int = field(
327
+ default_factory=lambda: _env_int(
328
+ ENV_STREAM_MAX_WORKERS, DEFAULT_STREAM_MAX_WORKERS, min_val=1
329
+ )
330
+ )
157
331
  stream_max_queue_bound: int = field(
158
- default_factory=_stream_max_queue_bound_factory
332
+ default_factory=lambda: _env_int(
333
+ ENV_STREAM_MAX_QUEUE_BOUND,
334
+ DEFAULT_STREAM_MAX_QUEUE_BOUND,
335
+ min_val=1,
336
+ )
159
337
  )
160
338
  max_http_payload_size_mb: float = field(
161
- default_factory=_max_http_payload_size_mb_factory
339
+ default_factory=lambda: _env_float(
340
+ ENV_MAX_HTTP_PAYLOAD_SIZE_MB,
341
+ DEFAULT_MAX_HTTP_PAYLOAD_SIZE_MB,
342
+ min_val=1,
343
+ )
344
+ )
345
+ arize_directory: str = field(
346
+ default_factory=lambda: _env_str(
347
+ ENV_ARIZE_DIRECTORY, DEFAULT_ARIZE_DIRECTORY
348
+ )
349
+ )
350
+ enable_caching: bool = field(
351
+ default_factory=lambda: _env_bool(
352
+ ENV_ENABLE_CACHING, DEFAULT_ENABLE_CACHING
353
+ )
354
+ )
355
+ region: Region = field(
356
+ default_factory=lambda: Region(_env_str(ENV_REGION, ""))
357
+ )
358
+ single_host: str = field(
359
+ default_factory=lambda: _env_str(ENV_SINGLE_HOST, "")
360
+ )
361
+ single_port: int = field(
362
+ default_factory=lambda: _env_int(
363
+ ENV_SINGLE_PORT, 0, min_val=0, max_val=65535
364
+ )
162
365
  )
163
- arize_direcory: str = field(default_factory=_arize_dir_factory)
164
- enable_caching: bool = field(default_factory=_enable_cache_factory)
165
366
 
166
367
  # Private, excluded from comparisons & repr
167
- _headers: Dict[str, str] = field(init=False, repr=False, compare=False)
168
368
  _gen_client: Any = field(default=None, repr=False, compare=False)
169
369
  _gen_lock: threading.Lock = field(
170
370
  default_factory=threading.Lock, repr=False, compare=False
171
371
  )
172
372
 
173
- def __post_init__(self):
373
+ def __post_init__(self) -> None:
374
+ """Validate and configure SDK endpoints after initialization.
375
+
376
+ Raises:
377
+ MissingAPIKeyError: If API key is not provided.
378
+ """
174
379
  # Validate Configuration
175
380
  if not self.api_key:
176
381
  raise MissingAPIKeyError()
177
382
 
383
+ has_single_host = bool(self.single_host)
384
+ has_single_port = self.single_port != 0
385
+ has_region = self.region is not Region.UNSPECIFIED
386
+ if (has_single_host or has_single_port) and has_region:
387
+ logger.info(
388
+ "Multiple endpoint override options provided. Preference order is: "
389
+ "region > single_host/single_port > per-endpoint host/port."
390
+ )
391
+
392
+ # Single host override: if single_host is set, it overrides hosts
393
+ if has_single_host:
394
+ logger.info(
395
+ "Single host %r provided; overriding hosts configuration with single host.",
396
+ self.single_host,
397
+ )
398
+ object.__setattr__(self, "api_host", self.single_host)
399
+ object.__setattr__(self, "otlp_host", self.single_host)
400
+ object.__setattr__(self, "flight_host", self.single_host)
401
+
402
+ # Single port override: if single_port is set, it overrides ports
403
+ if has_single_port:
404
+ logger.info(
405
+ "Single port %s provided; overriding ports configuration with single port.",
406
+ self.single_port,
407
+ )
408
+ object.__setattr__(self, "flight_port", self.single_port)
409
+
410
+ # Region override: if region is set, it *always* wins over host/port fields
411
+ if has_region:
412
+ endpoints = REGION_ENDPOINTS[self.region]
413
+
414
+ # Override config (region trumps everything)
415
+ logger.info(
416
+ "Region %s provided; overriding hosts & ports configuration with region defaults.",
417
+ self.region.value,
418
+ )
419
+ object.__setattr__(self, "api_host", endpoints.api_host)
420
+ object.__setattr__(self, "otlp_host", endpoints.otlp_host)
421
+ object.__setattr__(self, "flight_host", endpoints.flight_host)
422
+ object.__setattr__(self, "flight_port", endpoints.flight_port)
423
+
178
424
  @property
179
425
  def cache_dir(self) -> str:
180
- return str(Path(self.arize_direcory) / "cache")
426
+ """Return the path to the cache directory."""
427
+ return str(Path(self.arize_directory) / "cache")
181
428
 
182
429
  @property
183
430
  def api_url(self) -> str:
431
+ """Return the base API URL."""
184
432
  return _endpoint(self.api_scheme, self.api_host)
185
433
 
186
434
  @property
187
435
  def otlp_url(self) -> str:
436
+ """Return the OTLP endpoint URL."""
188
437
  return _endpoint(self.otlp_scheme, self.otlp_host, "/v1")
189
438
 
190
439
  @property
191
440
  def files_url(self) -> str:
441
+ """Return the files upload endpoint URL."""
192
442
  return _endpoint(self.api_scheme, self.api_host, "/v1/pandas_arrow")
193
443
 
194
444
  @property
195
445
  def records_url(self) -> str:
446
+ """Return the records logging endpoint URL."""
196
447
  return _endpoint(self.api_scheme, self.api_host, "/v1/log")
197
448
 
198
449
  @property
199
- def headers(self) -> Dict[str, str]:
450
+ def headers(self) -> dict[str, str]:
451
+ """Return HTTP headers for API requests."""
200
452
  # Create base headers
201
453
  return {
202
454
  "authorization": self.api_key,
203
455
  "sdk-language": "python",
204
- "language-version": get_python_version(),
456
+ "language-version": PYTHON_VERSION,
205
457
  "sdk-version": __version__,
206
458
  # "arize-space-id": self._space_id,
207
459
  # "arize-interface": "batch",
@@ -209,31 +461,34 @@ class SDKConfiguration:
209
461
  }
210
462
 
211
463
  @property
212
- def headers_grpc(self) -> Dict[str, str]:
464
+ def headers_grpc(self) -> dict[str, str]:
465
+ """Return headers for gRPC requests."""
213
466
  return {
214
467
  "authorization": self.api_key,
215
468
  "Grpc-Metadata-sdk-language": "python",
216
- "Grpc-Metadata-language-version": get_python_version(),
469
+ "Grpc-Metadata-language-version": PYTHON_VERSION,
217
470
  "Grpc-Metadata-sdk-version": __version__,
218
471
  # "Grpc-Metadata-arize-space-id": space_id,
219
472
  # "Grpc-Metadata-arize-interface": "stream",
220
473
  }
221
474
 
222
475
  def __repr__(self) -> str:
476
+ """Return a detailed string representation with masked sensitive fields."""
223
477
  # Dynamically build repr for all fields
224
478
  lines = [f"{self.__class__.__name__}("]
225
479
  for f in fields(self):
226
480
  if not f.repr:
227
481
  continue
228
482
  val = getattr(self, f.name)
229
- if f.name == "api_key":
483
+ if _is_sensitive_field(f.name):
230
484
  val = _mask_secret(val, 6)
231
485
  lines.append(f" {f.name}={val!r},")
232
486
  lines.append(")")
233
487
  return "\n".join(lines)
234
488
 
235
489
  # TODO(Kiko): This may not be well placed in this class
236
- def get_generated_client(self):
490
+ def get_generated_client(self) -> object:
491
+ """Get or create the generated OpenAPI client instance."""
237
492
  # If already cached, return immediately
238
493
  if self._gen_client is not None:
239
494
  return self._gen_client
@@ -243,21 +498,15 @@ class SDKConfiguration:
243
498
  if self._gen_client is not None:
244
499
  return self._gen_client
245
500
 
246
- # Import lazily so extras can be enforced outside
501
+ # Import lazily so extra dependencies can be
502
+ # enforced outside the configuration class
247
503
  from arize._generated import api_client as gen
248
504
 
249
505
  cfg = gen.Configuration(host=self.api_url)
250
506
  if self.api_key:
251
- cfg.api_key["ApiKeyAuth"] = self.api_key
507
+ cfg.access_token = self.api_key
252
508
  client = gen.ApiClient(cfg)
253
509
 
254
510
  # Bypass frozen to set the cache once
255
511
  object.__setattr__(self, "_gen_client", client)
256
512
  return client
257
-
258
-
259
- def get_python_version():
260
- return (
261
- f"{sys.version_info.major}.{sys.version_info.minor}."
262
- f"{sys.version_info.micro}"
263
- )
@@ -0,0 +1 @@
1
+ """Constants and configuration values used across the Arize SDK."""
arize/constants/config.py CHANGED
@@ -1,28 +1,35 @@
1
+ """Configuration constants and environment variable names."""
2
+
1
3
  # Authentication
2
4
  ENV_API_KEY = "ARIZE_API_KEY"
3
5
 
4
6
  # Server configuration env vars
7
+ ENV_REGION = "ARIZE_REGION"
5
8
  ENV_API_HOST = "ARIZE_API_HOST"
9
+ ENV_API_SCHEME = "ARIZE_API_SCHEME"
6
10
  ENV_OTLP_HOST = "ARIZE_OTLP_HOST"
11
+ ENV_OTLP_SCHEME = "ARIZE_OTLP_SCHEME"
7
12
  ENV_FLIGHT_HOST = "ARIZE_FLIGHT_HOST"
8
13
  ENV_FLIGHT_PORT = "ARIZE_FLIGHT_PORT"
9
- ENV_FLIGHT_TRANSPORT_SCHEME = "ARIZE_FLIGHT_TRANSPORT_SCHEME"
14
+ ENV_FLIGHT_SCHEME = "ARIZE_FLIGHT_SCHEME"
15
+ ENV_SINGLE_HOST = "ARIZE_SINGLE_HOST"
16
+ ENV_SINGLE_PORT = "ARIZE_SINGLE_PORT"
10
17
  ENV_PYARROW_MAX_CHUNKSIZE = "ARIZE_MAX_CHUNKSIZE"
11
18
  ENV_REQUEST_VERIFY = "ARIZE_REQUEST_VERIFY"
12
- ENV_INSECURE = "ARIZE_INSECURE"
13
19
  ENV_MAX_HTTP_PAYLOAD_SIZE_MB = "ARIZE_MAX_HTTP_PAYLOAD_SIZE_MB"
14
20
  ENV_ARIZE_DIRECTORY = "ARIZE_DIRECTORY"
15
21
  ENV_ENABLE_CACHING = "ARIZE_ENABLE_CACHING"
16
22
 
17
23
  # Server configuration default values
18
24
  DEFAULT_API_HOST = "api.arize.com" # NOTE: Must not prefix with https://
25
+ DEFAULT_API_SCHEME = "https"
19
26
  DEFAULT_OTLP_HOST = "otlp.arize.com" # NOTE: Must not prefix with https://
27
+ DEFAULT_OTLP_SCHEME = "https"
20
28
  DEFAULT_FLIGHT_HOST = "flight.arize.com" # NOTE: Must not prefix with https://
21
29
  DEFAULT_FLIGHT_PORT = 443
22
- DEFAULT_FLIGHT_TRANSPORT_SCHEME = "grpc+tls"
30
+ DEFAULT_FLIGHT_SCHEME = "grpc+tls"
23
31
  DEFAULT_PYARROW_MAX_CHUNKSIZE = 10_000
24
32
  DEFAULT_REQUEST_VERIFY = True
25
- DEFAULT_INSECURE = False
26
33
  DEFAULT_MAX_HTTP_PAYLOAD_SIZE_MB = 100
27
34
  DEFAULT_ARIZE_DIRECTORY = "~/.arize"
28
35
  DEFAULT_ENABLE_CACHING = True