arize 8.0.0a22__py3-none-any.whl → 8.0.0b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171)
  1. arize/__init__.py +28 -19
  2. arize/_exporter/client.py +56 -37
  3. arize/_exporter/parsers/tracing_data_parser.py +41 -30
  4. arize/_exporter/validation.py +3 -3
  5. arize/_flight/client.py +207 -76
  6. arize/_generated/api_client/__init__.py +30 -6
  7. arize/_generated/api_client/api/__init__.py +1 -0
  8. arize/_generated/api_client/api/datasets_api.py +864 -190
  9. arize/_generated/api_client/api/experiments_api.py +167 -131
  10. arize/_generated/api_client/api/projects_api.py +1197 -0
  11. arize/_generated/api_client/api_client.py +2 -2
  12. arize/_generated/api_client/configuration.py +42 -34
  13. arize/_generated/api_client/exceptions.py +2 -2
  14. arize/_generated/api_client/models/__init__.py +15 -4
  15. arize/_generated/api_client/models/dataset.py +10 -10
  16. arize/_generated/api_client/models/dataset_example.py +111 -0
  17. arize/_generated/api_client/models/dataset_example_update.py +100 -0
  18. arize/_generated/api_client/models/dataset_version.py +13 -13
  19. arize/_generated/api_client/models/datasets_create_request.py +16 -8
  20. arize/_generated/api_client/models/datasets_examples_insert_request.py +100 -0
  21. arize/_generated/api_client/models/datasets_examples_list200_response.py +106 -0
  22. arize/_generated/api_client/models/datasets_examples_update_request.py +102 -0
  23. arize/_generated/api_client/models/datasets_list200_response.py +10 -4
  24. arize/_generated/api_client/models/experiment.py +14 -16
  25. arize/_generated/api_client/models/experiment_run.py +108 -0
  26. arize/_generated/api_client/models/experiment_run_create.py +102 -0
  27. arize/_generated/api_client/models/experiments_create_request.py +16 -10
  28. arize/_generated/api_client/models/experiments_list200_response.py +10 -4
  29. arize/_generated/api_client/models/experiments_runs_list200_response.py +19 -5
  30. arize/_generated/api_client/models/{error.py → pagination_metadata.py} +13 -11
  31. arize/_generated/api_client/models/primitive_value.py +172 -0
  32. arize/_generated/api_client/models/problem.py +100 -0
  33. arize/_generated/api_client/models/project.py +99 -0
  34. arize/_generated/api_client/models/{datasets_list_examples200_response.py → projects_create_request.py} +13 -11
  35. arize/_generated/api_client/models/projects_list200_response.py +106 -0
  36. arize/_generated/api_client/rest.py +2 -2
  37. arize/_generated/api_client/test/test_dataset.py +4 -2
  38. arize/_generated/api_client/test/test_dataset_example.py +56 -0
  39. arize/_generated/api_client/test/test_dataset_example_update.py +52 -0
  40. arize/_generated/api_client/test/test_dataset_version.py +7 -2
  41. arize/_generated/api_client/test/test_datasets_api.py +27 -13
  42. arize/_generated/api_client/test/test_datasets_create_request.py +8 -4
  43. arize/_generated/api_client/test/{test_datasets_list_examples200_response.py → test_datasets_examples_insert_request.py} +19 -15
  44. arize/_generated/api_client/test/test_datasets_examples_list200_response.py +66 -0
  45. arize/_generated/api_client/test/test_datasets_examples_update_request.py +61 -0
  46. arize/_generated/api_client/test/test_datasets_list200_response.py +9 -3
  47. arize/_generated/api_client/test/test_experiment.py +2 -4
  48. arize/_generated/api_client/test/test_experiment_run.py +56 -0
  49. arize/_generated/api_client/test/test_experiment_run_create.py +54 -0
  50. arize/_generated/api_client/test/test_experiments_api.py +6 -6
  51. arize/_generated/api_client/test/test_experiments_create_request.py +9 -6
  52. arize/_generated/api_client/test/test_experiments_list200_response.py +9 -5
  53. arize/_generated/api_client/test/test_experiments_runs_list200_response.py +15 -5
  54. arize/_generated/api_client/test/test_pagination_metadata.py +53 -0
  55. arize/_generated/api_client/test/{test_error.py → test_primitive_value.py} +13 -14
  56. arize/_generated/api_client/test/test_problem.py +57 -0
  57. arize/_generated/api_client/test/test_project.py +58 -0
  58. arize/_generated/api_client/test/test_projects_api.py +59 -0
  59. arize/_generated/api_client/test/test_projects_create_request.py +54 -0
  60. arize/_generated/api_client/test/test_projects_list200_response.py +70 -0
  61. arize/_generated/api_client_README.md +43 -29
  62. arize/_generated/protocol/flight/flight_pb2.py +400 -0
  63. arize/_lazy.py +27 -19
  64. arize/client.py +181 -58
  65. arize/config.py +324 -116
  66. arize/constants/__init__.py +1 -0
  67. arize/constants/config.py +11 -4
  68. arize/constants/ml.py +6 -4
  69. arize/constants/openinference.py +2 -0
  70. arize/constants/pyarrow.py +2 -0
  71. arize/constants/spans.py +3 -1
  72. arize/datasets/__init__.py +1 -0
  73. arize/datasets/client.py +304 -84
  74. arize/datasets/errors.py +32 -2
  75. arize/datasets/validation.py +18 -8
  76. arize/embeddings/__init__.py +2 -0
  77. arize/embeddings/auto_generator.py +23 -19
  78. arize/embeddings/base_generators.py +89 -36
  79. arize/embeddings/constants.py +2 -0
  80. arize/embeddings/cv_generators.py +26 -4
  81. arize/embeddings/errors.py +27 -5
  82. arize/embeddings/nlp_generators.py +43 -18
  83. arize/embeddings/tabular_generators.py +46 -31
  84. arize/embeddings/usecases.py +12 -2
  85. arize/exceptions/__init__.py +1 -0
  86. arize/exceptions/auth.py +11 -1
  87. arize/exceptions/base.py +29 -4
  88. arize/exceptions/models.py +21 -2
  89. arize/exceptions/parameters.py +31 -0
  90. arize/exceptions/spaces.py +12 -1
  91. arize/exceptions/types.py +86 -7
  92. arize/exceptions/values.py +220 -20
  93. arize/experiments/__init__.py +13 -0
  94. arize/experiments/client.py +394 -285
  95. arize/experiments/evaluators/__init__.py +1 -0
  96. arize/experiments/evaluators/base.py +74 -41
  97. arize/experiments/evaluators/exceptions.py +6 -3
  98. arize/experiments/evaluators/executors.py +121 -73
  99. arize/experiments/evaluators/rate_limiters.py +106 -57
  100. arize/experiments/evaluators/types.py +34 -7
  101. arize/experiments/evaluators/utils.py +65 -27
  102. arize/experiments/functions.py +103 -101
  103. arize/experiments/tracing.py +52 -44
  104. arize/experiments/types.py +56 -31
  105. arize/logging.py +54 -22
  106. arize/ml/__init__.py +1 -0
  107. arize/ml/batch_validation/__init__.py +1 -0
  108. arize/{models → ml}/batch_validation/errors.py +545 -67
  109. arize/{models → ml}/batch_validation/validator.py +344 -303
  110. arize/ml/bounded_executor.py +47 -0
  111. arize/{models → ml}/casting.py +118 -108
  112. arize/{models → ml}/client.py +339 -118
  113. arize/{models → ml}/proto.py +97 -42
  114. arize/{models → ml}/stream_validation.py +43 -15
  115. arize/ml/surrogate_explainer/__init__.py +1 -0
  116. arize/{models → ml}/surrogate_explainer/mimic.py +25 -10
  117. arize/{types.py → ml/types.py} +355 -354
  118. arize/pre_releases.py +44 -0
  119. arize/projects/__init__.py +1 -0
  120. arize/projects/client.py +134 -0
  121. arize/regions.py +40 -0
  122. arize/spans/__init__.py +1 -0
  123. arize/spans/client.py +204 -175
  124. arize/spans/columns.py +13 -0
  125. arize/spans/conversion.py +60 -37
  126. arize/spans/validation/__init__.py +1 -0
  127. arize/spans/validation/annotations/__init__.py +1 -0
  128. arize/spans/validation/annotations/annotations_validation.py +6 -4
  129. arize/spans/validation/annotations/dataframe_form_validation.py +13 -11
  130. arize/spans/validation/annotations/value_validation.py +35 -11
  131. arize/spans/validation/common/__init__.py +1 -0
  132. arize/spans/validation/common/argument_validation.py +33 -8
  133. arize/spans/validation/common/dataframe_form_validation.py +35 -9
  134. arize/spans/validation/common/errors.py +211 -11
  135. arize/spans/validation/common/value_validation.py +81 -14
  136. arize/spans/validation/evals/__init__.py +1 -0
  137. arize/spans/validation/evals/dataframe_form_validation.py +28 -8
  138. arize/spans/validation/evals/evals_validation.py +34 -4
  139. arize/spans/validation/evals/value_validation.py +26 -3
  140. arize/spans/validation/metadata/__init__.py +1 -1
  141. arize/spans/validation/metadata/argument_validation.py +14 -5
  142. arize/spans/validation/metadata/dataframe_form_validation.py +26 -10
  143. arize/spans/validation/metadata/value_validation.py +24 -10
  144. arize/spans/validation/spans/__init__.py +1 -0
  145. arize/spans/validation/spans/dataframe_form_validation.py +35 -14
  146. arize/spans/validation/spans/spans_validation.py +35 -4
  147. arize/spans/validation/spans/value_validation.py +78 -8
  148. arize/utils/__init__.py +1 -0
  149. arize/utils/arrow.py +31 -15
  150. arize/utils/cache.py +34 -6
  151. arize/utils/dataframe.py +20 -3
  152. arize/utils/online_tasks/__init__.py +2 -0
  153. arize/utils/online_tasks/dataframe_preprocessor.py +58 -47
  154. arize/utils/openinference_conversion.py +44 -5
  155. arize/utils/proto.py +10 -0
  156. arize/utils/size.py +5 -3
  157. arize/utils/types.py +105 -0
  158. arize/version.py +3 -1
  159. {arize-8.0.0a22.dist-info → arize-8.0.0b0.dist-info}/METADATA +13 -6
  160. arize-8.0.0b0.dist-info/RECORD +175 -0
  161. {arize-8.0.0a22.dist-info → arize-8.0.0b0.dist-info}/WHEEL +1 -1
  162. arize-8.0.0b0.dist-info/licenses/LICENSE +176 -0
  163. arize-8.0.0b0.dist-info/licenses/NOTICE +13 -0
  164. arize/_generated/protocol/flight/export_pb2.py +0 -61
  165. arize/_generated/protocol/flight/ingest_pb2.py +0 -365
  166. arize/models/__init__.py +0 -0
  167. arize/models/batch_validation/__init__.py +0 -0
  168. arize/models/bounded_executor.py +0 -34
  169. arize/models/surrogate_explainer/__init__.py +0 -0
  170. arize-8.0.0a22.dist-info/RECORD +0 -146
  171. arize-8.0.0a22.dist-info/licenses/LICENSE.md +0 -12
arize/config.py CHANGED
@@ -1,207 +1,418 @@
1
+ """SDK configuration and settings management for the Arize client."""
2
+
1
3
  import logging
2
4
  import os
3
5
  import sys
4
6
  import threading
5
7
  from dataclasses import dataclass, field, fields
6
8
  from pathlib import Path
7
- from typing import Any, Dict
9
+ from typing import Any
8
10
 
9
11
  from arize.constants.config import (
10
12
  DEFAULT_API_HOST,
13
+ DEFAULT_API_SCHEME,
11
14
  DEFAULT_ARIZE_DIRECTORY,
12
15
  DEFAULT_ENABLE_CACHING,
13
16
  DEFAULT_FLIGHT_HOST,
14
17
  DEFAULT_FLIGHT_PORT,
15
- DEFAULT_FLIGHT_TRANSPORT_SCHEME,
16
- DEFAULT_INSECURE,
18
+ DEFAULT_FLIGHT_SCHEME,
17
19
  DEFAULT_MAX_HTTP_PAYLOAD_SIZE_MB,
18
20
  DEFAULT_OTLP_HOST,
21
+ DEFAULT_OTLP_SCHEME,
19
22
  DEFAULT_PYARROW_MAX_CHUNKSIZE,
20
23
  DEFAULT_REQUEST_VERIFY,
21
24
  DEFAULT_STREAM_MAX_QUEUE_BOUND,
22
25
  DEFAULT_STREAM_MAX_WORKERS,
23
26
  ENV_API_HOST,
24
27
  ENV_API_KEY,
28
+ ENV_API_SCHEME,
25
29
  ENV_ARIZE_DIRECTORY,
26
30
  ENV_ENABLE_CACHING,
27
31
  ENV_FLIGHT_HOST,
28
32
  ENV_FLIGHT_PORT,
29
- ENV_FLIGHT_TRANSPORT_SCHEME,
30
- ENV_INSECURE,
33
+ ENV_FLIGHT_SCHEME,
31
34
  ENV_MAX_HTTP_PAYLOAD_SIZE_MB,
32
35
  ENV_OTLP_HOST,
36
+ ENV_OTLP_SCHEME,
33
37
  ENV_PYARROW_MAX_CHUNKSIZE,
38
+ ENV_REGION,
34
39
  ENV_REQUEST_VERIFY,
40
+ ENV_SINGLE_HOST,
41
+ ENV_SINGLE_PORT,
35
42
  ENV_STREAM_MAX_QUEUE_BOUND,
36
43
  ENV_STREAM_MAX_WORKERS,
37
44
  )
38
45
  from arize.constants.pyarrow import MAX_CHUNKSIZE
39
46
  from arize.exceptions.auth import MissingAPIKeyError
47
+ from arize.regions import REGION_ENDPOINTS, Region
40
48
  from arize.version import __version__
41
49
 
42
50
  logger = logging.getLogger(__name__)
43
51
 
44
-
45
- def _parse_bool(val: bool | str | None) -> bool:
46
- if isinstance(val, bool):
47
- return val
48
- return (val or "").strip().lower() in {"1", "true", "yes", "on"}
49
-
50
-
51
- def _api_key_factory() -> str:
52
- return os.getenv(ENV_API_KEY, "")
53
-
54
-
55
- def _api_host_factory() -> str:
56
- return os.getenv(ENV_API_HOST, DEFAULT_API_HOST)
57
-
58
-
59
- def _api_scheme_factory() -> str:
60
- insecure = os.getenv(ENV_INSECURE, DEFAULT_INSECURE)
61
- if insecure:
62
- return "http"
63
- return "https"
52
+ PYTHON_VERSION = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"
53
+ SENSITIVE_FIELD_MARKERS = ("key", "token", "secret")
54
+ ALLOWED_HTTP_SCHEMES = {"http", "https"}
64
55
 
65
56
 
66
- def _flight_host_factory() -> str:
67
- return os.getenv(ENV_FLIGHT_HOST, DEFAULT_FLIGHT_HOST)
57
+ def _is_sensitive_field(name: str) -> bool:
58
+ n = name.lower()
59
+ return bool(any(k in n for k in SENSITIVE_FIELD_MARKERS))
68
60
 
69
61
 
70
- def _flight_port_factory() -> int:
71
- return int(os.getenv(ENV_FLIGHT_PORT, DEFAULT_FLIGHT_PORT))
62
+ def _mask_secret(secret: str, N: int = 4) -> str:
63
+ """Show first N chars then '***'; empty string if empty."""
64
+ if len(secret) == 0:
65
+ return ""
66
+ return f"{secret[:N]}***"
72
67
 
73
68
 
74
- def _flight_scheme_factory() -> str:
75
- return os.getenv(
76
- ENV_FLIGHT_TRANSPORT_SCHEME, DEFAULT_FLIGHT_TRANSPORT_SCHEME
77
- )
69
+ def _endpoint(scheme: str, base: str, path: str = "") -> str:
70
+ endpoint = scheme + "://" + base.rstrip("/")
71
+ if path:
72
+ endpoint += "/" + path.lstrip("/")
73
+ return endpoint
78
74
 
79
75
 
80
- def _pyarrow_max_chunksize() -> int:
81
- max_chunksize = int(
82
- os.getenv(ENV_PYARROW_MAX_CHUNKSIZE, DEFAULT_PYARROW_MAX_CHUNKSIZE)
83
- )
84
- if max_chunksize <= 0 or max_chunksize > MAX_CHUNKSIZE:
76
+ def _env_http_scheme(name: str, default: str) -> str:
77
+ v = _env_str(name, default).lower()
78
+ if v not in ALLOWED_HTTP_SCHEMES:
85
79
  raise ValueError(
86
- f"Pyarrow max_chunksize must be between 1 and {MAX_CHUNKSIZE}, got {max_chunksize}"
80
+ f"{name} must be one of {sorted(ALLOWED_HTTP_SCHEMES)}. Found {v!r}"
87
81
  )
88
- return max_chunksize
89
-
82
+ return v
90
83
 
91
- def _verify_factory() -> bool:
92
- return _parse_bool(os.getenv(ENV_REQUEST_VERIFY, DEFAULT_REQUEST_VERIFY))
93
84
 
85
+ def _env_str(
86
+ name: str,
87
+ default: str,
88
+ min_len: int | None = None,
89
+ max_len: int | None = None,
90
+ ) -> str:
91
+ val = os.getenv(name, default).strip()
94
92
 
95
- def _stream_max_workers_factory() -> int:
96
- return int(os.getenv(ENV_STREAM_MAX_WORKERS, DEFAULT_STREAM_MAX_WORKERS))
97
-
98
-
99
- def _stream_max_queue_bound_factory() -> int:
100
- return int(
101
- os.getenv(ENV_STREAM_MAX_QUEUE_BOUND, DEFAULT_STREAM_MAX_QUEUE_BOUND)
102
- )
103
-
104
-
105
- def _otlp_scheme_factory() -> str:
106
- insecure = os.getenv(ENV_INSECURE, DEFAULT_INSECURE)
107
- if insecure:
108
- return "http"
109
- return "https"
110
-
111
-
112
- def _otlp_host_factory() -> str:
113
- return os.getenv(ENV_OTLP_HOST, DEFAULT_OTLP_HOST)
114
-
115
-
116
- def _max_http_payload_size_mb_factory() -> float:
117
- return float(
118
- os.getenv(
119
- ENV_MAX_HTTP_PAYLOAD_SIZE_MB, DEFAULT_MAX_HTTP_PAYLOAD_SIZE_MB
93
+ if min_len is not None and len(val) < min_len:
94
+ raise ValueError(
95
+ f"The value of environment variable {name} must be at least {min_len} "
96
+ f"characters long. Found {len(val)} characters."
120
97
  )
121
- )
122
-
123
-
124
- def _arize_dir_factory() -> str:
125
- return os.getenv(ENV_ARIZE_DIRECTORY, DEFAULT_ARIZE_DIRECTORY)
98
+ if max_len is not None and len(val) > max_len:
99
+ raise ValueError(
100
+ f"The value of environment variable {name} must be at most {max_len} "
101
+ f"characters long. Found {len(val)} characters."
102
+ )
103
+ return val
104
+
105
+
106
+ def _env_int(
107
+ name: str,
108
+ default: int,
109
+ min_val: int | None = None,
110
+ max_val: int | None = None,
111
+ ) -> int:
112
+ raw = os.getenv(name, default)
113
+ try:
114
+ val = int(raw)
115
+ except Exception as e:
116
+ raise ValueError(
117
+ f"Environment variable {name} must be an int. Found: {raw!r}"
118
+ ) from e
126
119
 
120
+ if min_val is not None and val < min_val:
121
+ raise ValueError(
122
+ f"The value of environment variable {name} must be at least {min_val}. Found {val}."
123
+ )
124
+ if max_val is not None and val > max_val:
125
+ raise ValueError(
126
+ f"The value of environment variable {name} must be at most {max_val}. Found {val}."
127
+ )
128
+ return val
129
+
130
+
131
+ def _env_float(
132
+ name: str,
133
+ default: float,
134
+ min_val: float | None = None,
135
+ max_val: float | None = None,
136
+ ) -> float:
137
+ raw = os.getenv(name, default)
138
+ try:
139
+ val = float(raw)
140
+ except Exception as e:
141
+ raise ValueError(
142
+ f"Environment variable {name} must be a float. Found: {raw!r}"
143
+ ) from e
127
144
 
128
- def _enable_cache_factory() -> bool:
129
- return _parse_bool(os.getenv(ENV_ENABLE_CACHING, DEFAULT_ENABLE_CACHING))
145
+ if min_val is not None and val < min_val:
146
+ raise ValueError(
147
+ f"The value of environment variable {name} must be at least {min_val}. Found {val}."
148
+ )
149
+ if max_val is not None and val > max_val:
150
+ raise ValueError(
151
+ f"The value of environment variable {name} must be at most {max_val}. Found {val}."
152
+ )
153
+ return val
130
154
 
131
155
 
132
- def _mask_secret(secret: str, N: int = 4) -> str:
133
- """Show first N chars then '***'; empty string if empty."""
134
- return f"{secret[:N]}***"
156
+ def _env_bool(name: str, default: bool) -> bool:
157
+ return _parse_bool(os.getenv(name, str(default)))
135
158
 
136
159
 
137
- def _endpoint(scheme: str, base: str, path: str = "") -> str:
138
- endpoint = scheme + "://" + base.rstrip("/")
139
- if path:
140
- endpoint += "/" + path.lstrip("/")
141
- return endpoint
160
+ def _parse_bool(val: bool | str | None) -> bool:
161
+ if isinstance(val, bool):
162
+ return val
163
+ return (val or "").strip().lower() in {"1", "true", "yes", "on"}
142
164
 
143
165
 
144
166
  @dataclass(frozen=True)
145
167
  class SDKConfiguration:
146
- api_key: str = field(default_factory=_api_key_factory)
147
- api_host: str = field(default_factory=_api_host_factory)
148
- api_scheme: str = field(default_factory=_api_scheme_factory)
149
- otlp_host: str = field(default_factory=_otlp_host_factory)
150
- otlp_scheme: str = field(default_factory=_otlp_scheme_factory)
151
- flight_server_host: str = field(default_factory=_flight_host_factory)
152
- flight_server_port: int = field(default_factory=_flight_port_factory)
153
- flight_scheme: str = field(default_factory=_flight_scheme_factory)
154
- pyarrow_max_chunksize: int = field(default_factory=_pyarrow_max_chunksize)
155
- request_verify: bool = field(default_factory=_verify_factory)
156
- stream_max_workers: int = field(default_factory=_stream_max_workers_factory)
168
+ """Configuration for the Arize SDK with endpoint and authentication settings.
169
+
170
+ This class is used internally by ArizeClient to manage SDK configuration. It is not
171
+ recommended to use this class directly; users should interact with ArizeClient
172
+ instead.
173
+
174
+ Each configuration parameter follows this resolution order:
175
+ 1. Explicit value passed to ArizeClient constructor (highest priority)
176
+ 2. Environment variable value
177
+ 3. Built-in default value (lowest priority)
178
+
179
+ Args:
180
+ api_key: Arize API key for authentication. Required.
181
+ Environment variable: ARIZE_API_KEY.
182
+ Default: None (must be provided via argument or environment variable).
183
+ api_host: API endpoint host.
184
+ Environment variable: ARIZE_API_HOST.
185
+ Default: "api.arize.com".
186
+ api_scheme: API endpoint scheme (http/https).
187
+ Environment variable: ARIZE_API_SCHEME.
188
+ Default: "https".
189
+ otlp_host: OTLP (OpenTelemetry Protocol) endpoint host.
190
+ Environment variable: ARIZE_OTLP_HOST.
191
+ Default: "otlp.arize.com".
192
+ otlp_scheme: OTLP endpoint scheme (http/https).
193
+ Environment variable: ARIZE_OTLP_SCHEME.
194
+ Default: "https".
195
+ flight_host: Apache Arrow Flight endpoint host.
196
+ Environment variable: ARIZE_FLIGHT_HOST.
197
+ Default: "flight.arize.com".
198
+ flight_port: Apache Arrow Flight endpoint port (1-65535).
199
+ Environment variable: ARIZE_FLIGHT_PORT.
200
+ Default: 443.
201
+ flight_scheme: Apache Arrow Flight endpoint scheme.
202
+ Environment variable: ARIZE_FLIGHT_SCHEME.
203
+ Default: "grpc+tls".
204
+ pyarrow_max_chunksize: Maximum chunk size for PyArrow operations (1 to MAX_CHUNKSIZE).
205
+ Environment variable: ARIZE_MAX_CHUNKSIZE.
206
+ Default: 10_000.
207
+ request_verify: Whether to verify SSL certificates for HTTP requests.
208
+ Environment variable: ARIZE_REQUEST_VERIFY.
209
+ Default: True.
210
+ stream_max_workers: Maximum number of worker threads for streaming operations (minimum: 1).
211
+ Environment variable: ARIZE_STREAM_MAX_WORKERS.
212
+ Default: 8.
213
+ stream_max_queue_bound: Maximum queue size for streaming operations (minimum: 1).
214
+ Environment variable: ARIZE_STREAM_MAX_QUEUE_BOUND.
215
+ Default: 5000.
216
+ max_http_payload_size_mb: Maximum HTTP payload size in megabytes (minimum: 1).
217
+ Environment variable: ARIZE_MAX_HTTP_PAYLOAD_SIZE_MB.
218
+ Default: 100.
219
+ arize_directory: Directory for Arize SDK files (cache, logs, etc.).
220
+ Environment variable: ARIZE_DIRECTORY.
221
+ Default: "~/.arize".
222
+ enable_caching: Whether to enable local caching.
223
+ Environment variable: ARIZE_ENABLE_CACHING.
224
+ Default: True.
225
+ region: Arize region (e.g., US_CENTRAL, EU_WEST). When specified, overrides
226
+ individual host/port settings.
227
+ Environment variable: ARIZE_REGION.
228
+ Default: Region.UNSPECIFIED.
229
+ single_host: Single host to use for all endpoints. Overrides individual host settings.
230
+ Environment variable: ARIZE_SINGLE_HOST.
231
+ Default: "" (not set).
232
+ single_port: Single port to use for all endpoints. Overrides individual port settings (0-65535).
233
+ Environment variable: ARIZE_SINGLE_PORT.
234
+ Default: 0 (not set).
235
+ """
236
+
237
+ api_key: str = field(
238
+ default_factory=lambda: _env_str(ENV_API_KEY, ""),
239
+ )
240
+ api_host: str = field(
241
+ default_factory=lambda: _env_str(ENV_API_HOST, DEFAULT_API_HOST)
242
+ )
243
+ api_scheme: str = field(
244
+ default_factory=lambda: _env_http_scheme(
245
+ ENV_API_SCHEME,
246
+ DEFAULT_API_SCHEME,
247
+ ),
248
+ )
249
+ otlp_host: str = field(
250
+ default_factory=lambda: _env_str(ENV_OTLP_HOST, DEFAULT_OTLP_HOST)
251
+ )
252
+ otlp_scheme: str = field(
253
+ default_factory=lambda: _env_http_scheme(
254
+ ENV_OTLP_SCHEME,
255
+ DEFAULT_OTLP_SCHEME,
256
+ ),
257
+ )
258
+ flight_host: str = field(
259
+ default_factory=lambda: _env_str(ENV_FLIGHT_HOST, DEFAULT_FLIGHT_HOST)
260
+ )
261
+ flight_port: int = field(
262
+ default_factory=lambda: _env_int(
263
+ ENV_FLIGHT_PORT, DEFAULT_FLIGHT_PORT, min_val=1, max_val=65535
264
+ )
265
+ )
266
+ flight_scheme: str = field(
267
+ default_factory=lambda: _env_str(
268
+ ENV_FLIGHT_SCHEME,
269
+ DEFAULT_FLIGHT_SCHEME,
270
+ ),
271
+ )
272
+ pyarrow_max_chunksize: int = field(
273
+ default_factory=lambda: _env_int(
274
+ ENV_PYARROW_MAX_CHUNKSIZE,
275
+ DEFAULT_PYARROW_MAX_CHUNKSIZE,
276
+ min_val=1,
277
+ max_val=MAX_CHUNKSIZE,
278
+ )
279
+ )
280
+ request_verify: bool = field(
281
+ default_factory=lambda: _env_bool(
282
+ ENV_REQUEST_VERIFY, DEFAULT_REQUEST_VERIFY
283
+ )
284
+ )
285
+ stream_max_workers: int = field(
286
+ default_factory=lambda: _env_int(
287
+ ENV_STREAM_MAX_WORKERS, DEFAULT_STREAM_MAX_WORKERS, min_val=1
288
+ )
289
+ )
157
290
  stream_max_queue_bound: int = field(
158
- default_factory=_stream_max_queue_bound_factory
291
+ default_factory=lambda: _env_int(
292
+ ENV_STREAM_MAX_QUEUE_BOUND,
293
+ DEFAULT_STREAM_MAX_QUEUE_BOUND,
294
+ min_val=1,
295
+ )
159
296
  )
160
297
  max_http_payload_size_mb: float = field(
161
- default_factory=_max_http_payload_size_mb_factory
298
+ default_factory=lambda: _env_float(
299
+ ENV_MAX_HTTP_PAYLOAD_SIZE_MB,
300
+ DEFAULT_MAX_HTTP_PAYLOAD_SIZE_MB,
301
+ min_val=1,
302
+ )
303
+ )
304
+ arize_directory: str = field(
305
+ default_factory=lambda: _env_str(
306
+ ENV_ARIZE_DIRECTORY, DEFAULT_ARIZE_DIRECTORY
307
+ )
308
+ )
309
+ enable_caching: bool = field(
310
+ default_factory=lambda: _env_bool(
311
+ ENV_ENABLE_CACHING, DEFAULT_ENABLE_CACHING
312
+ )
313
+ )
314
+ region: Region = field(
315
+ default_factory=lambda: Region(_env_str(ENV_REGION, ""))
316
+ )
317
+ single_host: str = field(
318
+ default_factory=lambda: _env_str(ENV_SINGLE_HOST, "")
319
+ )
320
+ single_port: int = field(
321
+ default_factory=lambda: _env_int(
322
+ ENV_SINGLE_PORT, 0, min_val=0, max_val=65535
323
+ )
162
324
  )
163
- arize_direcory: str = field(default_factory=_arize_dir_factory)
164
- enable_caching: bool = field(default_factory=_enable_cache_factory)
165
325
 
166
326
  # Private, excluded from comparisons & repr
167
- _headers: Dict[str, str] = field(init=False, repr=False, compare=False)
168
327
  _gen_client: Any = field(default=None, repr=False, compare=False)
169
328
  _gen_lock: threading.Lock = field(
170
329
  default_factory=threading.Lock, repr=False, compare=False
171
330
  )
172
331
 
173
- def __post_init__(self):
332
+ def __post_init__(self) -> None:
333
+ """Validate and configure SDK endpoints after initialization.
334
+
335
+ Raises:
336
+ MissingAPIKeyError: If API key is not provided.
337
+ """
174
338
  # Validate Configuration
175
339
  if not self.api_key:
176
340
  raise MissingAPIKeyError()
177
341
 
342
+ has_single_host = bool(self.single_host)
343
+ has_single_port = self.single_port != 0
344
+ has_region = self.region is not Region.UNSPECIFIED
345
+ if (has_single_host or has_single_port) and has_region:
346
+ logger.info(
347
+ "Multiple endpoint override options provided. Preference order is: "
348
+ "region > single_host/single_port > per-endpoint host/port."
349
+ )
350
+
351
+ # Single host override: if single_host is set, it overrides hosts
352
+ if has_single_host:
353
+ logger.info(
354
+ "Single host %r provided; overriding hosts configuration with single host.",
355
+ self.single_host,
356
+ )
357
+ object.__setattr__(self, "api_host", self.single_host)
358
+ object.__setattr__(self, "otlp_host", self.single_host)
359
+ object.__setattr__(self, "flight_host", self.single_host)
360
+
361
+ # Single port override: if single_port is set, it overrides ports
362
+ if has_single_port:
363
+ logger.info(
364
+ "Single port %s provided; overriding ports configuration with single port.",
365
+ self.single_port,
366
+ )
367
+ object.__setattr__(self, "flight_port", self.single_port)
368
+
369
+ # Region override: if region is set, it *always* wins over host/port fields
370
+ if has_region:
371
+ endpoints = REGION_ENDPOINTS[self.region]
372
+
373
+ # Override config (region trumps everything)
374
+ logger.info(
375
+ "Region %s provided; overriding hosts & ports configuration with region defaults.",
376
+ self.region.value,
377
+ )
378
+ object.__setattr__(self, "api_host", endpoints.api_host)
379
+ object.__setattr__(self, "otlp_host", endpoints.otlp_host)
380
+ object.__setattr__(self, "flight_host", endpoints.flight_host)
381
+ object.__setattr__(self, "flight_port", endpoints.flight_port)
382
+
178
383
  @property
179
384
  def cache_dir(self) -> str:
180
- return str(Path(self.arize_direcory) / "cache")
385
+ """Return the path to the cache directory."""
386
+ return str(Path(self.arize_directory) / "cache")
181
387
 
182
388
  @property
183
389
  def api_url(self) -> str:
390
+ """Return the base API URL."""
184
391
  return _endpoint(self.api_scheme, self.api_host)
185
392
 
186
393
  @property
187
394
  def otlp_url(self) -> str:
395
+ """Return the OTLP endpoint URL."""
188
396
  return _endpoint(self.otlp_scheme, self.otlp_host, "/v1")
189
397
 
190
398
  @property
191
399
  def files_url(self) -> str:
400
+ """Return the files upload endpoint URL."""
192
401
  return _endpoint(self.api_scheme, self.api_host, "/v1/pandas_arrow")
193
402
 
194
403
  @property
195
404
  def records_url(self) -> str:
405
+ """Return the records logging endpoint URL."""
196
406
  return _endpoint(self.api_scheme, self.api_host, "/v1/log")
197
407
 
198
408
  @property
199
- def headers(self) -> Dict[str, str]:
409
+ def headers(self) -> dict[str, str]:
410
+ """Return HTTP headers for API requests."""
200
411
  # Create base headers
201
412
  return {
202
413
  "authorization": self.api_key,
203
414
  "sdk-language": "python",
204
- "language-version": get_python_version(),
415
+ "language-version": PYTHON_VERSION,
205
416
  "sdk-version": __version__,
206
417
  # "arize-space-id": self._space_id,
207
418
  # "arize-interface": "batch",
@@ -209,31 +420,34 @@ class SDKConfiguration:
209
420
  }
210
421
 
211
422
  @property
212
- def headers_grpc(self) -> Dict[str, str]:
423
+ def headers_grpc(self) -> dict[str, str]:
424
+ """Return headers for gRPC requests."""
213
425
  return {
214
426
  "authorization": self.api_key,
215
427
  "Grpc-Metadata-sdk-language": "python",
216
- "Grpc-Metadata-language-version": get_python_version(),
428
+ "Grpc-Metadata-language-version": PYTHON_VERSION,
217
429
  "Grpc-Metadata-sdk-version": __version__,
218
430
  # "Grpc-Metadata-arize-space-id": space_id,
219
431
  # "Grpc-Metadata-arize-interface": "stream",
220
432
  }
221
433
 
222
434
  def __repr__(self) -> str:
435
+ """Return a detailed string representation with masked sensitive fields."""
223
436
  # Dynamically build repr for all fields
224
437
  lines = [f"{self.__class__.__name__}("]
225
438
  for f in fields(self):
226
439
  if not f.repr:
227
440
  continue
228
441
  val = getattr(self, f.name)
229
- if f.name == "api_key":
442
+ if _is_sensitive_field(f.name):
230
443
  val = _mask_secret(val, 6)
231
444
  lines.append(f" {f.name}={val!r},")
232
445
  lines.append(")")
233
446
  return "\n".join(lines)
234
447
 
235
448
  # TODO(Kiko): This may not be well placed in this class
236
- def get_generated_client(self):
449
+ def get_generated_client(self) -> object:
450
+ """Get or create the generated OpenAPI client instance."""
237
451
  # If already cached, return immediately
238
452
  if self._gen_client is not None:
239
453
  return self._gen_client
@@ -243,21 +457,15 @@ class SDKConfiguration:
243
457
  if self._gen_client is not None:
244
458
  return self._gen_client
245
459
 
246
- # Import lazily so extras can be enforced outside
460
+ # Import lazily so extra dependencies can be
461
+ # enforced outside the configuration class
247
462
  from arize._generated import api_client as gen
248
463
 
249
464
  cfg = gen.Configuration(host=self.api_url)
250
465
  if self.api_key:
251
- cfg.api_key["ApiKeyAuth"] = self.api_key
466
+ cfg.access_token = self.api_key
252
467
  client = gen.ApiClient(cfg)
253
468
 
254
469
  # Bypass frozen to set the cache once
255
470
  object.__setattr__(self, "_gen_client", client)
256
471
  return client
257
-
258
-
259
- def get_python_version():
260
- return (
261
- f"{sys.version_info.major}.{sys.version_info.minor}."
262
- f"{sys.version_info.micro}"
263
- )
@@ -0,0 +1 @@
1
+ """Constants and configuration values used across the Arize SDK."""
arize/constants/config.py CHANGED
@@ -1,28 +1,35 @@
1
+ """Configuration constants and environment variable names."""
2
+
1
3
  # Authentication
2
4
  ENV_API_KEY = "ARIZE_API_KEY"
3
5
 
4
6
  # Server configuration env vars
7
+ ENV_REGION = "ARIZE_REGION"
5
8
  ENV_API_HOST = "ARIZE_API_HOST"
9
+ ENV_API_SCHEME = "ARIZE_API_SCHEME"
6
10
  ENV_OTLP_HOST = "ARIZE_OTLP_HOST"
11
+ ENV_OTLP_SCHEME = "ARIZE_OTLP_SCHEME"
7
12
  ENV_FLIGHT_HOST = "ARIZE_FLIGHT_HOST"
8
13
  ENV_FLIGHT_PORT = "ARIZE_FLIGHT_PORT"
9
- ENV_FLIGHT_TRANSPORT_SCHEME = "ARIZE_FLIGHT_TRANSPORT_SCHEME"
14
+ ENV_FLIGHT_SCHEME = "ARIZE_FLIGHT_SCHEME"
15
+ ENV_SINGLE_HOST = "ARIZE_SINGLE_HOST"
16
+ ENV_SINGLE_PORT = "ARIZE_SINGLE_PORT"
10
17
  ENV_PYARROW_MAX_CHUNKSIZE = "ARIZE_MAX_CHUNKSIZE"
11
18
  ENV_REQUEST_VERIFY = "ARIZE_REQUEST_VERIFY"
12
- ENV_INSECURE = "ARIZE_INSECURE"
13
19
  ENV_MAX_HTTP_PAYLOAD_SIZE_MB = "ARIZE_MAX_HTTP_PAYLOAD_SIZE_MB"
14
20
  ENV_ARIZE_DIRECTORY = "ARIZE_DIRECTORY"
15
21
  ENV_ENABLE_CACHING = "ARIZE_ENABLE_CACHING"
16
22
 
17
23
  # Server configuration default values
18
24
  DEFAULT_API_HOST = "api.arize.com" # NOTE: Must not prefix with https://
25
+ DEFAULT_API_SCHEME = "https"
19
26
  DEFAULT_OTLP_HOST = "otlp.arize.com" # NOTE: Must not prefix with https://
27
+ DEFAULT_OTLP_SCHEME = "https"
20
28
  DEFAULT_FLIGHT_HOST = "flight.arize.com" # NOTE: Must not prefix with https://
21
29
  DEFAULT_FLIGHT_PORT = 443
22
- DEFAULT_FLIGHT_TRANSPORT_SCHEME = "grpc+tls"
30
+ DEFAULT_FLIGHT_SCHEME = "grpc+tls"
23
31
  DEFAULT_PYARROW_MAX_CHUNKSIZE = 10_000
24
32
  DEFAULT_REQUEST_VERIFY = True
25
- DEFAULT_INSECURE = False
26
33
  DEFAULT_MAX_HTTP_PAYLOAD_SIZE_MB = 100
27
34
  DEFAULT_ARIZE_DIRECTORY = "~/.arize"
28
35
  DEFAULT_ENABLE_CACHING = True
arize/constants/ml.py CHANGED
@@ -1,3 +1,5 @@
1
+ """Machine learning constants and validation limits."""
2
+
1
3
  import json
2
4
  from pathlib import Path
3
5
 
@@ -30,7 +32,7 @@ MAX_PROMPT_TEMPLATE_VERSION_LENGTH_TRUNCATION = 50
30
32
  MAX_NUMBER_OF_EMBEDDINGS = 30
31
33
  MAX_EMBEDDING_DIMENSIONALITY = 20_000
32
34
  # # The maximum number of classes for multi class
33
- MAX_NUMBER_OF_MULTI_CLASS_CLASSES = 300
35
+ MAX_NUMBER_OF_MULTI_CLASS_CLASSES = 500
34
36
  MAX_MULTI_CLASS_NAME_LENGTH = 100
35
37
  # The maximum number of references in embedding similarity search params
36
38
  MAX_NUMBER_OF_SIMILARITY_REFERENCES = 10
@@ -40,9 +42,9 @@ MAX_NUMBER_OF_SIMILARITY_REFERENCES = 10
40
42
  # GENERATED_LLM_PARAMS_JSON_COL = "arize_generated_llm_params_json"
41
43
  #
42
44
  # # reserved columns for LLM run metadata
43
- LLM_RUN_METADATA_TOTAL_TOKEN_COUNT_TAG_NAME = "total_token_count"
44
- LLM_RUN_METADATA_PROMPT_TOKEN_COUNT_TAG_NAME = "prompt_token_count"
45
- LLM_RUN_METADATA_RESPONSE_TOKEN_COUNT_TAG_NAME = "response_token_count"
45
+ LLM_RUN_METADATA_TOTAL_TOKEN_COUNT_TAG_NAME = "total_token_count" # noqa: S105
46
+ LLM_RUN_METADATA_PROMPT_TOKEN_COUNT_TAG_NAME = "prompt_token_count" # noqa: S105
47
+ LLM_RUN_METADATA_RESPONSE_TOKEN_COUNT_TAG_NAME = "response_token_count" # noqa: S105
46
48
  LLM_RUN_METADATA_RESPONSE_LATENCY_MS_TAG_NAME = "response_latency_ms"
47
49
  #
48
50
  # all reserved tags
@@ -1,3 +1,5 @@
1
+ """OpenInference semantic convention constants and attribute definitions."""
2
+
1
3
  import openinference.semconv.trace as oinf
2
4
 
3
5
  OPEN_INFERENCE_JSON_STR_TYPES = frozenset(
@@ -1 +1,3 @@
1
+ """PyArrow-related constants for data processing."""
2
+
1
3
  MAX_CHUNKSIZE = 100_000
arize/constants/spans.py CHANGED
@@ -1,4 +1,6 @@
1
- # The defualt format used to parse datetime objects from strings
1
+ """Span-related constants and validation limits for tracing."""
2
+
3
+ # The default format used to parse datetime objects from strings
2
4
  DEFAULT_DATETIME_FMT = "%Y-%m-%dT%H:%M:%S.%f+00:00"
3
5
  # Minimum/Maximum number of characters for span/trace/parent ids in spans
4
6
  SPAN_ID_MIN_STR_LENGTH = 12
@@ -0,0 +1 @@
1
+ """Dataset management and validation utilities for the Arize SDK."""