arize 8.0.0a22__py3-none-any.whl → 8.0.0a23__py3-none-any.whl
This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- arize/__init__.py +17 -9
- arize/_exporter/client.py +55 -36
- arize/_exporter/parsers/tracing_data_parser.py +41 -30
- arize/_exporter/validation.py +3 -3
- arize/_flight/client.py +207 -76
- arize/_generated/api_client/__init__.py +30 -6
- arize/_generated/api_client/api/__init__.py +1 -0
- arize/_generated/api_client/api/datasets_api.py +864 -190
- arize/_generated/api_client/api/experiments_api.py +167 -131
- arize/_generated/api_client/api/projects_api.py +1197 -0
- arize/_generated/api_client/api_client.py +2 -2
- arize/_generated/api_client/configuration.py +42 -34
- arize/_generated/api_client/exceptions.py +2 -2
- arize/_generated/api_client/models/__init__.py +15 -4
- arize/_generated/api_client/models/dataset.py +10 -10
- arize/_generated/api_client/models/dataset_example.py +111 -0
- arize/_generated/api_client/models/dataset_example_update.py +100 -0
- arize/_generated/api_client/models/dataset_version.py +13 -13
- arize/_generated/api_client/models/datasets_create_request.py +16 -8
- arize/_generated/api_client/models/datasets_examples_insert_request.py +100 -0
- arize/_generated/api_client/models/datasets_examples_list200_response.py +106 -0
- arize/_generated/api_client/models/datasets_examples_update_request.py +102 -0
- arize/_generated/api_client/models/datasets_list200_response.py +10 -4
- arize/_generated/api_client/models/experiment.py +14 -16
- arize/_generated/api_client/models/experiment_run.py +108 -0
- arize/_generated/api_client/models/experiment_run_create.py +102 -0
- arize/_generated/api_client/models/experiments_create_request.py +16 -10
- arize/_generated/api_client/models/experiments_list200_response.py +10 -4
- arize/_generated/api_client/models/experiments_runs_list200_response.py +19 -5
- arize/_generated/api_client/models/{error.py → pagination_metadata.py} +13 -11
- arize/_generated/api_client/models/primitive_value.py +172 -0
- arize/_generated/api_client/models/problem.py +100 -0
- arize/_generated/api_client/models/project.py +99 -0
- arize/_generated/api_client/models/{datasets_list_examples200_response.py → projects_create_request.py} +13 -11
- arize/_generated/api_client/models/projects_list200_response.py +106 -0
- arize/_generated/api_client/rest.py +2 -2
- arize/_generated/api_client/test/test_dataset.py +4 -2
- arize/_generated/api_client/test/test_dataset_example.py +56 -0
- arize/_generated/api_client/test/test_dataset_example_update.py +52 -0
- arize/_generated/api_client/test/test_dataset_version.py +7 -2
- arize/_generated/api_client/test/test_datasets_api.py +27 -13
- arize/_generated/api_client/test/test_datasets_create_request.py +8 -4
- arize/_generated/api_client/test/{test_datasets_list_examples200_response.py → test_datasets_examples_insert_request.py} +19 -15
- arize/_generated/api_client/test/test_datasets_examples_list200_response.py +66 -0
- arize/_generated/api_client/test/test_datasets_examples_update_request.py +61 -0
- arize/_generated/api_client/test/test_datasets_list200_response.py +9 -3
- arize/_generated/api_client/test/test_experiment.py +2 -4
- arize/_generated/api_client/test/test_experiment_run.py +56 -0
- arize/_generated/api_client/test/test_experiment_run_create.py +54 -0
- arize/_generated/api_client/test/test_experiments_api.py +6 -6
- arize/_generated/api_client/test/test_experiments_create_request.py +9 -6
- arize/_generated/api_client/test/test_experiments_list200_response.py +9 -5
- arize/_generated/api_client/test/test_experiments_runs_list200_response.py +15 -5
- arize/_generated/api_client/test/test_pagination_metadata.py +53 -0
- arize/_generated/api_client/test/{test_error.py → test_primitive_value.py} +13 -14
- arize/_generated/api_client/test/test_problem.py +57 -0
- arize/_generated/api_client/test/test_project.py +58 -0
- arize/_generated/api_client/test/test_projects_api.py +59 -0
- arize/_generated/api_client/test/test_projects_create_request.py +54 -0
- arize/_generated/api_client/test/test_projects_list200_response.py +70 -0
- arize/_generated/api_client_README.md +43 -29
- arize/_generated/protocol/flight/flight_pb2.py +400 -0
- arize/_lazy.py +27 -19
- arize/client.py +268 -55
- arize/config.py +365 -116
- arize/constants/__init__.py +1 -0
- arize/constants/config.py +11 -4
- arize/constants/ml.py +6 -4
- arize/constants/openinference.py +2 -0
- arize/constants/pyarrow.py +2 -0
- arize/constants/spans.py +3 -1
- arize/datasets/__init__.py +1 -0
- arize/datasets/client.py +299 -84
- arize/datasets/errors.py +32 -2
- arize/datasets/validation.py +18 -8
- arize/embeddings/__init__.py +2 -0
- arize/embeddings/auto_generator.py +23 -19
- arize/embeddings/base_generators.py +89 -36
- arize/embeddings/constants.py +2 -0
- arize/embeddings/cv_generators.py +26 -4
- arize/embeddings/errors.py +27 -5
- arize/embeddings/nlp_generators.py +31 -12
- arize/embeddings/tabular_generators.py +32 -20
- arize/embeddings/usecases.py +12 -2
- arize/exceptions/__init__.py +1 -0
- arize/exceptions/auth.py +11 -1
- arize/exceptions/base.py +29 -4
- arize/exceptions/models.py +21 -2
- arize/exceptions/parameters.py +31 -0
- arize/exceptions/spaces.py +12 -1
- arize/exceptions/types.py +86 -7
- arize/exceptions/values.py +220 -20
- arize/experiments/__init__.py +1 -0
- arize/experiments/client.py +389 -285
- arize/experiments/evaluators/__init__.py +1 -0
- arize/experiments/evaluators/base.py +74 -41
- arize/experiments/evaluators/exceptions.py +6 -3
- arize/experiments/evaluators/executors.py +121 -73
- arize/experiments/evaluators/rate_limiters.py +106 -57
- arize/experiments/evaluators/types.py +34 -7
- arize/experiments/evaluators/utils.py +65 -27
- arize/experiments/functions.py +103 -101
- arize/experiments/tracing.py +52 -44
- arize/experiments/types.py +56 -31
- arize/logging.py +54 -22
- arize/models/__init__.py +1 -0
- arize/models/batch_validation/__init__.py +1 -0
- arize/models/batch_validation/errors.py +543 -65
- arize/models/batch_validation/validator.py +339 -300
- arize/models/bounded_executor.py +20 -7
- arize/models/casting.py +75 -29
- arize/models/client.py +326 -107
- arize/models/proto.py +95 -40
- arize/models/stream_validation.py +42 -14
- arize/models/surrogate_explainer/__init__.py +1 -0
- arize/models/surrogate_explainer/mimic.py +24 -13
- arize/pre_releases.py +43 -0
- arize/projects/__init__.py +1 -0
- arize/projects/client.py +129 -0
- arize/regions.py +40 -0
- arize/spans/__init__.py +1 -0
- arize/spans/client.py +130 -106
- arize/spans/columns.py +13 -0
- arize/spans/conversion.py +54 -38
- arize/spans/validation/__init__.py +1 -0
- arize/spans/validation/annotations/__init__.py +1 -0
- arize/spans/validation/annotations/annotations_validation.py +6 -4
- arize/spans/validation/annotations/dataframe_form_validation.py +13 -11
- arize/spans/validation/annotations/value_validation.py +35 -11
- arize/spans/validation/common/__init__.py +1 -0
- arize/spans/validation/common/argument_validation.py +33 -8
- arize/spans/validation/common/dataframe_form_validation.py +35 -9
- arize/spans/validation/common/errors.py +211 -11
- arize/spans/validation/common/value_validation.py +80 -13
- arize/spans/validation/evals/__init__.py +1 -0
- arize/spans/validation/evals/dataframe_form_validation.py +28 -8
- arize/spans/validation/evals/evals_validation.py +34 -4
- arize/spans/validation/evals/value_validation.py +26 -3
- arize/spans/validation/metadata/__init__.py +1 -1
- arize/spans/validation/metadata/argument_validation.py +14 -5
- arize/spans/validation/metadata/dataframe_form_validation.py +26 -10
- arize/spans/validation/metadata/value_validation.py +24 -10
- arize/spans/validation/spans/__init__.py +1 -0
- arize/spans/validation/spans/dataframe_form_validation.py +34 -13
- arize/spans/validation/spans/spans_validation.py +35 -4
- arize/spans/validation/spans/value_validation.py +76 -7
- arize/types.py +293 -157
- arize/utils/__init__.py +1 -0
- arize/utils/arrow.py +31 -15
- arize/utils/cache.py +34 -6
- arize/utils/dataframe.py +19 -2
- arize/utils/online_tasks/__init__.py +2 -0
- arize/utils/online_tasks/dataframe_preprocessor.py +53 -41
- arize/utils/openinference_conversion.py +44 -5
- arize/utils/proto.py +10 -0
- arize/utils/size.py +5 -3
- arize/version.py +3 -1
- {arize-8.0.0a22.dist-info → arize-8.0.0a23.dist-info}/METADATA +4 -3
- arize-8.0.0a23.dist-info/RECORD +174 -0
- {arize-8.0.0a22.dist-info → arize-8.0.0a23.dist-info}/WHEEL +1 -1
- arize-8.0.0a23.dist-info/licenses/LICENSE +176 -0
- arize-8.0.0a23.dist-info/licenses/NOTICE +13 -0
- arize/_generated/protocol/flight/export_pb2.py +0 -61
- arize/_generated/protocol/flight/ingest_pb2.py +0 -365
- arize-8.0.0a22.dist-info/RECORD +0 -146
- arize-8.0.0a22.dist-info/licenses/LICENSE.md +0 -12
arize/config.py
CHANGED
|
@@ -1,207 +1,459 @@
|
|
|
1
|
+
"""SDK configuration and settings management for the Arize client."""
|
|
2
|
+
|
|
1
3
|
import logging
|
|
2
4
|
import os
|
|
3
5
|
import sys
|
|
4
6
|
import threading
|
|
5
7
|
from dataclasses import dataclass, field, fields
|
|
6
8
|
from pathlib import Path
|
|
7
|
-
from typing import Any
|
|
9
|
+
from typing import Any
|
|
8
10
|
|
|
9
11
|
from arize.constants.config import (
|
|
10
12
|
DEFAULT_API_HOST,
|
|
13
|
+
DEFAULT_API_SCHEME,
|
|
11
14
|
DEFAULT_ARIZE_DIRECTORY,
|
|
12
15
|
DEFAULT_ENABLE_CACHING,
|
|
13
16
|
DEFAULT_FLIGHT_HOST,
|
|
14
17
|
DEFAULT_FLIGHT_PORT,
|
|
15
|
-
|
|
16
|
-
DEFAULT_INSECURE,
|
|
18
|
+
DEFAULT_FLIGHT_SCHEME,
|
|
17
19
|
DEFAULT_MAX_HTTP_PAYLOAD_SIZE_MB,
|
|
18
20
|
DEFAULT_OTLP_HOST,
|
|
21
|
+
DEFAULT_OTLP_SCHEME,
|
|
19
22
|
DEFAULT_PYARROW_MAX_CHUNKSIZE,
|
|
20
23
|
DEFAULT_REQUEST_VERIFY,
|
|
21
24
|
DEFAULT_STREAM_MAX_QUEUE_BOUND,
|
|
22
25
|
DEFAULT_STREAM_MAX_WORKERS,
|
|
23
26
|
ENV_API_HOST,
|
|
24
27
|
ENV_API_KEY,
|
|
28
|
+
ENV_API_SCHEME,
|
|
25
29
|
ENV_ARIZE_DIRECTORY,
|
|
26
30
|
ENV_ENABLE_CACHING,
|
|
27
31
|
ENV_FLIGHT_HOST,
|
|
28
32
|
ENV_FLIGHT_PORT,
|
|
29
|
-
|
|
30
|
-
ENV_INSECURE,
|
|
33
|
+
ENV_FLIGHT_SCHEME,
|
|
31
34
|
ENV_MAX_HTTP_PAYLOAD_SIZE_MB,
|
|
32
35
|
ENV_OTLP_HOST,
|
|
36
|
+
ENV_OTLP_SCHEME,
|
|
33
37
|
ENV_PYARROW_MAX_CHUNKSIZE,
|
|
38
|
+
ENV_REGION,
|
|
34
39
|
ENV_REQUEST_VERIFY,
|
|
40
|
+
ENV_SINGLE_HOST,
|
|
41
|
+
ENV_SINGLE_PORT,
|
|
35
42
|
ENV_STREAM_MAX_QUEUE_BOUND,
|
|
36
43
|
ENV_STREAM_MAX_WORKERS,
|
|
37
44
|
)
|
|
38
45
|
from arize.constants.pyarrow import MAX_CHUNKSIZE
|
|
39
46
|
from arize.exceptions.auth import MissingAPIKeyError
|
|
47
|
+
from arize.regions import REGION_ENDPOINTS, Region
|
|
40
48
|
from arize.version import __version__
|
|
41
49
|
|
|
42
50
|
logger = logging.getLogger(__name__)
|
|
43
51
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
return val
|
|
48
|
-
return (val or "").strip().lower() in {"1", "true", "yes", "on"}
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
def _api_key_factory() -> str:
|
|
52
|
-
return os.getenv(ENV_API_KEY, "")
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
def _api_host_factory() -> str:
|
|
56
|
-
return os.getenv(ENV_API_HOST, DEFAULT_API_HOST)
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
def _api_scheme_factory() -> str:
|
|
60
|
-
insecure = os.getenv(ENV_INSECURE, DEFAULT_INSECURE)
|
|
61
|
-
if insecure:
|
|
62
|
-
return "http"
|
|
63
|
-
return "https"
|
|
52
|
+
PYTHON_VERSION = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"
|
|
53
|
+
SENSITIVE_FIELD_MARKERS = ("key", "token", "secret")
|
|
54
|
+
ALLOWED_HTTP_SCHEMES = {"http", "https"}
|
|
64
55
|
|
|
65
56
|
|
|
66
|
-
def
|
|
67
|
-
|
|
57
|
+
def _is_sensitive_field(name: str) -> bool:
|
|
58
|
+
n = name.lower()
|
|
59
|
+
return bool(any(k in n for k in SENSITIVE_FIELD_MARKERS))
|
|
68
60
|
|
|
69
61
|
|
|
70
|
-
def
|
|
71
|
-
|
|
62
|
+
def _mask_secret(secret: str, N: int = 4) -> str:
|
|
63
|
+
"""Show first N chars then '***'; empty string if empty."""
|
|
64
|
+
if len(secret) == 0:
|
|
65
|
+
return ""
|
|
66
|
+
return f"{secret[:N]}***"
|
|
72
67
|
|
|
73
68
|
|
|
74
|
-
def
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
69
|
+
def _endpoint(scheme: str, base: str, path: str = "") -> str:
|
|
70
|
+
endpoint = scheme + "://" + base.rstrip("/")
|
|
71
|
+
if path:
|
|
72
|
+
endpoint += "/" + path.lstrip("/")
|
|
73
|
+
return endpoint
|
|
78
74
|
|
|
79
75
|
|
|
80
|
-
def
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
)
|
|
84
|
-
if max_chunksize <= 0 or max_chunksize > MAX_CHUNKSIZE:
|
|
76
|
+
def _env_http_scheme(name: str, default: str) -> str:
|
|
77
|
+
v = _env_str(name, default).lower()
|
|
78
|
+
if v not in ALLOWED_HTTP_SCHEMES:
|
|
85
79
|
raise ValueError(
|
|
86
|
-
f"
|
|
80
|
+
f"{name} must be one of {sorted(ALLOWED_HTTP_SCHEMES)}. Found {v!r}"
|
|
87
81
|
)
|
|
88
|
-
return
|
|
89
|
-
|
|
82
|
+
return v
|
|
90
83
|
|
|
91
|
-
def _verify_factory() -> bool:
|
|
92
|
-
return _parse_bool(os.getenv(ENV_REQUEST_VERIFY, DEFAULT_REQUEST_VERIFY))
|
|
93
84
|
|
|
85
|
+
def _env_str(
|
|
86
|
+
name: str,
|
|
87
|
+
default: str,
|
|
88
|
+
min_len: int | None = None,
|
|
89
|
+
max_len: int | None = None,
|
|
90
|
+
) -> str:
|
|
91
|
+
val = os.getenv(name, default).strip()
|
|
94
92
|
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
def _stream_max_queue_bound_factory() -> int:
|
|
100
|
-
return int(
|
|
101
|
-
os.getenv(ENV_STREAM_MAX_QUEUE_BOUND, DEFAULT_STREAM_MAX_QUEUE_BOUND)
|
|
102
|
-
)
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
def _otlp_scheme_factory() -> str:
|
|
106
|
-
insecure = os.getenv(ENV_INSECURE, DEFAULT_INSECURE)
|
|
107
|
-
if insecure:
|
|
108
|
-
return "http"
|
|
109
|
-
return "https"
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
def _otlp_host_factory() -> str:
|
|
113
|
-
return os.getenv(ENV_OTLP_HOST, DEFAULT_OTLP_HOST)
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
def _max_http_payload_size_mb_factory() -> float:
|
|
117
|
-
return float(
|
|
118
|
-
os.getenv(
|
|
119
|
-
ENV_MAX_HTTP_PAYLOAD_SIZE_MB, DEFAULT_MAX_HTTP_PAYLOAD_SIZE_MB
|
|
93
|
+
if min_len is not None and len(val) < min_len:
|
|
94
|
+
raise ValueError(
|
|
95
|
+
f"The value of environment variable {name} must be at least {min_len} "
|
|
96
|
+
f"characters long. Found {len(val)} characters."
|
|
120
97
|
)
|
|
121
|
-
)
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
98
|
+
if max_len is not None and len(val) > max_len:
|
|
99
|
+
raise ValueError(
|
|
100
|
+
f"The value of environment variable {name} must be at most {max_len} "
|
|
101
|
+
f"characters long. Found {len(val)} characters."
|
|
102
|
+
)
|
|
103
|
+
return val
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _env_int(
|
|
107
|
+
name: str,
|
|
108
|
+
default: int,
|
|
109
|
+
min_val: int | None = None,
|
|
110
|
+
max_val: int | None = None,
|
|
111
|
+
) -> int:
|
|
112
|
+
raw = os.getenv(name, default)
|
|
113
|
+
try:
|
|
114
|
+
val = int(raw)
|
|
115
|
+
except Exception as e:
|
|
116
|
+
raise ValueError(
|
|
117
|
+
f"Environment variable {name} must be an int. Found: {raw!r}"
|
|
118
|
+
) from e
|
|
126
119
|
|
|
120
|
+
if min_val is not None and val < min_val:
|
|
121
|
+
raise ValueError(
|
|
122
|
+
f"The value of environment variable {name} must be at least {min_val}. Found {val}."
|
|
123
|
+
)
|
|
124
|
+
if max_val is not None and val > max_val:
|
|
125
|
+
raise ValueError(
|
|
126
|
+
f"The value of environment variable {name} must be at most {max_val}. Found {val}."
|
|
127
|
+
)
|
|
128
|
+
return val
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _env_float(
|
|
132
|
+
name: str,
|
|
133
|
+
default: float,
|
|
134
|
+
min_val: float | None = None,
|
|
135
|
+
max_val: float | None = None,
|
|
136
|
+
) -> float:
|
|
137
|
+
raw = os.getenv(name, default)
|
|
138
|
+
try:
|
|
139
|
+
val = float(raw)
|
|
140
|
+
except Exception as e:
|
|
141
|
+
raise ValueError(
|
|
142
|
+
f"Environment variable {name} must be a float. Found: {raw!r}"
|
|
143
|
+
) from e
|
|
127
144
|
|
|
128
|
-
|
|
129
|
-
|
|
145
|
+
if min_val is not None and val < min_val:
|
|
146
|
+
raise ValueError(
|
|
147
|
+
f"The value of environment variable {name} must be at least {min_val}. Found {val}."
|
|
148
|
+
)
|
|
149
|
+
if max_val is not None and val > max_val:
|
|
150
|
+
raise ValueError(
|
|
151
|
+
f"The value of environment variable {name} must be at most {max_val}. Found {val}."
|
|
152
|
+
)
|
|
153
|
+
return val
|
|
130
154
|
|
|
131
155
|
|
|
132
|
-
def
|
|
133
|
-
|
|
134
|
-
return f"{secret[:N]}***"
|
|
156
|
+
def _env_bool(name: str, default: bool) -> bool:
|
|
157
|
+
return _parse_bool(os.getenv(name, str(default)))
|
|
135
158
|
|
|
136
159
|
|
|
137
|
-
def
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
return endpoint
|
|
160
|
+
def _parse_bool(val: bool | str | None) -> bool:
|
|
161
|
+
if isinstance(val, bool):
|
|
162
|
+
return val
|
|
163
|
+
return (val or "").strip().lower() in {"1", "true", "yes", "on"}
|
|
142
164
|
|
|
143
165
|
|
|
144
166
|
@dataclass(frozen=True)
|
|
145
167
|
class SDKConfiguration:
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
168
|
+
"""Configuration for the Arize SDK with endpoint and authentication settings.
|
|
169
|
+
|
|
170
|
+
This class is used internally by ArizeClient to manage SDK configuration. Users
|
|
171
|
+
typically interact with ArizeClient rather than instantiating this class directly.
|
|
172
|
+
|
|
173
|
+
Configuration Precedence
|
|
174
|
+
------------------------
|
|
175
|
+
Each configuration parameter follows this resolution order:
|
|
176
|
+
1. Explicit value passed to ArizeClient constructor (highest priority)
|
|
177
|
+
2. Environment variable value
|
|
178
|
+
3. Built-in default value (lowest priority)
|
|
179
|
+
|
|
180
|
+
Parameters
|
|
181
|
+
----------
|
|
182
|
+
api_key : str
|
|
183
|
+
Arize API key for authentication. Required.
|
|
184
|
+
Environment variable: ARIZE_API_KEY
|
|
185
|
+
Default: None (must be provided via argument or environment variable)
|
|
186
|
+
|
|
187
|
+
api_host : str
|
|
188
|
+
API endpoint host.
|
|
189
|
+
Environment variable: ARIZE_API_HOST
|
|
190
|
+
Default: "api.arize.com"
|
|
191
|
+
|
|
192
|
+
api_scheme : str
|
|
193
|
+
API endpoint scheme (http/https).
|
|
194
|
+
Environment variable: ARIZE_API_SCHEME
|
|
195
|
+
Default: "https"
|
|
196
|
+
|
|
197
|
+
otlp_host : str
|
|
198
|
+
OTLP (OpenTelemetry Protocol) endpoint host.
|
|
199
|
+
Environment variable: ARIZE_OTLP_HOST
|
|
200
|
+
Default: "otlp.arize.com"
|
|
201
|
+
|
|
202
|
+
otlp_scheme : str
|
|
203
|
+
OTLP endpoint scheme (http/https).
|
|
204
|
+
Environment variable: ARIZE_OTLP_SCHEME
|
|
205
|
+
Default: "https"
|
|
206
|
+
|
|
207
|
+
flight_host : str
|
|
208
|
+
Apache Arrow Flight endpoint host.
|
|
209
|
+
Environment variable: ARIZE_FLIGHT_HOST
|
|
210
|
+
Default: "flight.arize.com"
|
|
211
|
+
|
|
212
|
+
flight_port : int
|
|
213
|
+
Apache Arrow Flight endpoint port (1-65535).
|
|
214
|
+
Environment variable: ARIZE_FLIGHT_PORT
|
|
215
|
+
Default: 443
|
|
216
|
+
|
|
217
|
+
flight_scheme : str
|
|
218
|
+
Apache Arrow Flight endpoint scheme.
|
|
219
|
+
Environment variable: ARIZE_FLIGHT_SCHEME
|
|
220
|
+
Default: "grpc+tls"
|
|
221
|
+
|
|
222
|
+
pyarrow_max_chunksize : int
|
|
223
|
+
Maximum chunk size for PyArrow operations (1 to MAX_CHUNKSIZE).
|
|
224
|
+
Environment variable: ARIZE_MAX_CHUNKSIZE
|
|
225
|
+
Default: 10_000
|
|
226
|
+
|
|
227
|
+
request_verify : bool
|
|
228
|
+
Whether to verify SSL certificates for HTTP requests.
|
|
229
|
+
Environment variable: ARIZE_REQUEST_VERIFY
|
|
230
|
+
Default: True
|
|
231
|
+
|
|
232
|
+
stream_max_workers : int
|
|
233
|
+
Maximum number of worker threads for streaming operations (minimum: 1).
|
|
234
|
+
Environment variable: ARIZE_STREAM_MAX_WORKERS
|
|
235
|
+
Default: 8
|
|
236
|
+
|
|
237
|
+
stream_max_queue_bound : int
|
|
238
|
+
Maximum queue size for streaming operations (minimum: 1).
|
|
239
|
+
Environment variable: ARIZE_STREAM_MAX_QUEUE_BOUND
|
|
240
|
+
Default: 5000
|
|
241
|
+
|
|
242
|
+
max_http_payload_size_mb : float
|
|
243
|
+
Maximum HTTP payload size in megabytes (minimum: 1).
|
|
244
|
+
Environment variable: ARIZE_MAX_HTTP_PAYLOAD_SIZE_MB
|
|
245
|
+
Default: 100
|
|
246
|
+
|
|
247
|
+
arize_directory : str
|
|
248
|
+
Directory for Arize SDK files (cache, logs, etc.).
|
|
249
|
+
Environment variable: ARIZE_DIRECTORY
|
|
250
|
+
Default: "~/.arize"
|
|
251
|
+
|
|
252
|
+
enable_caching : bool
|
|
253
|
+
Whether to enable local caching.
|
|
254
|
+
Environment variable: ARIZE_ENABLE_CACHING
|
|
255
|
+
Default: True
|
|
256
|
+
|
|
257
|
+
region : Region
|
|
258
|
+
Arize region (e.g., US_CENTRAL, EU_WEST). When specified, overrides
|
|
259
|
+
individual host/port settings.
|
|
260
|
+
Environment variable: ARIZE_REGION
|
|
261
|
+
Default: Region.UNSPECIFIED
|
|
262
|
+
|
|
263
|
+
single_host : str
|
|
264
|
+
Single host to use for all endpoints. Overrides individual host settings.
|
|
265
|
+
Environment variable: ARIZE_SINGLE_HOST
|
|
266
|
+
Default: "" (not set)
|
|
267
|
+
|
|
268
|
+
single_port : int
|
|
269
|
+
Single port to use for all endpoints. Overrides individual port settings (0-65535).
|
|
270
|
+
Environment variable: ARIZE_SINGLE_PORT
|
|
271
|
+
Default: 0 (not set)
|
|
272
|
+
|
|
273
|
+
See Also:
|
|
274
|
+
--------
|
|
275
|
+
ArizeClient : Main client class that uses this configuration
|
|
276
|
+
"""
|
|
277
|
+
|
|
278
|
+
api_key: str = field(
|
|
279
|
+
default_factory=lambda: _env_str(ENV_API_KEY, ""),
|
|
280
|
+
)
|
|
281
|
+
api_host: str = field(
|
|
282
|
+
default_factory=lambda: _env_str(ENV_API_HOST, DEFAULT_API_HOST)
|
|
283
|
+
)
|
|
284
|
+
api_scheme: str = field(
|
|
285
|
+
default_factory=lambda: _env_http_scheme(
|
|
286
|
+
ENV_API_SCHEME,
|
|
287
|
+
DEFAULT_API_SCHEME,
|
|
288
|
+
),
|
|
289
|
+
)
|
|
290
|
+
otlp_host: str = field(
|
|
291
|
+
default_factory=lambda: _env_str(ENV_OTLP_HOST, DEFAULT_OTLP_HOST)
|
|
292
|
+
)
|
|
293
|
+
otlp_scheme: str = field(
|
|
294
|
+
default_factory=lambda: _env_http_scheme(
|
|
295
|
+
ENV_OTLP_SCHEME,
|
|
296
|
+
DEFAULT_OTLP_SCHEME,
|
|
297
|
+
),
|
|
298
|
+
)
|
|
299
|
+
flight_host: str = field(
|
|
300
|
+
default_factory=lambda: _env_str(ENV_FLIGHT_HOST, DEFAULT_FLIGHT_HOST)
|
|
301
|
+
)
|
|
302
|
+
flight_port: int = field(
|
|
303
|
+
default_factory=lambda: _env_int(
|
|
304
|
+
ENV_FLIGHT_PORT, DEFAULT_FLIGHT_PORT, min_val=1, max_val=65535
|
|
305
|
+
)
|
|
306
|
+
)
|
|
307
|
+
flight_scheme: str = field(
|
|
308
|
+
default_factory=lambda: _env_str(
|
|
309
|
+
ENV_FLIGHT_SCHEME,
|
|
310
|
+
DEFAULT_FLIGHT_SCHEME,
|
|
311
|
+
),
|
|
312
|
+
)
|
|
313
|
+
pyarrow_max_chunksize: int = field(
|
|
314
|
+
default_factory=lambda: _env_int(
|
|
315
|
+
ENV_PYARROW_MAX_CHUNKSIZE,
|
|
316
|
+
DEFAULT_PYARROW_MAX_CHUNKSIZE,
|
|
317
|
+
min_val=1,
|
|
318
|
+
max_val=MAX_CHUNKSIZE,
|
|
319
|
+
)
|
|
320
|
+
)
|
|
321
|
+
request_verify: bool = field(
|
|
322
|
+
default_factory=lambda: _env_bool(
|
|
323
|
+
ENV_REQUEST_VERIFY, DEFAULT_REQUEST_VERIFY
|
|
324
|
+
)
|
|
325
|
+
)
|
|
326
|
+
stream_max_workers: int = field(
|
|
327
|
+
default_factory=lambda: _env_int(
|
|
328
|
+
ENV_STREAM_MAX_WORKERS, DEFAULT_STREAM_MAX_WORKERS, min_val=1
|
|
329
|
+
)
|
|
330
|
+
)
|
|
157
331
|
stream_max_queue_bound: int = field(
|
|
158
|
-
default_factory=
|
|
332
|
+
default_factory=lambda: _env_int(
|
|
333
|
+
ENV_STREAM_MAX_QUEUE_BOUND,
|
|
334
|
+
DEFAULT_STREAM_MAX_QUEUE_BOUND,
|
|
335
|
+
min_val=1,
|
|
336
|
+
)
|
|
159
337
|
)
|
|
160
338
|
max_http_payload_size_mb: float = field(
|
|
161
|
-
default_factory=
|
|
339
|
+
default_factory=lambda: _env_float(
|
|
340
|
+
ENV_MAX_HTTP_PAYLOAD_SIZE_MB,
|
|
341
|
+
DEFAULT_MAX_HTTP_PAYLOAD_SIZE_MB,
|
|
342
|
+
min_val=1,
|
|
343
|
+
)
|
|
344
|
+
)
|
|
345
|
+
arize_directory: str = field(
|
|
346
|
+
default_factory=lambda: _env_str(
|
|
347
|
+
ENV_ARIZE_DIRECTORY, DEFAULT_ARIZE_DIRECTORY
|
|
348
|
+
)
|
|
349
|
+
)
|
|
350
|
+
enable_caching: bool = field(
|
|
351
|
+
default_factory=lambda: _env_bool(
|
|
352
|
+
ENV_ENABLE_CACHING, DEFAULT_ENABLE_CACHING
|
|
353
|
+
)
|
|
354
|
+
)
|
|
355
|
+
region: Region = field(
|
|
356
|
+
default_factory=lambda: Region(_env_str(ENV_REGION, ""))
|
|
357
|
+
)
|
|
358
|
+
single_host: str = field(
|
|
359
|
+
default_factory=lambda: _env_str(ENV_SINGLE_HOST, "")
|
|
360
|
+
)
|
|
361
|
+
single_port: int = field(
|
|
362
|
+
default_factory=lambda: _env_int(
|
|
363
|
+
ENV_SINGLE_PORT, 0, min_val=0, max_val=65535
|
|
364
|
+
)
|
|
162
365
|
)
|
|
163
|
-
arize_direcory: str = field(default_factory=_arize_dir_factory)
|
|
164
|
-
enable_caching: bool = field(default_factory=_enable_cache_factory)
|
|
165
366
|
|
|
166
367
|
# Private, excluded from comparisons & repr
|
|
167
|
-
_headers: Dict[str, str] = field(init=False, repr=False, compare=False)
|
|
168
368
|
_gen_client: Any = field(default=None, repr=False, compare=False)
|
|
169
369
|
_gen_lock: threading.Lock = field(
|
|
170
370
|
default_factory=threading.Lock, repr=False, compare=False
|
|
171
371
|
)
|
|
172
372
|
|
|
173
|
-
def __post_init__(self):
|
|
373
|
+
def __post_init__(self) -> None:
|
|
374
|
+
"""Validate and configure SDK endpoints after initialization.
|
|
375
|
+
|
|
376
|
+
Raises:
|
|
377
|
+
MissingAPIKeyError: If API key is not provided.
|
|
378
|
+
"""
|
|
174
379
|
# Validate Configuration
|
|
175
380
|
if not self.api_key:
|
|
176
381
|
raise MissingAPIKeyError()
|
|
177
382
|
|
|
383
|
+
has_single_host = bool(self.single_host)
|
|
384
|
+
has_single_port = self.single_port != 0
|
|
385
|
+
has_region = self.region is not Region.UNSPECIFIED
|
|
386
|
+
if (has_single_host or has_single_port) and has_region:
|
|
387
|
+
logger.info(
|
|
388
|
+
"Multiple endpoint override options provided. Preference order is: "
|
|
389
|
+
"region > single_host/single_port > per-endpoint host/port."
|
|
390
|
+
)
|
|
391
|
+
|
|
392
|
+
# Single host override: if single_host is set, it overrides hosts
|
|
393
|
+
if has_single_host:
|
|
394
|
+
logger.info(
|
|
395
|
+
"Single host %r provided; overriding hosts configuration with single host.",
|
|
396
|
+
self.single_host,
|
|
397
|
+
)
|
|
398
|
+
object.__setattr__(self, "api_host", self.single_host)
|
|
399
|
+
object.__setattr__(self, "otlp_host", self.single_host)
|
|
400
|
+
object.__setattr__(self, "flight_host", self.single_host)
|
|
401
|
+
|
|
402
|
+
# Single port override: if single_port is set, it overrides ports
|
|
403
|
+
if has_single_port:
|
|
404
|
+
logger.info(
|
|
405
|
+
"Single port %s provided; overriding ports configuration with single port.",
|
|
406
|
+
self.single_port,
|
|
407
|
+
)
|
|
408
|
+
object.__setattr__(self, "flight_port", self.single_port)
|
|
409
|
+
|
|
410
|
+
# Region override: if region is set, it *always* wins over host/port fields
|
|
411
|
+
if has_region:
|
|
412
|
+
endpoints = REGION_ENDPOINTS[self.region]
|
|
413
|
+
|
|
414
|
+
# Override config (region trumps everything)
|
|
415
|
+
logger.info(
|
|
416
|
+
"Region %s provided; overriding hosts & ports configuration with region defaults.",
|
|
417
|
+
self.region.value,
|
|
418
|
+
)
|
|
419
|
+
object.__setattr__(self, "api_host", endpoints.api_host)
|
|
420
|
+
object.__setattr__(self, "otlp_host", endpoints.otlp_host)
|
|
421
|
+
object.__setattr__(self, "flight_host", endpoints.flight_host)
|
|
422
|
+
object.__setattr__(self, "flight_port", endpoints.flight_port)
|
|
423
|
+
|
|
178
424
|
@property
|
|
179
425
|
def cache_dir(self) -> str:
|
|
180
|
-
|
|
426
|
+
"""Return the path to the cache directory."""
|
|
427
|
+
return str(Path(self.arize_directory) / "cache")
|
|
181
428
|
|
|
182
429
|
@property
|
|
183
430
|
def api_url(self) -> str:
|
|
431
|
+
"""Return the base API URL."""
|
|
184
432
|
return _endpoint(self.api_scheme, self.api_host)
|
|
185
433
|
|
|
186
434
|
@property
|
|
187
435
|
def otlp_url(self) -> str:
|
|
436
|
+
"""Return the OTLP endpoint URL."""
|
|
188
437
|
return _endpoint(self.otlp_scheme, self.otlp_host, "/v1")
|
|
189
438
|
|
|
190
439
|
@property
|
|
191
440
|
def files_url(self) -> str:
|
|
441
|
+
"""Return the files upload endpoint URL."""
|
|
192
442
|
return _endpoint(self.api_scheme, self.api_host, "/v1/pandas_arrow")
|
|
193
443
|
|
|
194
444
|
@property
|
|
195
445
|
def records_url(self) -> str:
|
|
446
|
+
"""Return the records logging endpoint URL."""
|
|
196
447
|
return _endpoint(self.api_scheme, self.api_host, "/v1/log")
|
|
197
448
|
|
|
198
449
|
@property
|
|
199
|
-
def headers(self) ->
|
|
450
|
+
def headers(self) -> dict[str, str]:
|
|
451
|
+
"""Return HTTP headers for API requests."""
|
|
200
452
|
# Create base headers
|
|
201
453
|
return {
|
|
202
454
|
"authorization": self.api_key,
|
|
203
455
|
"sdk-language": "python",
|
|
204
|
-
"language-version":
|
|
456
|
+
"language-version": PYTHON_VERSION,
|
|
205
457
|
"sdk-version": __version__,
|
|
206
458
|
# "arize-space-id": self._space_id,
|
|
207
459
|
# "arize-interface": "batch",
|
|
@@ -209,31 +461,34 @@ class SDKConfiguration:
|
|
|
209
461
|
}
|
|
210
462
|
|
|
211
463
|
@property
|
|
212
|
-
def headers_grpc(self) ->
|
|
464
|
+
def headers_grpc(self) -> dict[str, str]:
|
|
465
|
+
"""Return headers for gRPC requests."""
|
|
213
466
|
return {
|
|
214
467
|
"authorization": self.api_key,
|
|
215
468
|
"Grpc-Metadata-sdk-language": "python",
|
|
216
|
-
"Grpc-Metadata-language-version":
|
|
469
|
+
"Grpc-Metadata-language-version": PYTHON_VERSION,
|
|
217
470
|
"Grpc-Metadata-sdk-version": __version__,
|
|
218
471
|
# "Grpc-Metadata-arize-space-id": space_id,
|
|
219
472
|
# "Grpc-Metadata-arize-interface": "stream",
|
|
220
473
|
}
|
|
221
474
|
|
|
222
475
|
def __repr__(self) -> str:
|
|
476
|
+
"""Return a detailed string representation with masked sensitive fields."""
|
|
223
477
|
# Dynamically build repr for all fields
|
|
224
478
|
lines = [f"{self.__class__.__name__}("]
|
|
225
479
|
for f in fields(self):
|
|
226
480
|
if not f.repr:
|
|
227
481
|
continue
|
|
228
482
|
val = getattr(self, f.name)
|
|
229
|
-
if f.name
|
|
483
|
+
if _is_sensitive_field(f.name):
|
|
230
484
|
val = _mask_secret(val, 6)
|
|
231
485
|
lines.append(f" {f.name}={val!r},")
|
|
232
486
|
lines.append(")")
|
|
233
487
|
return "\n".join(lines)
|
|
234
488
|
|
|
235
489
|
# TODO(Kiko): This may not be well placed in this class
|
|
236
|
-
def get_generated_client(self):
|
|
490
|
+
def get_generated_client(self) -> object:
|
|
491
|
+
"""Get or create the generated OpenAPI client instance."""
|
|
237
492
|
# If already cached, return immediately
|
|
238
493
|
if self._gen_client is not None:
|
|
239
494
|
return self._gen_client
|
|
@@ -243,21 +498,15 @@ class SDKConfiguration:
|
|
|
243
498
|
if self._gen_client is not None:
|
|
244
499
|
return self._gen_client
|
|
245
500
|
|
|
246
|
-
# Import lazily so
|
|
501
|
+
# Import lazily so extra dependencies can be
|
|
502
|
+
# enforced outside the configuration class
|
|
247
503
|
from arize._generated import api_client as gen
|
|
248
504
|
|
|
249
505
|
cfg = gen.Configuration(host=self.api_url)
|
|
250
506
|
if self.api_key:
|
|
251
|
-
cfg.
|
|
507
|
+
cfg.access_token = self.api_key
|
|
252
508
|
client = gen.ApiClient(cfg)
|
|
253
509
|
|
|
254
510
|
# Bypass frozen to set the cache once
|
|
255
511
|
object.__setattr__(self, "_gen_client", client)
|
|
256
512
|
return client
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
def get_python_version():
|
|
260
|
-
return (
|
|
261
|
-
f"{sys.version_info.major}.{sys.version_info.minor}."
|
|
262
|
-
f"{sys.version_info.micro}"
|
|
263
|
-
)
|
arize/constants/__init__.py
CHANGED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Constants and configuration values used across the Arize SDK."""
|
arize/constants/config.py
CHANGED
|
@@ -1,28 +1,35 @@
|
|
|
1
|
+
"""Configuration constants and environment variable names."""
|
|
2
|
+
|
|
1
3
|
# Authentication
|
|
2
4
|
ENV_API_KEY = "ARIZE_API_KEY"
|
|
3
5
|
|
|
4
6
|
# Server configuration env vars
|
|
7
|
+
ENV_REGION = "ARIZE_REGION"
|
|
5
8
|
ENV_API_HOST = "ARIZE_API_HOST"
|
|
9
|
+
ENV_API_SCHEME = "ARIZE_API_SCHEME"
|
|
6
10
|
ENV_OTLP_HOST = "ARIZE_OTLP_HOST"
|
|
11
|
+
ENV_OTLP_SCHEME = "ARIZE_OTLP_SCHEME"
|
|
7
12
|
ENV_FLIGHT_HOST = "ARIZE_FLIGHT_HOST"
|
|
8
13
|
ENV_FLIGHT_PORT = "ARIZE_FLIGHT_PORT"
|
|
9
|
-
|
|
14
|
+
ENV_FLIGHT_SCHEME = "ARIZE_FLIGHT_SCHEME"
|
|
15
|
+
ENV_SINGLE_HOST = "ARIZE_SINGLE_HOST"
|
|
16
|
+
ENV_SINGLE_PORT = "ARIZE_SINGLE_PORT"
|
|
10
17
|
ENV_PYARROW_MAX_CHUNKSIZE = "ARIZE_MAX_CHUNKSIZE"
|
|
11
18
|
ENV_REQUEST_VERIFY = "ARIZE_REQUEST_VERIFY"
|
|
12
|
-
ENV_INSECURE = "ARIZE_INSECURE"
|
|
13
19
|
ENV_MAX_HTTP_PAYLOAD_SIZE_MB = "ARIZE_MAX_HTTP_PAYLOAD_SIZE_MB"
|
|
14
20
|
ENV_ARIZE_DIRECTORY = "ARIZE_DIRECTORY"
|
|
15
21
|
ENV_ENABLE_CACHING = "ARIZE_ENABLE_CACHING"
|
|
16
22
|
|
|
17
23
|
# Server configuration default values
|
|
18
24
|
DEFAULT_API_HOST = "api.arize.com" # NOTE: Must not prefix with https://
|
|
25
|
+
DEFAULT_API_SCHEME = "https"
|
|
19
26
|
DEFAULT_OTLP_HOST = "otlp.arize.com" # NOTE: Must not prefix with https://
|
|
27
|
+
DEFAULT_OTLP_SCHEME = "https"
|
|
20
28
|
DEFAULT_FLIGHT_HOST = "flight.arize.com" # NOTE: Must not prefix with https://
|
|
21
29
|
DEFAULT_FLIGHT_PORT = 443
|
|
22
|
-
|
|
30
|
+
DEFAULT_FLIGHT_SCHEME = "grpc+tls"
|
|
23
31
|
DEFAULT_PYARROW_MAX_CHUNKSIZE = 10_000
|
|
24
32
|
DEFAULT_REQUEST_VERIFY = True
|
|
25
|
-
DEFAULT_INSECURE = False
|
|
26
33
|
DEFAULT_MAX_HTTP_PAYLOAD_SIZE_MB = 100
|
|
27
34
|
DEFAULT_ARIZE_DIRECTORY = "~/.arize"
|
|
28
35
|
DEFAULT_ENABLE_CACHING = True
|