arize 8.0.0a22__py3-none-any.whl → 8.0.0b0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arize/__init__.py +28 -19
- arize/_exporter/client.py +56 -37
- arize/_exporter/parsers/tracing_data_parser.py +41 -30
- arize/_exporter/validation.py +3 -3
- arize/_flight/client.py +207 -76
- arize/_generated/api_client/__init__.py +30 -6
- arize/_generated/api_client/api/__init__.py +1 -0
- arize/_generated/api_client/api/datasets_api.py +864 -190
- arize/_generated/api_client/api/experiments_api.py +167 -131
- arize/_generated/api_client/api/projects_api.py +1197 -0
- arize/_generated/api_client/api_client.py +2 -2
- arize/_generated/api_client/configuration.py +42 -34
- arize/_generated/api_client/exceptions.py +2 -2
- arize/_generated/api_client/models/__init__.py +15 -4
- arize/_generated/api_client/models/dataset.py +10 -10
- arize/_generated/api_client/models/dataset_example.py +111 -0
- arize/_generated/api_client/models/dataset_example_update.py +100 -0
- arize/_generated/api_client/models/dataset_version.py +13 -13
- arize/_generated/api_client/models/datasets_create_request.py +16 -8
- arize/_generated/api_client/models/datasets_examples_insert_request.py +100 -0
- arize/_generated/api_client/models/datasets_examples_list200_response.py +106 -0
- arize/_generated/api_client/models/datasets_examples_update_request.py +102 -0
- arize/_generated/api_client/models/datasets_list200_response.py +10 -4
- arize/_generated/api_client/models/experiment.py +14 -16
- arize/_generated/api_client/models/experiment_run.py +108 -0
- arize/_generated/api_client/models/experiment_run_create.py +102 -0
- arize/_generated/api_client/models/experiments_create_request.py +16 -10
- arize/_generated/api_client/models/experiments_list200_response.py +10 -4
- arize/_generated/api_client/models/experiments_runs_list200_response.py +19 -5
- arize/_generated/api_client/models/{error.py → pagination_metadata.py} +13 -11
- arize/_generated/api_client/models/primitive_value.py +172 -0
- arize/_generated/api_client/models/problem.py +100 -0
- arize/_generated/api_client/models/project.py +99 -0
- arize/_generated/api_client/models/{datasets_list_examples200_response.py → projects_create_request.py} +13 -11
- arize/_generated/api_client/models/projects_list200_response.py +106 -0
- arize/_generated/api_client/rest.py +2 -2
- arize/_generated/api_client/test/test_dataset.py +4 -2
- arize/_generated/api_client/test/test_dataset_example.py +56 -0
- arize/_generated/api_client/test/test_dataset_example_update.py +52 -0
- arize/_generated/api_client/test/test_dataset_version.py +7 -2
- arize/_generated/api_client/test/test_datasets_api.py +27 -13
- arize/_generated/api_client/test/test_datasets_create_request.py +8 -4
- arize/_generated/api_client/test/{test_datasets_list_examples200_response.py → test_datasets_examples_insert_request.py} +19 -15
- arize/_generated/api_client/test/test_datasets_examples_list200_response.py +66 -0
- arize/_generated/api_client/test/test_datasets_examples_update_request.py +61 -0
- arize/_generated/api_client/test/test_datasets_list200_response.py +9 -3
- arize/_generated/api_client/test/test_experiment.py +2 -4
- arize/_generated/api_client/test/test_experiment_run.py +56 -0
- arize/_generated/api_client/test/test_experiment_run_create.py +54 -0
- arize/_generated/api_client/test/test_experiments_api.py +6 -6
- arize/_generated/api_client/test/test_experiments_create_request.py +9 -6
- arize/_generated/api_client/test/test_experiments_list200_response.py +9 -5
- arize/_generated/api_client/test/test_experiments_runs_list200_response.py +15 -5
- arize/_generated/api_client/test/test_pagination_metadata.py +53 -0
- arize/_generated/api_client/test/{test_error.py → test_primitive_value.py} +13 -14
- arize/_generated/api_client/test/test_problem.py +57 -0
- arize/_generated/api_client/test/test_project.py +58 -0
- arize/_generated/api_client/test/test_projects_api.py +59 -0
- arize/_generated/api_client/test/test_projects_create_request.py +54 -0
- arize/_generated/api_client/test/test_projects_list200_response.py +70 -0
- arize/_generated/api_client_README.md +43 -29
- arize/_generated/protocol/flight/flight_pb2.py +400 -0
- arize/_lazy.py +27 -19
- arize/client.py +181 -58
- arize/config.py +324 -116
- arize/constants/__init__.py +1 -0
- arize/constants/config.py +11 -4
- arize/constants/ml.py +6 -4
- arize/constants/openinference.py +2 -0
- arize/constants/pyarrow.py +2 -0
- arize/constants/spans.py +3 -1
- arize/datasets/__init__.py +1 -0
- arize/datasets/client.py +304 -84
- arize/datasets/errors.py +32 -2
- arize/datasets/validation.py +18 -8
- arize/embeddings/__init__.py +2 -0
- arize/embeddings/auto_generator.py +23 -19
- arize/embeddings/base_generators.py +89 -36
- arize/embeddings/constants.py +2 -0
- arize/embeddings/cv_generators.py +26 -4
- arize/embeddings/errors.py +27 -5
- arize/embeddings/nlp_generators.py +43 -18
- arize/embeddings/tabular_generators.py +46 -31
- arize/embeddings/usecases.py +12 -2
- arize/exceptions/__init__.py +1 -0
- arize/exceptions/auth.py +11 -1
- arize/exceptions/base.py +29 -4
- arize/exceptions/models.py +21 -2
- arize/exceptions/parameters.py +31 -0
- arize/exceptions/spaces.py +12 -1
- arize/exceptions/types.py +86 -7
- arize/exceptions/values.py +220 -20
- arize/experiments/__init__.py +13 -0
- arize/experiments/client.py +394 -285
- arize/experiments/evaluators/__init__.py +1 -0
- arize/experiments/evaluators/base.py +74 -41
- arize/experiments/evaluators/exceptions.py +6 -3
- arize/experiments/evaluators/executors.py +121 -73
- arize/experiments/evaluators/rate_limiters.py +106 -57
- arize/experiments/evaluators/types.py +34 -7
- arize/experiments/evaluators/utils.py +65 -27
- arize/experiments/functions.py +103 -101
- arize/experiments/tracing.py +52 -44
- arize/experiments/types.py +56 -31
- arize/logging.py +54 -22
- arize/ml/__init__.py +1 -0
- arize/ml/batch_validation/__init__.py +1 -0
- arize/{models → ml}/batch_validation/errors.py +545 -67
- arize/{models → ml}/batch_validation/validator.py +344 -303
- arize/ml/bounded_executor.py +47 -0
- arize/{models → ml}/casting.py +118 -108
- arize/{models → ml}/client.py +339 -118
- arize/{models → ml}/proto.py +97 -42
- arize/{models → ml}/stream_validation.py +43 -15
- arize/ml/surrogate_explainer/__init__.py +1 -0
- arize/{models → ml}/surrogate_explainer/mimic.py +25 -10
- arize/{types.py → ml/types.py} +355 -354
- arize/pre_releases.py +44 -0
- arize/projects/__init__.py +1 -0
- arize/projects/client.py +134 -0
- arize/regions.py +40 -0
- arize/spans/__init__.py +1 -0
- arize/spans/client.py +204 -175
- arize/spans/columns.py +13 -0
- arize/spans/conversion.py +60 -37
- arize/spans/validation/__init__.py +1 -0
- arize/spans/validation/annotations/__init__.py +1 -0
- arize/spans/validation/annotations/annotations_validation.py +6 -4
- arize/spans/validation/annotations/dataframe_form_validation.py +13 -11
- arize/spans/validation/annotations/value_validation.py +35 -11
- arize/spans/validation/common/__init__.py +1 -0
- arize/spans/validation/common/argument_validation.py +33 -8
- arize/spans/validation/common/dataframe_form_validation.py +35 -9
- arize/spans/validation/common/errors.py +211 -11
- arize/spans/validation/common/value_validation.py +81 -14
- arize/spans/validation/evals/__init__.py +1 -0
- arize/spans/validation/evals/dataframe_form_validation.py +28 -8
- arize/spans/validation/evals/evals_validation.py +34 -4
- arize/spans/validation/evals/value_validation.py +26 -3
- arize/spans/validation/metadata/__init__.py +1 -1
- arize/spans/validation/metadata/argument_validation.py +14 -5
- arize/spans/validation/metadata/dataframe_form_validation.py +26 -10
- arize/spans/validation/metadata/value_validation.py +24 -10
- arize/spans/validation/spans/__init__.py +1 -0
- arize/spans/validation/spans/dataframe_form_validation.py +35 -14
- arize/spans/validation/spans/spans_validation.py +35 -4
- arize/spans/validation/spans/value_validation.py +78 -8
- arize/utils/__init__.py +1 -0
- arize/utils/arrow.py +31 -15
- arize/utils/cache.py +34 -6
- arize/utils/dataframe.py +20 -3
- arize/utils/online_tasks/__init__.py +2 -0
- arize/utils/online_tasks/dataframe_preprocessor.py +58 -47
- arize/utils/openinference_conversion.py +44 -5
- arize/utils/proto.py +10 -0
- arize/utils/size.py +5 -3
- arize/utils/types.py +105 -0
- arize/version.py +3 -1
- {arize-8.0.0a22.dist-info → arize-8.0.0b0.dist-info}/METADATA +13 -6
- arize-8.0.0b0.dist-info/RECORD +175 -0
- {arize-8.0.0a22.dist-info → arize-8.0.0b0.dist-info}/WHEEL +1 -1
- arize-8.0.0b0.dist-info/licenses/LICENSE +176 -0
- arize-8.0.0b0.dist-info/licenses/NOTICE +13 -0
- arize/_generated/protocol/flight/export_pb2.py +0 -61
- arize/_generated/protocol/flight/ingest_pb2.py +0 -365
- arize/models/__init__.py +0 -0
- arize/models/batch_validation/__init__.py +0 -0
- arize/models/bounded_executor.py +0 -34
- arize/models/surrogate_explainer/__init__.py +0 -0
- arize-8.0.0a22.dist-info/RECORD +0 -146
- arize-8.0.0a22.dist-info/licenses/LICENSE.md +0 -12
arize/config.py
CHANGED
|
@@ -1,207 +1,418 @@
|
|
|
1
|
+
"""SDK configuration and settings management for the Arize client."""
|
|
2
|
+
|
|
1
3
|
import logging
|
|
2
4
|
import os
|
|
3
5
|
import sys
|
|
4
6
|
import threading
|
|
5
7
|
from dataclasses import dataclass, field, fields
|
|
6
8
|
from pathlib import Path
|
|
7
|
-
from typing import Any
|
|
9
|
+
from typing import Any
|
|
8
10
|
|
|
9
11
|
from arize.constants.config import (
|
|
10
12
|
DEFAULT_API_HOST,
|
|
13
|
+
DEFAULT_API_SCHEME,
|
|
11
14
|
DEFAULT_ARIZE_DIRECTORY,
|
|
12
15
|
DEFAULT_ENABLE_CACHING,
|
|
13
16
|
DEFAULT_FLIGHT_HOST,
|
|
14
17
|
DEFAULT_FLIGHT_PORT,
|
|
15
|
-
|
|
16
|
-
DEFAULT_INSECURE,
|
|
18
|
+
DEFAULT_FLIGHT_SCHEME,
|
|
17
19
|
DEFAULT_MAX_HTTP_PAYLOAD_SIZE_MB,
|
|
18
20
|
DEFAULT_OTLP_HOST,
|
|
21
|
+
DEFAULT_OTLP_SCHEME,
|
|
19
22
|
DEFAULT_PYARROW_MAX_CHUNKSIZE,
|
|
20
23
|
DEFAULT_REQUEST_VERIFY,
|
|
21
24
|
DEFAULT_STREAM_MAX_QUEUE_BOUND,
|
|
22
25
|
DEFAULT_STREAM_MAX_WORKERS,
|
|
23
26
|
ENV_API_HOST,
|
|
24
27
|
ENV_API_KEY,
|
|
28
|
+
ENV_API_SCHEME,
|
|
25
29
|
ENV_ARIZE_DIRECTORY,
|
|
26
30
|
ENV_ENABLE_CACHING,
|
|
27
31
|
ENV_FLIGHT_HOST,
|
|
28
32
|
ENV_FLIGHT_PORT,
|
|
29
|
-
|
|
30
|
-
ENV_INSECURE,
|
|
33
|
+
ENV_FLIGHT_SCHEME,
|
|
31
34
|
ENV_MAX_HTTP_PAYLOAD_SIZE_MB,
|
|
32
35
|
ENV_OTLP_HOST,
|
|
36
|
+
ENV_OTLP_SCHEME,
|
|
33
37
|
ENV_PYARROW_MAX_CHUNKSIZE,
|
|
38
|
+
ENV_REGION,
|
|
34
39
|
ENV_REQUEST_VERIFY,
|
|
40
|
+
ENV_SINGLE_HOST,
|
|
41
|
+
ENV_SINGLE_PORT,
|
|
35
42
|
ENV_STREAM_MAX_QUEUE_BOUND,
|
|
36
43
|
ENV_STREAM_MAX_WORKERS,
|
|
37
44
|
)
|
|
38
45
|
from arize.constants.pyarrow import MAX_CHUNKSIZE
|
|
39
46
|
from arize.exceptions.auth import MissingAPIKeyError
|
|
47
|
+
from arize.regions import REGION_ENDPOINTS, Region
|
|
40
48
|
from arize.version import __version__
|
|
41
49
|
|
|
42
50
|
logger = logging.getLogger(__name__)
|
|
43
51
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
return val
|
|
48
|
-
return (val or "").strip().lower() in {"1", "true", "yes", "on"}
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
def _api_key_factory() -> str:
|
|
52
|
-
return os.getenv(ENV_API_KEY, "")
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
def _api_host_factory() -> str:
|
|
56
|
-
return os.getenv(ENV_API_HOST, DEFAULT_API_HOST)
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
def _api_scheme_factory() -> str:
|
|
60
|
-
insecure = os.getenv(ENV_INSECURE, DEFAULT_INSECURE)
|
|
61
|
-
if insecure:
|
|
62
|
-
return "http"
|
|
63
|
-
return "https"
|
|
52
|
+
PYTHON_VERSION = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"
|
|
53
|
+
SENSITIVE_FIELD_MARKERS = ("key", "token", "secret")
|
|
54
|
+
ALLOWED_HTTP_SCHEMES = {"http", "https"}
|
|
64
55
|
|
|
65
56
|
|
|
66
|
-
def
|
|
67
|
-
|
|
57
|
+
def _is_sensitive_field(name: str) -> bool:
|
|
58
|
+
n = name.lower()
|
|
59
|
+
return bool(any(k in n for k in SENSITIVE_FIELD_MARKERS))
|
|
68
60
|
|
|
69
61
|
|
|
70
|
-
def
|
|
71
|
-
|
|
62
|
+
def _mask_secret(secret: str, N: int = 4) -> str:
|
|
63
|
+
"""Show first N chars then '***'; empty string if empty."""
|
|
64
|
+
if len(secret) == 0:
|
|
65
|
+
return ""
|
|
66
|
+
return f"{secret[:N]}***"
|
|
72
67
|
|
|
73
68
|
|
|
74
|
-
def
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
69
|
+
def _endpoint(scheme: str, base: str, path: str = "") -> str:
|
|
70
|
+
endpoint = scheme + "://" + base.rstrip("/")
|
|
71
|
+
if path:
|
|
72
|
+
endpoint += "/" + path.lstrip("/")
|
|
73
|
+
return endpoint
|
|
78
74
|
|
|
79
75
|
|
|
80
|
-
def
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
)
|
|
84
|
-
if max_chunksize <= 0 or max_chunksize > MAX_CHUNKSIZE:
|
|
76
|
+
def _env_http_scheme(name: str, default: str) -> str:
|
|
77
|
+
v = _env_str(name, default).lower()
|
|
78
|
+
if v not in ALLOWED_HTTP_SCHEMES:
|
|
85
79
|
raise ValueError(
|
|
86
|
-
f"
|
|
80
|
+
f"{name} must be one of {sorted(ALLOWED_HTTP_SCHEMES)}. Found {v!r}"
|
|
87
81
|
)
|
|
88
|
-
return
|
|
89
|
-
|
|
82
|
+
return v
|
|
90
83
|
|
|
91
|
-
def _verify_factory() -> bool:
|
|
92
|
-
return _parse_bool(os.getenv(ENV_REQUEST_VERIFY, DEFAULT_REQUEST_VERIFY))
|
|
93
84
|
|
|
85
|
+
def _env_str(
|
|
86
|
+
name: str,
|
|
87
|
+
default: str,
|
|
88
|
+
min_len: int | None = None,
|
|
89
|
+
max_len: int | None = None,
|
|
90
|
+
) -> str:
|
|
91
|
+
val = os.getenv(name, default).strip()
|
|
94
92
|
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
def _stream_max_queue_bound_factory() -> int:
|
|
100
|
-
return int(
|
|
101
|
-
os.getenv(ENV_STREAM_MAX_QUEUE_BOUND, DEFAULT_STREAM_MAX_QUEUE_BOUND)
|
|
102
|
-
)
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
def _otlp_scheme_factory() -> str:
|
|
106
|
-
insecure = os.getenv(ENV_INSECURE, DEFAULT_INSECURE)
|
|
107
|
-
if insecure:
|
|
108
|
-
return "http"
|
|
109
|
-
return "https"
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
def _otlp_host_factory() -> str:
|
|
113
|
-
return os.getenv(ENV_OTLP_HOST, DEFAULT_OTLP_HOST)
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
def _max_http_payload_size_mb_factory() -> float:
|
|
117
|
-
return float(
|
|
118
|
-
os.getenv(
|
|
119
|
-
ENV_MAX_HTTP_PAYLOAD_SIZE_MB, DEFAULT_MAX_HTTP_PAYLOAD_SIZE_MB
|
|
93
|
+
if min_len is not None and len(val) < min_len:
|
|
94
|
+
raise ValueError(
|
|
95
|
+
f"The value of environment variable {name} must be at least {min_len} "
|
|
96
|
+
f"characters long. Found {len(val)} characters."
|
|
120
97
|
)
|
|
121
|
-
)
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
98
|
+
if max_len is not None and len(val) > max_len:
|
|
99
|
+
raise ValueError(
|
|
100
|
+
f"The value of environment variable {name} must be at most {max_len} "
|
|
101
|
+
f"characters long. Found {len(val)} characters."
|
|
102
|
+
)
|
|
103
|
+
return val
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _env_int(
|
|
107
|
+
name: str,
|
|
108
|
+
default: int,
|
|
109
|
+
min_val: int | None = None,
|
|
110
|
+
max_val: int | None = None,
|
|
111
|
+
) -> int:
|
|
112
|
+
raw = os.getenv(name, default)
|
|
113
|
+
try:
|
|
114
|
+
val = int(raw)
|
|
115
|
+
except Exception as e:
|
|
116
|
+
raise ValueError(
|
|
117
|
+
f"Environment variable {name} must be an int. Found: {raw!r}"
|
|
118
|
+
) from e
|
|
126
119
|
|
|
120
|
+
if min_val is not None and val < min_val:
|
|
121
|
+
raise ValueError(
|
|
122
|
+
f"The value of environment variable {name} must be at least {min_val}. Found {val}."
|
|
123
|
+
)
|
|
124
|
+
if max_val is not None and val > max_val:
|
|
125
|
+
raise ValueError(
|
|
126
|
+
f"The value of environment variable {name} must be at most {max_val}. Found {val}."
|
|
127
|
+
)
|
|
128
|
+
return val
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _env_float(
|
|
132
|
+
name: str,
|
|
133
|
+
default: float,
|
|
134
|
+
min_val: float | None = None,
|
|
135
|
+
max_val: float | None = None,
|
|
136
|
+
) -> float:
|
|
137
|
+
raw = os.getenv(name, default)
|
|
138
|
+
try:
|
|
139
|
+
val = float(raw)
|
|
140
|
+
except Exception as e:
|
|
141
|
+
raise ValueError(
|
|
142
|
+
f"Environment variable {name} must be a float. Found: {raw!r}"
|
|
143
|
+
) from e
|
|
127
144
|
|
|
128
|
-
|
|
129
|
-
|
|
145
|
+
if min_val is not None and val < min_val:
|
|
146
|
+
raise ValueError(
|
|
147
|
+
f"The value of environment variable {name} must be at least {min_val}. Found {val}."
|
|
148
|
+
)
|
|
149
|
+
if max_val is not None and val > max_val:
|
|
150
|
+
raise ValueError(
|
|
151
|
+
f"The value of environment variable {name} must be at most {max_val}. Found {val}."
|
|
152
|
+
)
|
|
153
|
+
return val
|
|
130
154
|
|
|
131
155
|
|
|
132
|
-
def
|
|
133
|
-
|
|
134
|
-
return f"{secret[:N]}***"
|
|
156
|
+
def _env_bool(name: str, default: bool) -> bool:
|
|
157
|
+
return _parse_bool(os.getenv(name, str(default)))
|
|
135
158
|
|
|
136
159
|
|
|
137
|
-
def
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
return endpoint
|
|
160
|
+
def _parse_bool(val: bool | str | None) -> bool:
|
|
161
|
+
if isinstance(val, bool):
|
|
162
|
+
return val
|
|
163
|
+
return (val or "").strip().lower() in {"1", "true", "yes", "on"}
|
|
142
164
|
|
|
143
165
|
|
|
144
166
|
@dataclass(frozen=True)
|
|
145
167
|
class SDKConfiguration:
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
168
|
+
"""Configuration for the Arize SDK with endpoint and authentication settings.
|
|
169
|
+
|
|
170
|
+
This class is used internally by ArizeClient to manage SDK configuration. It is not
|
|
171
|
+
recommended to use this class directly; users should interact with ArizeClient
|
|
172
|
+
instead.
|
|
173
|
+
|
|
174
|
+
Each configuration parameter follows this resolution order:
|
|
175
|
+
1. Explicit value passed to ArizeClient constructor (highest priority)
|
|
176
|
+
2. Environment variable value
|
|
177
|
+
3. Built-in default value (lowest priority)
|
|
178
|
+
|
|
179
|
+
Args:
|
|
180
|
+
api_key: Arize API key for authentication. Required.
|
|
181
|
+
Environment variable: ARIZE_API_KEY.
|
|
182
|
+
Default: None (must be provided via argument or environment variable).
|
|
183
|
+
api_host: API endpoint host.
|
|
184
|
+
Environment variable: ARIZE_API_HOST.
|
|
185
|
+
Default: "api.arize.com".
|
|
186
|
+
api_scheme: API endpoint scheme (http/https).
|
|
187
|
+
Environment variable: ARIZE_API_SCHEME.
|
|
188
|
+
Default: "https".
|
|
189
|
+
otlp_host: OTLP (OpenTelemetry Protocol) endpoint host.
|
|
190
|
+
Environment variable: ARIZE_OTLP_HOST.
|
|
191
|
+
Default: "otlp.arize.com".
|
|
192
|
+
otlp_scheme: OTLP endpoint scheme (http/https).
|
|
193
|
+
Environment variable: ARIZE_OTLP_SCHEME.
|
|
194
|
+
Default: "https".
|
|
195
|
+
flight_host: Apache Arrow Flight endpoint host.
|
|
196
|
+
Environment variable: ARIZE_FLIGHT_HOST.
|
|
197
|
+
Default: "flight.arize.com".
|
|
198
|
+
flight_port: Apache Arrow Flight endpoint port (1-65535).
|
|
199
|
+
Environment variable: ARIZE_FLIGHT_PORT.
|
|
200
|
+
Default: 443.
|
|
201
|
+
flight_scheme: Apache Arrow Flight endpoint scheme.
|
|
202
|
+
Environment variable: ARIZE_FLIGHT_SCHEME.
|
|
203
|
+
Default: "grpc+tls".
|
|
204
|
+
pyarrow_max_chunksize: Maximum chunk size for PyArrow operations (1 to MAX_CHUNKSIZE).
|
|
205
|
+
Environment variable: ARIZE_MAX_CHUNKSIZE.
|
|
206
|
+
Default: 10_000.
|
|
207
|
+
request_verify: Whether to verify SSL certificates for HTTP requests.
|
|
208
|
+
Environment variable: ARIZE_REQUEST_VERIFY.
|
|
209
|
+
Default: True.
|
|
210
|
+
stream_max_workers: Maximum number of worker threads for streaming operations (minimum: 1).
|
|
211
|
+
Environment variable: ARIZE_STREAM_MAX_WORKERS.
|
|
212
|
+
Default: 8.
|
|
213
|
+
stream_max_queue_bound: Maximum queue size for streaming operations (minimum: 1).
|
|
214
|
+
Environment variable: ARIZE_STREAM_MAX_QUEUE_BOUND.
|
|
215
|
+
Default: 5000.
|
|
216
|
+
max_http_payload_size_mb: Maximum HTTP payload size in megabytes (minimum: 1).
|
|
217
|
+
Environment variable: ARIZE_MAX_HTTP_PAYLOAD_SIZE_MB.
|
|
218
|
+
Default: 100.
|
|
219
|
+
arize_directory: Directory for Arize SDK files (cache, logs, etc.).
|
|
220
|
+
Environment variable: ARIZE_DIRECTORY.
|
|
221
|
+
Default: "~/.arize".
|
|
222
|
+
enable_caching: Whether to enable local caching.
|
|
223
|
+
Environment variable: ARIZE_ENABLE_CACHING.
|
|
224
|
+
Default: True.
|
|
225
|
+
region: Arize region (e.g., US_CENTRAL, EU_WEST). When specified, overrides
|
|
226
|
+
individual host/port settings.
|
|
227
|
+
Environment variable: ARIZE_REGION.
|
|
228
|
+
Default: Region.UNSPECIFIED.
|
|
229
|
+
single_host: Single host to use for all endpoints. Overrides individual host settings.
|
|
230
|
+
Environment variable: ARIZE_SINGLE_HOST.
|
|
231
|
+
Default: "" (not set).
|
|
232
|
+
single_port: Single port to use for all endpoints. Overrides individual port settings (0-65535).
|
|
233
|
+
Environment variable: ARIZE_SINGLE_PORT.
|
|
234
|
+
Default: 0 (not set).
|
|
235
|
+
"""
|
|
236
|
+
|
|
237
|
+
api_key: str = field(
|
|
238
|
+
default_factory=lambda: _env_str(ENV_API_KEY, ""),
|
|
239
|
+
)
|
|
240
|
+
api_host: str = field(
|
|
241
|
+
default_factory=lambda: _env_str(ENV_API_HOST, DEFAULT_API_HOST)
|
|
242
|
+
)
|
|
243
|
+
api_scheme: str = field(
|
|
244
|
+
default_factory=lambda: _env_http_scheme(
|
|
245
|
+
ENV_API_SCHEME,
|
|
246
|
+
DEFAULT_API_SCHEME,
|
|
247
|
+
),
|
|
248
|
+
)
|
|
249
|
+
otlp_host: str = field(
|
|
250
|
+
default_factory=lambda: _env_str(ENV_OTLP_HOST, DEFAULT_OTLP_HOST)
|
|
251
|
+
)
|
|
252
|
+
otlp_scheme: str = field(
|
|
253
|
+
default_factory=lambda: _env_http_scheme(
|
|
254
|
+
ENV_OTLP_SCHEME,
|
|
255
|
+
DEFAULT_OTLP_SCHEME,
|
|
256
|
+
),
|
|
257
|
+
)
|
|
258
|
+
flight_host: str = field(
|
|
259
|
+
default_factory=lambda: _env_str(ENV_FLIGHT_HOST, DEFAULT_FLIGHT_HOST)
|
|
260
|
+
)
|
|
261
|
+
flight_port: int = field(
|
|
262
|
+
default_factory=lambda: _env_int(
|
|
263
|
+
ENV_FLIGHT_PORT, DEFAULT_FLIGHT_PORT, min_val=1, max_val=65535
|
|
264
|
+
)
|
|
265
|
+
)
|
|
266
|
+
flight_scheme: str = field(
|
|
267
|
+
default_factory=lambda: _env_str(
|
|
268
|
+
ENV_FLIGHT_SCHEME,
|
|
269
|
+
DEFAULT_FLIGHT_SCHEME,
|
|
270
|
+
),
|
|
271
|
+
)
|
|
272
|
+
pyarrow_max_chunksize: int = field(
|
|
273
|
+
default_factory=lambda: _env_int(
|
|
274
|
+
ENV_PYARROW_MAX_CHUNKSIZE,
|
|
275
|
+
DEFAULT_PYARROW_MAX_CHUNKSIZE,
|
|
276
|
+
min_val=1,
|
|
277
|
+
max_val=MAX_CHUNKSIZE,
|
|
278
|
+
)
|
|
279
|
+
)
|
|
280
|
+
request_verify: bool = field(
|
|
281
|
+
default_factory=lambda: _env_bool(
|
|
282
|
+
ENV_REQUEST_VERIFY, DEFAULT_REQUEST_VERIFY
|
|
283
|
+
)
|
|
284
|
+
)
|
|
285
|
+
stream_max_workers: int = field(
|
|
286
|
+
default_factory=lambda: _env_int(
|
|
287
|
+
ENV_STREAM_MAX_WORKERS, DEFAULT_STREAM_MAX_WORKERS, min_val=1
|
|
288
|
+
)
|
|
289
|
+
)
|
|
157
290
|
stream_max_queue_bound: int = field(
|
|
158
|
-
default_factory=
|
|
291
|
+
default_factory=lambda: _env_int(
|
|
292
|
+
ENV_STREAM_MAX_QUEUE_BOUND,
|
|
293
|
+
DEFAULT_STREAM_MAX_QUEUE_BOUND,
|
|
294
|
+
min_val=1,
|
|
295
|
+
)
|
|
159
296
|
)
|
|
160
297
|
max_http_payload_size_mb: float = field(
|
|
161
|
-
default_factory=
|
|
298
|
+
default_factory=lambda: _env_float(
|
|
299
|
+
ENV_MAX_HTTP_PAYLOAD_SIZE_MB,
|
|
300
|
+
DEFAULT_MAX_HTTP_PAYLOAD_SIZE_MB,
|
|
301
|
+
min_val=1,
|
|
302
|
+
)
|
|
303
|
+
)
|
|
304
|
+
arize_directory: str = field(
|
|
305
|
+
default_factory=lambda: _env_str(
|
|
306
|
+
ENV_ARIZE_DIRECTORY, DEFAULT_ARIZE_DIRECTORY
|
|
307
|
+
)
|
|
308
|
+
)
|
|
309
|
+
enable_caching: bool = field(
|
|
310
|
+
default_factory=lambda: _env_bool(
|
|
311
|
+
ENV_ENABLE_CACHING, DEFAULT_ENABLE_CACHING
|
|
312
|
+
)
|
|
313
|
+
)
|
|
314
|
+
region: Region = field(
|
|
315
|
+
default_factory=lambda: Region(_env_str(ENV_REGION, ""))
|
|
316
|
+
)
|
|
317
|
+
single_host: str = field(
|
|
318
|
+
default_factory=lambda: _env_str(ENV_SINGLE_HOST, "")
|
|
319
|
+
)
|
|
320
|
+
single_port: int = field(
|
|
321
|
+
default_factory=lambda: _env_int(
|
|
322
|
+
ENV_SINGLE_PORT, 0, min_val=0, max_val=65535
|
|
323
|
+
)
|
|
162
324
|
)
|
|
163
|
-
arize_direcory: str = field(default_factory=_arize_dir_factory)
|
|
164
|
-
enable_caching: bool = field(default_factory=_enable_cache_factory)
|
|
165
325
|
|
|
166
326
|
# Private, excluded from comparisons & repr
|
|
167
|
-
_headers: Dict[str, str] = field(init=False, repr=False, compare=False)
|
|
168
327
|
_gen_client: Any = field(default=None, repr=False, compare=False)
|
|
169
328
|
_gen_lock: threading.Lock = field(
|
|
170
329
|
default_factory=threading.Lock, repr=False, compare=False
|
|
171
330
|
)
|
|
172
331
|
|
|
173
|
-
def __post_init__(self):
|
|
332
|
+
def __post_init__(self) -> None:
|
|
333
|
+
"""Validate and configure SDK endpoints after initialization.
|
|
334
|
+
|
|
335
|
+
Raises:
|
|
336
|
+
MissingAPIKeyError: If API key is not provided.
|
|
337
|
+
"""
|
|
174
338
|
# Validate Configuration
|
|
175
339
|
if not self.api_key:
|
|
176
340
|
raise MissingAPIKeyError()
|
|
177
341
|
|
|
342
|
+
has_single_host = bool(self.single_host)
|
|
343
|
+
has_single_port = self.single_port != 0
|
|
344
|
+
has_region = self.region is not Region.UNSPECIFIED
|
|
345
|
+
if (has_single_host or has_single_port) and has_region:
|
|
346
|
+
logger.info(
|
|
347
|
+
"Multiple endpoint override options provided. Preference order is: "
|
|
348
|
+
"region > single_host/single_port > per-endpoint host/port."
|
|
349
|
+
)
|
|
350
|
+
|
|
351
|
+
# Single host override: if single_host is set, it overrides hosts
|
|
352
|
+
if has_single_host:
|
|
353
|
+
logger.info(
|
|
354
|
+
"Single host %r provided; overriding hosts configuration with single host.",
|
|
355
|
+
self.single_host,
|
|
356
|
+
)
|
|
357
|
+
object.__setattr__(self, "api_host", self.single_host)
|
|
358
|
+
object.__setattr__(self, "otlp_host", self.single_host)
|
|
359
|
+
object.__setattr__(self, "flight_host", self.single_host)
|
|
360
|
+
|
|
361
|
+
# Single port override: if single_port is set, it overrides ports
|
|
362
|
+
if has_single_port:
|
|
363
|
+
logger.info(
|
|
364
|
+
"Single port %s provided; overriding ports configuration with single port.",
|
|
365
|
+
self.single_port,
|
|
366
|
+
)
|
|
367
|
+
object.__setattr__(self, "flight_port", self.single_port)
|
|
368
|
+
|
|
369
|
+
# Region override: if region is set, it *always* wins over host/port fields
|
|
370
|
+
if has_region:
|
|
371
|
+
endpoints = REGION_ENDPOINTS[self.region]
|
|
372
|
+
|
|
373
|
+
# Override config (region trumps everything)
|
|
374
|
+
logger.info(
|
|
375
|
+
"Region %s provided; overriding hosts & ports configuration with region defaults.",
|
|
376
|
+
self.region.value,
|
|
377
|
+
)
|
|
378
|
+
object.__setattr__(self, "api_host", endpoints.api_host)
|
|
379
|
+
object.__setattr__(self, "otlp_host", endpoints.otlp_host)
|
|
380
|
+
object.__setattr__(self, "flight_host", endpoints.flight_host)
|
|
381
|
+
object.__setattr__(self, "flight_port", endpoints.flight_port)
|
|
382
|
+
|
|
178
383
|
@property
|
|
179
384
|
def cache_dir(self) -> str:
|
|
180
|
-
|
|
385
|
+
"""Return the path to the cache directory."""
|
|
386
|
+
return str(Path(self.arize_directory) / "cache")
|
|
181
387
|
|
|
182
388
|
@property
|
|
183
389
|
def api_url(self) -> str:
|
|
390
|
+
"""Return the base API URL."""
|
|
184
391
|
return _endpoint(self.api_scheme, self.api_host)
|
|
185
392
|
|
|
186
393
|
@property
|
|
187
394
|
def otlp_url(self) -> str:
|
|
395
|
+
"""Return the OTLP endpoint URL."""
|
|
188
396
|
return _endpoint(self.otlp_scheme, self.otlp_host, "/v1")
|
|
189
397
|
|
|
190
398
|
@property
|
|
191
399
|
def files_url(self) -> str:
|
|
400
|
+
"""Return the files upload endpoint URL."""
|
|
192
401
|
return _endpoint(self.api_scheme, self.api_host, "/v1/pandas_arrow")
|
|
193
402
|
|
|
194
403
|
@property
|
|
195
404
|
def records_url(self) -> str:
|
|
405
|
+
"""Return the records logging endpoint URL."""
|
|
196
406
|
return _endpoint(self.api_scheme, self.api_host, "/v1/log")
|
|
197
407
|
|
|
198
408
|
@property
|
|
199
|
-
def headers(self) ->
|
|
409
|
+
def headers(self) -> dict[str, str]:
|
|
410
|
+
"""Return HTTP headers for API requests."""
|
|
200
411
|
# Create base headers
|
|
201
412
|
return {
|
|
202
413
|
"authorization": self.api_key,
|
|
203
414
|
"sdk-language": "python",
|
|
204
|
-
"language-version":
|
|
415
|
+
"language-version": PYTHON_VERSION,
|
|
205
416
|
"sdk-version": __version__,
|
|
206
417
|
# "arize-space-id": self._space_id,
|
|
207
418
|
# "arize-interface": "batch",
|
|
@@ -209,31 +420,34 @@ class SDKConfiguration:
|
|
|
209
420
|
}
|
|
210
421
|
|
|
211
422
|
@property
|
|
212
|
-
def headers_grpc(self) ->
|
|
423
|
+
def headers_grpc(self) -> dict[str, str]:
|
|
424
|
+
"""Return headers for gRPC requests."""
|
|
213
425
|
return {
|
|
214
426
|
"authorization": self.api_key,
|
|
215
427
|
"Grpc-Metadata-sdk-language": "python",
|
|
216
|
-
"Grpc-Metadata-language-version":
|
|
428
|
+
"Grpc-Metadata-language-version": PYTHON_VERSION,
|
|
217
429
|
"Grpc-Metadata-sdk-version": __version__,
|
|
218
430
|
# "Grpc-Metadata-arize-space-id": space_id,
|
|
219
431
|
# "Grpc-Metadata-arize-interface": "stream",
|
|
220
432
|
}
|
|
221
433
|
|
|
222
434
|
def __repr__(self) -> str:
|
|
435
|
+
"""Return a detailed string representation with masked sensitive fields."""
|
|
223
436
|
# Dynamically build repr for all fields
|
|
224
437
|
lines = [f"{self.__class__.__name__}("]
|
|
225
438
|
for f in fields(self):
|
|
226
439
|
if not f.repr:
|
|
227
440
|
continue
|
|
228
441
|
val = getattr(self, f.name)
|
|
229
|
-
if f.name
|
|
442
|
+
if _is_sensitive_field(f.name):
|
|
230
443
|
val = _mask_secret(val, 6)
|
|
231
444
|
lines.append(f" {f.name}={val!r},")
|
|
232
445
|
lines.append(")")
|
|
233
446
|
return "\n".join(lines)
|
|
234
447
|
|
|
235
448
|
# TODO(Kiko): This may not be well placed in this class
|
|
236
|
-
def get_generated_client(self):
|
|
449
|
+
def get_generated_client(self) -> object:
|
|
450
|
+
"""Get or create the generated OpenAPI client instance."""
|
|
237
451
|
# If already cached, return immediately
|
|
238
452
|
if self._gen_client is not None:
|
|
239
453
|
return self._gen_client
|
|
@@ -243,21 +457,15 @@ class SDKConfiguration:
|
|
|
243
457
|
if self._gen_client is not None:
|
|
244
458
|
return self._gen_client
|
|
245
459
|
|
|
246
|
-
# Import lazily so
|
|
460
|
+
# Import lazily so extra dependencies can be
|
|
461
|
+
# enforced outside the configuration class
|
|
247
462
|
from arize._generated import api_client as gen
|
|
248
463
|
|
|
249
464
|
cfg = gen.Configuration(host=self.api_url)
|
|
250
465
|
if self.api_key:
|
|
251
|
-
cfg.
|
|
466
|
+
cfg.access_token = self.api_key
|
|
252
467
|
client = gen.ApiClient(cfg)
|
|
253
468
|
|
|
254
469
|
# Bypass frozen to set the cache once
|
|
255
470
|
object.__setattr__(self, "_gen_client", client)
|
|
256
471
|
return client
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
def get_python_version():
|
|
260
|
-
return (
|
|
261
|
-
f"{sys.version_info.major}.{sys.version_info.minor}."
|
|
262
|
-
f"{sys.version_info.micro}"
|
|
263
|
-
)
|
arize/constants/__init__.py
CHANGED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Constants and configuration values used across the Arize SDK."""
|
arize/constants/config.py
CHANGED
|
@@ -1,28 +1,35 @@
|
|
|
1
|
+
"""Configuration constants and environment variable names."""
|
|
2
|
+
|
|
1
3
|
# Authentication
|
|
2
4
|
ENV_API_KEY = "ARIZE_API_KEY"
|
|
3
5
|
|
|
4
6
|
# Server configuration env vars
|
|
7
|
+
ENV_REGION = "ARIZE_REGION"
|
|
5
8
|
ENV_API_HOST = "ARIZE_API_HOST"
|
|
9
|
+
ENV_API_SCHEME = "ARIZE_API_SCHEME"
|
|
6
10
|
ENV_OTLP_HOST = "ARIZE_OTLP_HOST"
|
|
11
|
+
ENV_OTLP_SCHEME = "ARIZE_OTLP_SCHEME"
|
|
7
12
|
ENV_FLIGHT_HOST = "ARIZE_FLIGHT_HOST"
|
|
8
13
|
ENV_FLIGHT_PORT = "ARIZE_FLIGHT_PORT"
|
|
9
|
-
|
|
14
|
+
ENV_FLIGHT_SCHEME = "ARIZE_FLIGHT_SCHEME"
|
|
15
|
+
ENV_SINGLE_HOST = "ARIZE_SINGLE_HOST"
|
|
16
|
+
ENV_SINGLE_PORT = "ARIZE_SINGLE_PORT"
|
|
10
17
|
ENV_PYARROW_MAX_CHUNKSIZE = "ARIZE_MAX_CHUNKSIZE"
|
|
11
18
|
ENV_REQUEST_VERIFY = "ARIZE_REQUEST_VERIFY"
|
|
12
|
-
ENV_INSECURE = "ARIZE_INSECURE"
|
|
13
19
|
ENV_MAX_HTTP_PAYLOAD_SIZE_MB = "ARIZE_MAX_HTTP_PAYLOAD_SIZE_MB"
|
|
14
20
|
ENV_ARIZE_DIRECTORY = "ARIZE_DIRECTORY"
|
|
15
21
|
ENV_ENABLE_CACHING = "ARIZE_ENABLE_CACHING"
|
|
16
22
|
|
|
17
23
|
# Server configuration default values
|
|
18
24
|
DEFAULT_API_HOST = "api.arize.com" # NOTE: Must not prefix with https://
|
|
25
|
+
DEFAULT_API_SCHEME = "https"
|
|
19
26
|
DEFAULT_OTLP_HOST = "otlp.arize.com" # NOTE: Must not prefix with https://
|
|
27
|
+
DEFAULT_OTLP_SCHEME = "https"
|
|
20
28
|
DEFAULT_FLIGHT_HOST = "flight.arize.com" # NOTE: Must not prefix with https://
|
|
21
29
|
DEFAULT_FLIGHT_PORT = 443
|
|
22
|
-
|
|
30
|
+
DEFAULT_FLIGHT_SCHEME = "grpc+tls"
|
|
23
31
|
DEFAULT_PYARROW_MAX_CHUNKSIZE = 10_000
|
|
24
32
|
DEFAULT_REQUEST_VERIFY = True
|
|
25
|
-
DEFAULT_INSECURE = False
|
|
26
33
|
DEFAULT_MAX_HTTP_PAYLOAD_SIZE_MB = 100
|
|
27
34
|
DEFAULT_ARIZE_DIRECTORY = "~/.arize"
|
|
28
35
|
DEFAULT_ENABLE_CACHING = True
|
arize/constants/ml.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
"""Machine learning constants and validation limits."""
|
|
2
|
+
|
|
1
3
|
import json
|
|
2
4
|
from pathlib import Path
|
|
3
5
|
|
|
@@ -30,7 +32,7 @@ MAX_PROMPT_TEMPLATE_VERSION_LENGTH_TRUNCATION = 50
|
|
|
30
32
|
MAX_NUMBER_OF_EMBEDDINGS = 30
|
|
31
33
|
MAX_EMBEDDING_DIMENSIONALITY = 20_000
|
|
32
34
|
# # The maximum number of classes for multi class
|
|
33
|
-
MAX_NUMBER_OF_MULTI_CLASS_CLASSES =
|
|
35
|
+
MAX_NUMBER_OF_MULTI_CLASS_CLASSES = 500
|
|
34
36
|
MAX_MULTI_CLASS_NAME_LENGTH = 100
|
|
35
37
|
# The maximum number of references in embedding similarity search params
|
|
36
38
|
MAX_NUMBER_OF_SIMILARITY_REFERENCES = 10
|
|
@@ -40,9 +42,9 @@ MAX_NUMBER_OF_SIMILARITY_REFERENCES = 10
|
|
|
40
42
|
# GENERATED_LLM_PARAMS_JSON_COL = "arize_generated_llm_params_json"
|
|
41
43
|
#
|
|
42
44
|
# # reserved columns for LLM run metadata
|
|
43
|
-
LLM_RUN_METADATA_TOTAL_TOKEN_COUNT_TAG_NAME = "total_token_count"
|
|
44
|
-
LLM_RUN_METADATA_PROMPT_TOKEN_COUNT_TAG_NAME = "prompt_token_count"
|
|
45
|
-
LLM_RUN_METADATA_RESPONSE_TOKEN_COUNT_TAG_NAME = "response_token_count"
|
|
45
|
+
LLM_RUN_METADATA_TOTAL_TOKEN_COUNT_TAG_NAME = "total_token_count" # noqa: S105
|
|
46
|
+
LLM_RUN_METADATA_PROMPT_TOKEN_COUNT_TAG_NAME = "prompt_token_count" # noqa: S105
|
|
47
|
+
LLM_RUN_METADATA_RESPONSE_TOKEN_COUNT_TAG_NAME = "response_token_count" # noqa: S105
|
|
46
48
|
LLM_RUN_METADATA_RESPONSE_LATENCY_MS_TAG_NAME = "response_latency_ms"
|
|
47
49
|
#
|
|
48
50
|
# all reserved tags
|
arize/constants/openinference.py
CHANGED
arize/constants/pyarrow.py
CHANGED
arize/constants/spans.py
CHANGED
|
@@ -1,4 +1,6 @@
|
|
|
1
|
-
|
|
1
|
+
"""Span-related constants and validation limits for tracing."""
|
|
2
|
+
|
|
3
|
+
# The default format used to parse datetime objects from strings
|
|
2
4
|
DEFAULT_DATETIME_FMT = "%Y-%m-%dT%H:%M:%S.%f+00:00"
|
|
3
5
|
# Minimum/Maximum number of characters for span/trace/parent ids in spans
|
|
4
6
|
SPAN_ID_MIN_STR_LENGTH = 12
|
arize/datasets/__init__.py
CHANGED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Dataset management and validation utilities for the Arize SDK."""
|