arize 8.0.0a2__py3-none-any.whl → 8.0.0a4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arize/_exporter/client.py +28 -8
- arize/_exporter/parsers/tracing_data_parser.py +7 -4
- arize/_exporter/validation.py +7 -3
- arize/_flight/client.py +11 -14
- arize/_lazy.py +38 -36
- arize/client.py +36 -4
- arize/config.py +37 -3
- arize/constants/config.py +6 -0
- arize/constants/ml.py +33 -31
- arize/constants/model_mapping.json +199 -0
- arize/exceptions/base.py +47 -42
- arize/exceptions/models.py +12 -0
- arize/exceptions/parameters.py +342 -324
- arize/exceptions/values.py +16 -0
- arize/logging.py +6 -6
- arize/models/__init__.py +0 -0
- arize/models/batch_validation/__init__.py +0 -0
- arize/models/batch_validation/errors.py +1145 -0
- arize/models/batch_validation/validator.py +3711 -0
- arize/models/bounded_executor.py +34 -0
- arize/models/client.py +807 -0
- arize/models/stream_validation.py +214 -0
- arize/spans/client.py +55 -188
- arize/spans/validation/annotations/annotations_validation.py +8 -4
- arize/spans/validation/annotations/dataframe_form_validation.py +6 -2
- arize/spans/validation/annotations/value_validation.py +6 -3
- arize/spans/validation/common/argument_validation.py +5 -2
- arize/spans/validation/common/dataframe_form_validation.py +5 -2
- arize/spans/validation/evals/evals_validation.py +8 -4
- arize/spans/validation/evals/value_validation.py +8 -4
- arize/spans/validation/metadata/argument_validation.py +5 -2
- arize/spans/validation/spans/spans_validation.py +8 -4
- arize/spans/validation/spans/value_validation.py +8 -5
- arize/types.py +1421 -1366
- arize/utils/arrow.py +143 -2
- arize/utils/casting.py +396 -0
- arize/utils/proto.py +751 -310
- arize/version.py +1 -1
- {arize-8.0.0a2.dist-info → arize-8.0.0a4.dist-info}/METADATA +165 -9
- {arize-8.0.0a2.dist-info → arize-8.0.0a4.dist-info}/RECORD +43 -34
- /arize/utils/{pandas.py → dataframe.py} +0 -0
- {arize-8.0.0a2.dist-info → arize-8.0.0a4.dist-info}/WHEEL +0 -0
- {arize-8.0.0a2.dist-info → arize-8.0.0a4.dist-info}/licenses/LICENSE.md +0 -0
arize/_exporter/client.py
CHANGED
|
@@ -1,6 +1,4 @@
|
|
|
1
1
|
# type: ignore[pb2]
|
|
2
|
-
from __future__ import annotations
|
|
3
|
-
|
|
4
2
|
import logging
|
|
5
3
|
from dataclasses import dataclass
|
|
6
4
|
from datetime import datetime
|
|
@@ -14,17 +12,14 @@ from google.protobuf.wrappers_pb2 import Int64Value
|
|
|
14
12
|
from pyarrow import flight
|
|
15
13
|
from tqdm import tqdm
|
|
16
14
|
|
|
17
|
-
from arize._exporter.parsers.tracing_data_parser import (
|
|
18
|
-
OtelTracingDataTransformer,
|
|
19
|
-
)
|
|
20
15
|
from arize._exporter.validation import (
|
|
21
16
|
validate_input_type,
|
|
22
|
-
validate_input_value,
|
|
23
17
|
validate_start_end_time,
|
|
24
18
|
)
|
|
25
19
|
from arize._generated.protocol.flight import export_pb2
|
|
20
|
+
from arize.logging import CtxAdapter
|
|
26
21
|
from arize.types import Environments, SimilaritySearchParams
|
|
27
|
-
from arize.utils.pandas import reset_dataframe_index
|
|
22
|
+
from arize.utils.dataframe import reset_dataframe_index
|
|
28
23
|
from arize.utils.proto import get_pb_flight_doput_request
|
|
29
24
|
|
|
30
25
|
logger = logging.getLogger(__name__)
|
|
@@ -124,6 +119,10 @@ class ArizeExportClient:
|
|
|
124
119
|
df.drop(null_columns, axis=1, inplace=True)
|
|
125
120
|
|
|
126
121
|
if environment == Environments.TRACING:
|
|
122
|
+
from arize._exporter.parsers.tracing_data_parser import (
|
|
123
|
+
OtelTracingDataTransformer,
|
|
124
|
+
)
|
|
125
|
+
|
|
127
126
|
# by default, transform the exported tracing data so that it's
|
|
128
127
|
# easier to work with in Python
|
|
129
128
|
df = OtelTracingDataTransformer().transform(df)
|
|
@@ -238,6 +237,27 @@ class ArizeExportClient:
|
|
|
238
237
|
columns: List | None = None,
|
|
239
238
|
stream_chunk_size: int | None = None,
|
|
240
239
|
) -> Tuple[flight.FlightStreamReader, int]:
|
|
240
|
+
# Bind common context for this operation
|
|
241
|
+
log = CtxAdapter(
|
|
242
|
+
logger,
|
|
243
|
+
{
|
|
244
|
+
"component": "exporter",
|
|
245
|
+
"operation": "export_to_df",
|
|
246
|
+
"space_id": space_id,
|
|
247
|
+
"model_id": model_id,
|
|
248
|
+
"environment": environment.name,
|
|
249
|
+
"model_version": model_version,
|
|
250
|
+
"batch_id": batch_id,
|
|
251
|
+
"include_actuals": include_actuals,
|
|
252
|
+
"where": where,
|
|
253
|
+
"columns": columns,
|
|
254
|
+
"similarity_search_params": similarity_search_params,
|
|
255
|
+
"stream_chunk_size": stream_chunk_size,
|
|
256
|
+
"start_time": start_time,
|
|
257
|
+
"end_time": end_time,
|
|
258
|
+
},
|
|
259
|
+
)
|
|
260
|
+
log.debug("Getting stream reader...")
|
|
241
261
|
validate_input_type(space_id, "space_id", str)
|
|
242
262
|
validate_input_type(model_id, "model_id", str)
|
|
243
263
|
validate_input_type(environment, "environment", Environments)
|
|
@@ -281,7 +301,7 @@ class ArizeExportClient:
|
|
|
281
301
|
json_format.MessageToJson(query_descriptor) # type: ignore
|
|
282
302
|
),
|
|
283
303
|
)
|
|
284
|
-
logger.
|
|
304
|
+
logger.info("Fetching data...")
|
|
285
305
|
|
|
286
306
|
if flight_info.total_records == 0:
|
|
287
307
|
logger.warning("Query returns no data")
|
arize/_exporter/parsers/tracing_data_parser.py
CHANGED
|
@@ -1,9 +1,8 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import json
|
|
2
4
|
import logging
|
|
3
|
-
from typing import List
|
|
4
|
-
|
|
5
|
-
import numpy as np
|
|
6
|
-
import pandas as pd
|
|
5
|
+
from typing import TYPE_CHECKING, List
|
|
7
6
|
|
|
8
7
|
from arize.spans.columns import (
|
|
9
8
|
SPAN_ATTRIBUTES_EMBEDDING_EMBEDDINGS_COL,
|
|
@@ -19,6 +18,10 @@ from arize.spans.columns import (
|
|
|
19
18
|
SPAN_START_TIME_COL,
|
|
20
19
|
)
|
|
21
20
|
|
|
21
|
+
if TYPE_CHECKING:
|
|
22
|
+
import numpy as np
|
|
23
|
+
import pandas as pd
|
|
24
|
+
|
|
22
25
|
logger = logging.getLogger(__name__)
|
|
23
26
|
|
|
24
27
|
|
arize/_exporter/validation.py
CHANGED
|
@@ -1,5 +1,9 @@
|
|
|
1
|
-
from
|
|
2
|
-
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING, Any
|
|
4
|
+
|
|
5
|
+
if TYPE_CHECKING:
|
|
6
|
+
from datetime import datetime
|
|
3
7
|
|
|
4
8
|
|
|
5
9
|
def validate_input_type(
|
|
@@ -35,6 +39,6 @@ def validate_input_value(
|
|
|
35
39
|
)
|
|
36
40
|
|
|
37
41
|
|
|
38
|
-
def validate_start_end_time(start_time, end_time: datetime) -> None:
|
|
42
|
+
def validate_start_end_time(start_time: datetime, end_time: datetime) -> None:
|
|
39
43
|
if start_time >= end_time:
|
|
40
44
|
raise ValueError("start_time must be before end_time")
|
arize/_flight/client.py
CHANGED
|
@@ -2,29 +2,28 @@
|
|
|
2
2
|
from __future__ import annotations
|
|
3
3
|
|
|
4
4
|
import base64
|
|
5
|
+
import logging
|
|
5
6
|
from dataclasses import dataclass, field
|
|
6
|
-
from typing import Any, Dict, List, Tuple
|
|
7
|
+
from typing import TYPE_CHECKING, Any, Dict, List, Tuple
|
|
7
8
|
|
|
8
|
-
import pyarrow as pa
|
|
9
9
|
from google.protobuf import json_format
|
|
10
10
|
from pyarrow import flight
|
|
11
11
|
|
|
12
|
-
from arize._flight.types import FlightRequestType
|
|
13
12
|
from arize._generated.protocol.flight.ingest_pb2 import (
|
|
14
13
|
WriteSpanAnnotationResponse,
|
|
15
14
|
WriteSpanAttributesMetadataResponse,
|
|
16
15
|
WriteSpanEvaluationResponse,
|
|
17
16
|
)
|
|
18
17
|
from arize.config import get_python_version
|
|
19
|
-
from arize.constants.config import (
|
|
20
|
-
DEFAULT_FLIGHT_HOST,
|
|
21
|
-
DEFAULT_FLIGHT_PORT,
|
|
22
|
-
DEFAULT_FLIGHT_TRANSPORT_SCHEME,
|
|
23
|
-
)
|
|
24
18
|
from arize.logging import log_a_list
|
|
25
19
|
from arize.utils.proto import get_pb_flight_doput_request, get_pb_schema_tracing
|
|
26
20
|
from arize.version import __version__
|
|
27
21
|
|
|
22
|
+
if TYPE_CHECKING:
|
|
23
|
+
import pyarrow as pa
|
|
24
|
+
|
|
25
|
+
from arize._flight.client import FlightRequestType
|
|
26
|
+
|
|
28
27
|
BytesPair = Tuple[bytes, bytes]
|
|
29
28
|
Headers = List[BytesPair]
|
|
30
29
|
WriteSpanResponse = (
|
|
@@ -32,7 +31,6 @@ WriteSpanResponse = (
|
|
|
32
31
|
| WriteSpanAnnotationResponse
|
|
33
32
|
| WriteSpanAttributesMetadataResponse
|
|
34
33
|
)
|
|
35
|
-
import logging
|
|
36
34
|
|
|
37
35
|
logger = logging.getLogger(__name__)
|
|
38
36
|
|
|
@@ -40,10 +38,10 @@ logger = logging.getLogger(__name__)
|
|
|
40
38
|
@dataclass(frozen=True)
|
|
41
39
|
class ArizeFlightClient:
|
|
42
40
|
api_key: str = field(repr=False)
|
|
43
|
-
host: str
|
|
44
|
-
port: int
|
|
45
|
-
scheme: str
|
|
46
|
-
request_verify: bool
|
|
41
|
+
host: str
|
|
42
|
+
port: int
|
|
43
|
+
scheme: str
|
|
44
|
+
request_verify: bool
|
|
47
45
|
|
|
48
46
|
# internal cache for the underlying FlightClient
|
|
49
47
|
_client: flight.FlightClient | None = field(
|
|
@@ -54,7 +52,6 @@ class ArizeFlightClient:
|
|
|
54
52
|
|
|
55
53
|
@property
|
|
56
54
|
def headers(self) -> Headers:
|
|
57
|
-
# Keep the typing simple: (bytes, bytes)
|
|
58
55
|
return [
|
|
59
56
|
(b"origin", b"arize-logging-client"),
|
|
60
57
|
(b"auth-token-bin", str(self.api_key).encode("utf-8")),
|
arize/_lazy.py
CHANGED
|
@@ -1,44 +1,13 @@
|
|
|
1
|
+
# src/arize/_lazy.py
|
|
1
2
|
from __future__ import annotations
|
|
2
3
|
|
|
3
4
|
import sys
|
|
4
5
|
import threading
|
|
5
6
|
from importlib import import_module
|
|
6
|
-
from typing import Any, Dict, Tuple
|
|
7
|
+
from typing import TYPE_CHECKING, Any, Dict, Tuple
|
|
7
8
|
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class OptionalDependencyError(ImportError): ...
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
def _ensure_imports(
|
|
15
|
-
extra_key: str | None,
|
|
16
|
-
required: Tuple[str, ...],
|
|
17
|
-
pkgname="arize",
|
|
18
|
-
):
|
|
19
|
-
if not required:
|
|
20
|
-
return
|
|
21
|
-
missing = []
|
|
22
|
-
for p in required:
|
|
23
|
-
try:
|
|
24
|
-
import_module(p)
|
|
25
|
-
except Exception:
|
|
26
|
-
missing.append(p)
|
|
27
|
-
if missing:
|
|
28
|
-
raise OptionalDependencyError(
|
|
29
|
-
f"Missing optional dependencies: {', '.join(missing)}. "
|
|
30
|
-
f"Install via: pip install {pkgname}[{extra_key}]"
|
|
31
|
-
)
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
def _dynamic_import(modname: str, retries: int = 2):
|
|
35
|
-
for i in range(retries):
|
|
36
|
-
try:
|
|
37
|
-
return import_module(modname)
|
|
38
|
-
except (ModuleNotFoundError, ImportError, KeyError):
|
|
39
|
-
sys.modules.pop(modname, None)
|
|
40
|
-
if i + 1 == retries:
|
|
41
|
-
raise
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from arize.config import SDKConfiguration
|
|
42
11
|
|
|
43
12
|
|
|
44
13
|
class LazySubclientsMixin:
|
|
@@ -63,7 +32,7 @@ class LazySubclientsMixin:
|
|
|
63
32
|
|
|
64
33
|
module_path, class_name = subs[name]
|
|
65
34
|
extra_key, required = self._EXTRAS.get(name, (None, ()))
|
|
66
|
-
|
|
35
|
+
require(extra_key, required)
|
|
67
36
|
|
|
68
37
|
module = _dynamic_import(module_path)
|
|
69
38
|
klass = getattr(module, class_name)
|
|
@@ -79,3 +48,36 @@ class LazySubclientsMixin:
|
|
|
79
48
|
|
|
80
49
|
def __dir__(self):
|
|
81
50
|
return sorted({*super().__dir__(), *self._SUBCLIENTS.keys()})
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class OptionalDependencyError(ImportError): ...
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def require(
|
|
57
|
+
extra_key: str | None,
|
|
58
|
+
required: Tuple[str, ...],
|
|
59
|
+
pkgname="arize",
|
|
60
|
+
):
|
|
61
|
+
if not required:
|
|
62
|
+
return
|
|
63
|
+
missing = []
|
|
64
|
+
for p in required:
|
|
65
|
+
try:
|
|
66
|
+
import_module(p)
|
|
67
|
+
except Exception:
|
|
68
|
+
missing.append(p)
|
|
69
|
+
if missing:
|
|
70
|
+
raise OptionalDependencyError(
|
|
71
|
+
f"Missing optional dependencies: {', '.join(missing)}. "
|
|
72
|
+
f"Install via: pip install {pkgname}[{extra_key}]"
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _dynamic_import(modname: str, retries: int = 2):
|
|
77
|
+
for i in range(retries):
|
|
78
|
+
try:
|
|
79
|
+
return import_module(modname)
|
|
80
|
+
except (ModuleNotFoundError, ImportError, KeyError):
|
|
81
|
+
sys.modules.pop(modname, None)
|
|
82
|
+
if i + 1 == retries:
|
|
83
|
+
raise
|
arize/client.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
# src/arize/client.py
|
|
2
1
|
from __future__ import annotations
|
|
3
2
|
|
|
4
3
|
from typing import TYPE_CHECKING
|
|
@@ -9,13 +8,16 @@ from arize.config import SDKConfiguration
|
|
|
9
8
|
if TYPE_CHECKING:
|
|
10
9
|
from arize.datasets.client import DatasetsClient
|
|
11
10
|
from arize.experiments.client import ExperimentsClient
|
|
11
|
+
from arize.models.client import MLModelsClient
|
|
12
12
|
from arize.spans.client import SpansClient
|
|
13
13
|
|
|
14
14
|
|
|
15
|
-
# TODO(Kiko):
|
|
16
|
-
# TODO(Kiko):
|
|
15
|
+
# TODO(Kiko): Go through main APIs and add CtxAdapter where missing
|
|
16
|
+
# TODO(Kiko): Search and handle other TODOs
|
|
17
|
+
# TODO(Kiko): Go over **every file** and do not import anything at runtime, use `if TYPE_CHECKING`
|
|
18
|
+
# with `from __future__ import annotations` (must include for Python < 3.11)
|
|
19
|
+
# TODO(Kiko): MIMIC Explainer not done
|
|
17
20
|
# TODO(Kiko): Go over docstrings
|
|
18
|
-
# TODO(Kiko): Missing a __repr__ method
|
|
19
21
|
class ArizeClient(LazySubclientsMixin):
|
|
20
22
|
"""
|
|
21
23
|
Root client for the Arize SDK. All parameters are optional. If not provided, they will be read
|
|
@@ -42,6 +44,7 @@ class ArizeClient(LazySubclientsMixin):
|
|
|
42
44
|
"datasets": ("arize.datasets.client", "DatasetsClient"),
|
|
43
45
|
"experiments": ("arize.experiments.client", "ExperimentsClient"),
|
|
44
46
|
"spans": ("arize.spans.client", "SpansClient"),
|
|
47
|
+
"models": ("arize.models.client", "MLModelsClient"),
|
|
45
48
|
}
|
|
46
49
|
_EXTRAS = {
|
|
47
50
|
# Gate only the generated-backed ones
|
|
@@ -65,6 +68,10 @@ class ArizeClient(LazySubclientsMixin):
|
|
|
65
68
|
"tqdm",
|
|
66
69
|
),
|
|
67
70
|
),
|
|
71
|
+
# Imports are gated in each method of the models client
|
|
72
|
+
# This is to allow for very lean package install if people only
|
|
73
|
+
# want to stream ML records
|
|
74
|
+
"models": (None, ()),
|
|
68
75
|
}
|
|
69
76
|
|
|
70
77
|
def __init__(
|
|
@@ -76,6 +83,8 @@ class ArizeClient(LazySubclientsMixin):
|
|
|
76
83
|
flight_server_port: int | None = None,
|
|
77
84
|
flight_scheme: str | None = None,
|
|
78
85
|
request_verify: bool | None = None,
|
|
86
|
+
stream_max_workers: int | None = None,
|
|
87
|
+
stream_max_queue_bound: int | None = None,
|
|
79
88
|
):
|
|
80
89
|
cfg_kwargs: dict = {}
|
|
81
90
|
if api_key is not None:
|
|
@@ -92,6 +101,10 @@ class ArizeClient(LazySubclientsMixin):
|
|
|
92
101
|
cfg_kwargs["flight_scheme"] = flight_scheme
|
|
93
102
|
if request_verify is not None:
|
|
94
103
|
cfg_kwargs["request_verify"] = request_verify
|
|
104
|
+
if stream_max_workers is not None:
|
|
105
|
+
cfg_kwargs["stream_max_workers"] = stream_max_workers
|
|
106
|
+
if stream_max_queue_bound is not None:
|
|
107
|
+
cfg_kwargs["stream_max_queue_bound"] = stream_max_queue_bound
|
|
95
108
|
|
|
96
109
|
# Only the explicitly provided fields are passed; the rest use
|
|
97
110
|
# SDKConfiguration’s default factories / defaults.
|
|
@@ -109,3 +122,22 @@ class ArizeClient(LazySubclientsMixin):
|
|
|
109
122
|
@property
|
|
110
123
|
def spans(self) -> SpansClient:
|
|
111
124
|
return self.__getattr__("spans")
|
|
125
|
+
|
|
126
|
+
@property
|
|
127
|
+
def models(self) -> MLModelsClient:
|
|
128
|
+
return self.__getattr__("models")
|
|
129
|
+
|
|
130
|
+
def __repr__(self) -> str:
|
|
131
|
+
lines = [f"{self.__class__.__name__}("]
|
|
132
|
+
# Indent the SDKConfiguration repr
|
|
133
|
+
cfg_repr = repr(self.sdk_config).splitlines()
|
|
134
|
+
lines.append(f" sdk_config={cfg_repr[0]}")
|
|
135
|
+
lines.extend(" " + line for line in cfg_repr[1:])
|
|
136
|
+
# Add subclient states
|
|
137
|
+
lines.append(" subclients={")
|
|
138
|
+
for name in self._SUBCLIENTS:
|
|
139
|
+
state = "loaded" if name in self._lazy_cache else "lazy"
|
|
140
|
+
lines.append(f" {name!r}: {state},")
|
|
141
|
+
lines.append(" }")
|
|
142
|
+
lines.append(")")
|
|
143
|
+
return "\n".join(lines)
|
arize/config.py
CHANGED
|
@@ -11,6 +11,8 @@ from arize.constants.config import (
|
|
|
11
11
|
DEFAULT_FLIGHT_PORT,
|
|
12
12
|
DEFAULT_FLIGHT_TRANSPORT_SCHEME,
|
|
13
13
|
DEFAULT_REQUEST_VERIFY,
|
|
14
|
+
DEFAULT_STREAM_MAX_QUEUE_BOUND,
|
|
15
|
+
DEFAULT_STREAM_MAX_WORKERS,
|
|
14
16
|
ENV_API_HOST,
|
|
15
17
|
ENV_API_INSECURE,
|
|
16
18
|
ENV_API_KEY,
|
|
@@ -18,6 +20,8 @@ from arize.constants.config import (
|
|
|
18
20
|
ENV_FLIGHT_PORT,
|
|
19
21
|
ENV_FLIGHT_TRANSPORT_SCHEME,
|
|
20
22
|
ENV_REQUEST_VERIFY,
|
|
23
|
+
ENV_STREAM_MAX_QUEUE_BOUND,
|
|
24
|
+
ENV_STREAM_MAX_WORKERS,
|
|
21
25
|
)
|
|
22
26
|
from arize.exceptions.auth import MissingAPIKeyError
|
|
23
27
|
from arize.version import __version__
|
|
@@ -62,6 +66,16 @@ def _verify_factory() -> bool:
|
|
|
62
66
|
return _parse_bool(os.getenv(ENV_REQUEST_VERIFY, DEFAULT_REQUEST_VERIFY))
|
|
63
67
|
|
|
64
68
|
|
|
69
|
+
def _stream_max_workers_factory() -> int:
|
|
70
|
+
return int(os.getenv(ENV_STREAM_MAX_WORKERS, DEFAULT_STREAM_MAX_WORKERS))
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _stream_max_queue_bound_factory() -> int:
|
|
74
|
+
return int(
|
|
75
|
+
os.getenv(ENV_STREAM_MAX_QUEUE_BOUND, DEFAULT_STREAM_MAX_QUEUE_BOUND)
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
|
|
65
79
|
def _mask_secret(secret: str, N: int = 4) -> str:
|
|
66
80
|
"""Show first N chars then '***'; empty string if empty."""
|
|
67
81
|
return f"{secret[:N]}***"
|
|
@@ -80,6 +94,10 @@ class SDKConfiguration:
|
|
|
80
94
|
flight_server_port: int = field(default_factory=_flight_port_factory)
|
|
81
95
|
flight_scheme: str = field(default_factory=_flight_scheme_factory)
|
|
82
96
|
request_verify: bool = field(default_factory=_verify_factory)
|
|
97
|
+
stream_max_workers: int = field(default_factory=_stream_max_workers_factory)
|
|
98
|
+
stream_max_queue_bound: int = field(
|
|
99
|
+
default_factory=_stream_max_queue_bound_factory
|
|
100
|
+
)
|
|
83
101
|
|
|
84
102
|
# Private, excluded from comparisons & repr
|
|
85
103
|
_headers: Dict[str, str] = field(init=False, repr=False, compare=False)
|
|
@@ -97,6 +115,10 @@ class SDKConfiguration:
|
|
|
97
115
|
def files_url(self) -> str:
|
|
98
116
|
return _endpoint(self.api_scheme, self.api_host, "/v1/pandas_arrow")
|
|
99
117
|
|
|
118
|
+
@property
|
|
119
|
+
def records_url(self) -> str:
|
|
120
|
+
return _endpoint(self.api_scheme, self.api_host, "/v1/log")
|
|
121
|
+
|
|
100
122
|
@property
|
|
101
123
|
def headers(self) -> Dict[str, str]:
|
|
102
124
|
# Create base headers
|
|
@@ -110,17 +132,29 @@ class SDKConfiguration:
|
|
|
110
132
|
# "sync": "0", # Defaults to async logging
|
|
111
133
|
}
|
|
112
134
|
|
|
135
|
+
@property
|
|
136
|
+
def headers_grpc(self) -> Dict[str, str]:
|
|
137
|
+
return {
|
|
138
|
+
"authorization": self.api_key,
|
|
139
|
+
"Grpc-Metadata-sdk-language": "python",
|
|
140
|
+
"Grpc-Metadata-language-version": get_python_version(),
|
|
141
|
+
"Grpc-Metadata-sdk-version": __version__,
|
|
142
|
+
# "Grpc-Metadata-arize-space-id": space_id,
|
|
143
|
+
# "Grpc-Metadata-arize-interface": "stream",
|
|
144
|
+
}
|
|
145
|
+
|
|
113
146
|
def __repr__(self) -> str:
|
|
114
147
|
# Dynamically build repr for all fields
|
|
115
|
-
|
|
148
|
+
lines = [f"{self.__class__.__name__}("]
|
|
116
149
|
for f in fields(self):
|
|
117
150
|
if not f.repr:
|
|
118
151
|
continue
|
|
119
152
|
val = getattr(self, f.name)
|
|
120
153
|
if f.name == "api_key":
|
|
121
154
|
val = _mask_secret(val, 6)
|
|
122
|
-
|
|
123
|
-
|
|
155
|
+
lines.append(f" {f.name}={val!r},")
|
|
156
|
+
lines.append(")")
|
|
157
|
+
return "\n".join(lines)
|
|
124
158
|
|
|
125
159
|
# TODO(Kiko): This may not be well placed in this class
|
|
126
160
|
def get_generated_client(self):
|
arize/constants/config.py
CHANGED
|
@@ -17,6 +17,12 @@ DEFAULT_FLIGHT_PORT = 443
|
|
|
17
17
|
DEFAULT_FLIGHT_TRANSPORT_SCHEME = "grpc+tls"
|
|
18
18
|
DEFAULT_REQUEST_VERIFY = True
|
|
19
19
|
|
|
20
|
+
# ML Streaming configuration
|
|
21
|
+
ENV_STREAM_MAX_WORKERS = "ARIZE_STREAM_MAX_WORKERS"
|
|
22
|
+
ENV_STREAM_MAX_QUEUE_BOUND = "ARIZE_STREAM_MAX_QUEUE_BOUND"
|
|
23
|
+
DEFAULT_STREAM_MAX_WORKERS = 8
|
|
24
|
+
DEFAULT_STREAM_MAX_QUEUE_BOUND = 5000
|
|
25
|
+
|
|
20
26
|
# Logging
|
|
21
27
|
ENV_LOG_ENABLE = "ARIZE_LOG_ENABLE"
|
|
22
28
|
ENV_LOG_LEVEL = "ARIZE_LOG_LEVEL"
|
arize/constants/ml.py
CHANGED
|
@@ -1,28 +1,31 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
1
4
|
# MAX_BYTES_PER_BULK_RECORD = 100000
|
|
2
5
|
# MAX_DAYS_WITHIN_RANGE = 365
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
6
|
+
MIN_PREDICTION_ID_LEN = 1
|
|
7
|
+
MAX_PREDICTION_ID_LEN = 512
|
|
8
|
+
MIN_DOCUMENT_ID_LEN = 1
|
|
9
|
+
MAX_DOCUMENT_ID_LEN = 128
|
|
7
10
|
# # The maximum number of character for tag values
|
|
8
11
|
MAX_TAG_LENGTH = 20_000
|
|
9
|
-
|
|
12
|
+
MAX_TAG_LENGTH_TRUNCATION = 1_000
|
|
10
13
|
# # The maximum number of character for embedding raw data
|
|
11
14
|
MAX_RAW_DATA_CHARACTERS = 2_000_000
|
|
12
|
-
|
|
13
|
-
#
|
|
15
|
+
MAX_RAW_DATA_CHARACTERS_TRUNCATION = 5_000
|
|
16
|
+
# The maximum number of acceptable years in the past from current time for prediction_timestamps
|
|
14
17
|
MAX_PAST_YEARS_FROM_CURRENT_TIME = 5
|
|
15
|
-
#
|
|
18
|
+
# The maximum number of acceptable years in the future from current time for prediction_timestamps
|
|
16
19
|
MAX_FUTURE_YEARS_FROM_CURRENT_TIME = 1
|
|
17
20
|
# # The maximum number of character for llm model name
|
|
18
|
-
|
|
19
|
-
|
|
21
|
+
MAX_LLM_MODEL_NAME_LENGTH = 20_000
|
|
22
|
+
MAX_LLM_MODEL_NAME_LENGTH_TRUNCATION = 50
|
|
20
23
|
# # The maximum number of character for prompt template
|
|
21
|
-
|
|
22
|
-
|
|
24
|
+
MAX_PROMPT_TEMPLATE_LENGTH = 50_000
|
|
25
|
+
MAX_PROMPT_TEMPLATE_LENGTH_TRUNCATION = 5_000
|
|
23
26
|
# # The maximum number of character for prompt template version
|
|
24
|
-
|
|
25
|
-
|
|
27
|
+
MAX_PROMPT_TEMPLATE_VERSION_LENGTH = 20_000
|
|
28
|
+
MAX_PROMPT_TEMPLATE_VERSION_LENGTH_TRUNCATION = 50
|
|
26
29
|
# # The maximum number of embeddings
|
|
27
30
|
MAX_NUMBER_OF_EMBEDDINGS = 30
|
|
28
31
|
MAX_EMBEDDING_DIMENSIONALITY = 20_000
|
|
@@ -37,21 +40,20 @@ MAX_NUMBER_OF_SIMILARITY_REFERENCES = 10
|
|
|
37
40
|
# GENERATED_LLM_PARAMS_JSON_COL = "arize_generated_llm_params_json"
|
|
38
41
|
#
|
|
39
42
|
# # reserved columns for LLM run metadata
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
#
|
|
45
|
-
# # all reserved tags
|
|
46
|
-
# RESERVED_TAG_COLS = [
|
|
47
|
-
# LLM_RUN_METADATA_TOTAL_TOKEN_COUNT_TAG_NAME,
|
|
48
|
-
# LLM_RUN_METADATA_PROMPT_TOKEN_COUNT_TAG_NAME,
|
|
49
|
-
# LLM_RUN_METADATA_RESPONSE_TOKEN_COUNT_TAG_NAME,
|
|
50
|
-
# LLM_RUN_METADATA_RESPONSE_LATENCY_MS_TAG_NAME,
|
|
51
|
-
# ]
|
|
52
|
-
#
|
|
53
|
-
#
|
|
54
|
-
# path = Path(__file__).with_name("model_mapping.json")
|
|
55
|
-
# with path.open("r") as f:
|
|
56
|
-
# MODEL_MAPPING_CONFIG = json.load(f)
|
|
43
|
+
LLM_RUN_METADATA_TOTAL_TOKEN_COUNT_TAG_NAME = "total_token_count"
|
|
44
|
+
LLM_RUN_METADATA_PROMPT_TOKEN_COUNT_TAG_NAME = "prompt_token_count"
|
|
45
|
+
LLM_RUN_METADATA_RESPONSE_TOKEN_COUNT_TAG_NAME = "response_token_count"
|
|
46
|
+
LLM_RUN_METADATA_RESPONSE_LATENCY_MS_TAG_NAME = "response_latency_ms"
|
|
57
47
|
#
|
|
48
|
+
# all reserved tags
|
|
49
|
+
RESERVED_TAG_COLS = [
|
|
50
|
+
LLM_RUN_METADATA_TOTAL_TOKEN_COUNT_TAG_NAME,
|
|
51
|
+
LLM_RUN_METADATA_PROMPT_TOKEN_COUNT_TAG_NAME,
|
|
52
|
+
LLM_RUN_METADATA_RESPONSE_TOKEN_COUNT_TAG_NAME,
|
|
53
|
+
LLM_RUN_METADATA_RESPONSE_LATENCY_MS_TAG_NAME,
|
|
54
|
+
]
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
path = Path(__file__).with_name("model_mapping.json")
|
|
58
|
+
with path.open("r") as f:
|
|
59
|
+
MODEL_MAPPING_CONFIG = json.load(f)
|