arize 8.0.0b1__py3-none-any.whl → 8.0.0b4__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- arize/__init__.py +9 -2
- arize/_client_factory.py +50 -0
- arize/_exporter/client.py +18 -17
- arize/_exporter/parsers/tracing_data_parser.py +9 -4
- arize/_exporter/validation.py +1 -1
- arize/_flight/client.py +37 -17
- arize/_generated/api_client/api/datasets_api.py +6 -6
- arize/_generated/api_client/api/experiments_api.py +6 -6
- arize/_generated/api_client/api/projects_api.py +3 -3
- arize/_lazy.py +61 -10
- arize/client.py +66 -50
- arize/config.py +175 -48
- arize/constants/config.py +1 -0
- arize/constants/ml.py +9 -16
- arize/constants/spans.py +5 -10
- arize/datasets/client.py +45 -28
- arize/datasets/errors.py +1 -1
- arize/datasets/validation.py +2 -2
- arize/embeddings/auto_generator.py +16 -9
- arize/embeddings/base_generators.py +15 -9
- arize/embeddings/cv_generators.py +2 -2
- arize/embeddings/errors.py +2 -2
- arize/embeddings/nlp_generators.py +8 -8
- arize/embeddings/tabular_generators.py +6 -6
- arize/exceptions/base.py +0 -52
- arize/exceptions/config.py +22 -0
- arize/exceptions/parameters.py +1 -330
- arize/exceptions/values.py +8 -5
- arize/experiments/__init__.py +4 -0
- arize/experiments/client.py +31 -18
- arize/experiments/evaluators/base.py +12 -9
- arize/experiments/evaluators/executors.py +16 -7
- arize/experiments/evaluators/rate_limiters.py +3 -1
- arize/experiments/evaluators/types.py +9 -7
- arize/experiments/evaluators/utils.py +7 -5
- arize/experiments/functions.py +128 -58
- arize/experiments/tracing.py +4 -1
- arize/experiments/types.py +34 -31
- arize/logging.py +54 -33
- arize/ml/batch_validation/errors.py +10 -1004
- arize/ml/batch_validation/validator.py +351 -291
- arize/ml/bounded_executor.py +25 -6
- arize/ml/casting.py +51 -33
- arize/ml/client.py +43 -35
- arize/ml/proto.py +21 -22
- arize/ml/stream_validation.py +64 -27
- arize/ml/surrogate_explainer/mimic.py +18 -10
- arize/ml/types.py +27 -67
- arize/pre_releases.py +10 -6
- arize/projects/client.py +9 -4
- arize/py.typed +0 -0
- arize/regions.py +11 -11
- arize/spans/client.py +125 -31
- arize/spans/columns.py +32 -36
- arize/spans/conversion.py +12 -11
- arize/spans/validation/annotations/dataframe_form_validation.py +1 -1
- arize/spans/validation/annotations/value_validation.py +11 -14
- arize/spans/validation/common/argument_validation.py +3 -3
- arize/spans/validation/common/dataframe_form_validation.py +7 -7
- arize/spans/validation/common/value_validation.py +11 -14
- arize/spans/validation/evals/dataframe_form_validation.py +4 -4
- arize/spans/validation/evals/evals_validation.py +6 -6
- arize/spans/validation/evals/value_validation.py +1 -1
- arize/spans/validation/metadata/argument_validation.py +1 -1
- arize/spans/validation/metadata/dataframe_form_validation.py +2 -2
- arize/spans/validation/metadata/value_validation.py +23 -1
- arize/spans/validation/spans/dataframe_form_validation.py +2 -2
- arize/spans/validation/spans/spans_validation.py +6 -6
- arize/utils/arrow.py +38 -2
- arize/utils/cache.py +2 -2
- arize/utils/dataframe.py +4 -4
- arize/utils/online_tasks/dataframe_preprocessor.py +15 -11
- arize/utils/openinference_conversion.py +10 -10
- arize/utils/proto.py +0 -1
- arize/utils/types.py +6 -6
- arize/version.py +1 -1
- {arize-8.0.0b1.dist-info → arize-8.0.0b4.dist-info}/METADATA +32 -7
- {arize-8.0.0b1.dist-info → arize-8.0.0b4.dist-info}/RECORD +81 -78
- {arize-8.0.0b1.dist-info → arize-8.0.0b4.dist-info}/WHEEL +0 -0
- {arize-8.0.0b1.dist-info → arize-8.0.0b4.dist-info}/licenses/LICENSE +0 -0
- {arize-8.0.0b1.dist-info → arize-8.0.0b4.dist-info}/licenses/NOTICE +0 -0
arize/experiments/types.py
CHANGED
|
@@ -11,6 +11,7 @@ from datetime import datetime, timezone
|
|
|
11
11
|
from importlib.metadata import version
|
|
12
12
|
from random import getrandbits
|
|
13
13
|
from typing import (
|
|
14
|
+
NoReturn,
|
|
14
15
|
cast,
|
|
15
16
|
)
|
|
16
17
|
|
|
@@ -23,8 +24,6 @@ from arize.experiments.evaluators.types import (
|
|
|
23
24
|
)
|
|
24
25
|
|
|
25
26
|
ExperimentId = str
|
|
26
|
-
# DatasetId= str
|
|
27
|
-
# DatasetVersionId= str
|
|
28
27
|
ExampleId = str
|
|
29
28
|
RepetitionNumber = int
|
|
30
29
|
ExperimentRunId = str
|
|
@@ -92,11 +91,13 @@ class Example:
|
|
|
92
91
|
def from_dict(cls, obj: Mapping[str, object]) -> Example:
|
|
93
92
|
"""Create an Example instance from a dictionary."""
|
|
94
93
|
return cls(
|
|
95
|
-
id=obj["id"],
|
|
96
|
-
input=obj["input"],
|
|
97
|
-
output=obj["output"],
|
|
98
|
-
metadata=
|
|
99
|
-
|
|
94
|
+
id=cast("str", obj["id"]),
|
|
95
|
+
input=cast("Mapping[str, JSONSerializable]", obj["input"]),
|
|
96
|
+
output=cast("Mapping[str, JSONSerializable]", obj["output"]),
|
|
97
|
+
metadata=cast(
|
|
98
|
+
"Mapping[str, JSONSerializable]", obj.get("metadata") or {}
|
|
99
|
+
),
|
|
100
|
+
updated_at=cast("datetime", obj["updated_at"]),
|
|
100
101
|
)
|
|
101
102
|
|
|
102
103
|
def __repr__(self) -> str:
|
|
@@ -150,7 +151,7 @@ def _make_read_only(
|
|
|
150
151
|
return obj
|
|
151
152
|
|
|
152
153
|
|
|
153
|
-
class _ReadOnly(ObjectProxy):
|
|
154
|
+
class _ReadOnly(ObjectProxy):
|
|
154
155
|
def __setitem__(self, *args: object, **kwargs: object) -> object:
|
|
155
156
|
raise NotImplementedError
|
|
156
157
|
|
|
@@ -229,15 +230,15 @@ class ExperimentRun:
|
|
|
229
230
|
def from_dict(cls, obj: Mapping[str, object]) -> ExperimentRun:
|
|
230
231
|
"""Create an ExperimentRun instance from a dictionary."""
|
|
231
232
|
return cls(
|
|
232
|
-
start_time=obj["start_time"],
|
|
233
|
-
end_time=obj["end_time"],
|
|
234
|
-
experiment_id=obj["experiment_id"],
|
|
235
|
-
dataset_example_id=obj["dataset_example_id"],
|
|
236
|
-
repetition_number=obj.get("repetition_number") or 1,
|
|
237
|
-
output=_make_read_only(obj.get("output")),
|
|
238
|
-
error=obj.get("error"),
|
|
239
|
-
id=obj["id"],
|
|
240
|
-
trace_id=obj.get("trace_id"),
|
|
233
|
+
start_time=cast("datetime", obj["start_time"]),
|
|
234
|
+
end_time=cast("datetime", obj["end_time"]),
|
|
235
|
+
experiment_id=cast("str", obj["experiment_id"]),
|
|
236
|
+
dataset_example_id=cast("str", obj["dataset_example_id"]),
|
|
237
|
+
repetition_number=cast("int", obj.get("repetition_number") or 1),
|
|
238
|
+
output=cast("JSONSerializable", _make_read_only(obj.get("output"))),
|
|
239
|
+
error=cast("str | None", obj.get("error")),
|
|
240
|
+
id=cast("str", obj["id"]),
|
|
241
|
+
trace_id=cast("str | None", obj.get("trace_id")),
|
|
241
242
|
)
|
|
242
243
|
|
|
243
244
|
def __post_init__(self) -> None:
|
|
@@ -263,9 +264,9 @@ class ExperimentEvaluationRun:
|
|
|
263
264
|
name: The name of the evaluation run.
|
|
264
265
|
annotator_kind: The kind of annotator used in the evaluation run.
|
|
265
266
|
error: The error message if the evaluation run failed.
|
|
266
|
-
result (
|
|
267
|
+
result (EvaluationResult | :obj:`None`): The result of the evaluation run.
|
|
267
268
|
id (str): The unique identifier for the evaluation run.
|
|
268
|
-
trace_id (
|
|
269
|
+
trace_id (TraceId | :obj:`None`): The trace identifier for the evaluation run.
|
|
269
270
|
"""
|
|
270
271
|
|
|
271
272
|
experiment_run_id: ExperimentRunId
|
|
@@ -282,15 +283,17 @@ class ExperimentEvaluationRun:
|
|
|
282
283
|
def from_dict(cls, obj: Mapping[str, object]) -> ExperimentEvaluationRun:
|
|
283
284
|
"""Create an ExperimentEvaluationRun instance from a dictionary."""
|
|
284
285
|
return cls(
|
|
285
|
-
experiment_run_id=obj["experiment_run_id"],
|
|
286
|
-
start_time=obj["start_time"],
|
|
287
|
-
end_time=obj["end_time"],
|
|
288
|
-
name=obj["name"],
|
|
289
|
-
annotator_kind=obj["annotator_kind"],
|
|
290
|
-
error=obj.get("error"),
|
|
291
|
-
result=EvaluationResult.from_dict(
|
|
292
|
-
|
|
293
|
-
|
|
286
|
+
experiment_run_id=cast("str", obj["experiment_run_id"]),
|
|
287
|
+
start_time=cast("datetime", obj["start_time"]),
|
|
288
|
+
end_time=cast("datetime", obj["end_time"]),
|
|
289
|
+
name=cast("str", obj["name"]),
|
|
290
|
+
annotator_kind=cast("str", obj["annotator_kind"]),
|
|
291
|
+
error=cast("str | None", obj.get("error")),
|
|
292
|
+
result=EvaluationResult.from_dict(
|
|
293
|
+
cast("Mapping[str, object] | None", obj.get("result"))
|
|
294
|
+
),
|
|
295
|
+
id=cast("str", obj["id"]),
|
|
296
|
+
trace_id=cast("str | None", obj.get("trace_id")),
|
|
294
297
|
)
|
|
295
298
|
|
|
296
299
|
def __post_init__(self) -> None:
|
|
@@ -336,7 +339,7 @@ class _HasStats:
|
|
|
336
339
|
text = self.stats.__str__()
|
|
337
340
|
else:
|
|
338
341
|
text = self.stats.to_markdown(index=False)
|
|
339
|
-
return f"{self.title}\n{'-' * len(self.title)}\n" + text
|
|
342
|
+
return f"{self.title}\n{'-' * len(self.title)}\n" + text
|
|
340
343
|
|
|
341
344
|
|
|
342
345
|
@dataclass(frozen=True)
|
|
@@ -380,7 +383,7 @@ class _TaskSummary(_HasStats):
|
|
|
380
383
|
return summary
|
|
381
384
|
|
|
382
385
|
@classmethod
|
|
383
|
-
def __new__(cls, *args: object, **kwargs: object) ->
|
|
386
|
+
def __new__(cls, *args: object, **kwargs: object) -> NoReturn:
|
|
384
387
|
# Direct instantiation by users is discouraged.
|
|
385
388
|
raise NotImplementedError
|
|
386
389
|
|
|
@@ -398,7 +401,7 @@ def _top_string(s: pd.Series, length: int = 100) -> str | None:
|
|
|
398
401
|
|
|
399
402
|
@dataclass
|
|
400
403
|
class ExperimentTaskFieldNames:
|
|
401
|
-
"""Column names for mapping experiment task results in a DataFrame
|
|
404
|
+
"""Column names for mapping experiment task results in a :class:`pandas.DataFrame`.
|
|
402
405
|
|
|
403
406
|
Args:
|
|
404
407
|
example_id: Name of column containing example IDs.
|
arize/logging.py
CHANGED
|
@@ -6,8 +6,11 @@ import json
|
|
|
6
6
|
import logging
|
|
7
7
|
import os
|
|
8
8
|
import sys
|
|
9
|
-
from collections.abc import Iterable, Mapping
|
|
10
|
-
from typing import Any, ClassVar
|
|
9
|
+
from collections.abc import Iterable, Mapping, MutableMapping
|
|
10
|
+
from typing import TYPE_CHECKING, Any, ClassVar
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
import requests
|
|
11
14
|
|
|
12
15
|
from arize.config import _parse_bool
|
|
13
16
|
from arize.constants.config import (
|
|
@@ -34,9 +37,18 @@ class CtxAdapter(logging.LoggerAdapter):
|
|
|
34
37
|
"""LoggerAdapter that merges bound context with per-call extras safely."""
|
|
35
38
|
|
|
36
39
|
def process(
|
|
37
|
-
self, msg: object, kwargs:
|
|
38
|
-
) -> tuple[object,
|
|
39
|
-
"""Process the logging call by merging bound and call extras.
|
|
40
|
+
self, msg: object, kwargs: MutableMapping[str, Any]
|
|
41
|
+
) -> tuple[object, MutableMapping[str, Any]]:
|
|
42
|
+
"""Process the logging call by merging bound and call extras.
|
|
43
|
+
|
|
44
|
+
Args:
|
|
45
|
+
msg: The log message to process.
|
|
46
|
+
kwargs: Keyword arguments from the logging call, may include 'extra' dict.
|
|
47
|
+
|
|
48
|
+
Returns:
|
|
49
|
+
tuple[object, dict[str, object]]: A tuple of (message, modified_kwargs) with
|
|
50
|
+
merged extra context.
|
|
51
|
+
"""
|
|
40
52
|
call_extra = _coerce_mapping(kwargs.pop("extra", None))
|
|
41
53
|
bound_extra = _coerce_mapping(self.extra)
|
|
42
54
|
merged = (
|
|
@@ -49,13 +61,24 @@ class CtxAdapter(logging.LoggerAdapter):
|
|
|
49
61
|
return msg, kwargs
|
|
50
62
|
|
|
51
63
|
def with_extra(self, **more: object) -> CtxAdapter:
|
|
52
|
-
"""Return a copy of this adapter with additional bound extras.
|
|
64
|
+
"""Return a copy of this adapter with additional bound extras.
|
|
65
|
+
|
|
66
|
+
Args:
|
|
67
|
+
**more: Additional key-value pairs to merge into the bound extras.
|
|
68
|
+
|
|
69
|
+
Returns:
|
|
70
|
+
CtxAdapter: A new adapter instance with merged extra context.
|
|
71
|
+
"""
|
|
53
72
|
base = _coerce_mapping(self.extra)
|
|
54
73
|
base.update(_coerce_mapping(more))
|
|
55
74
|
return type(self)(self.logger, base)
|
|
56
75
|
|
|
57
76
|
def without_extra(self) -> CtxAdapter:
|
|
58
|
-
"""Return a copy of this adapter with *no* bound extras.
|
|
77
|
+
"""Return a copy of this adapter with *no* bound extras.
|
|
78
|
+
|
|
79
|
+
Returns:
|
|
80
|
+
CtxAdapter: A new adapter instance without any bound extra context.
|
|
81
|
+
"""
|
|
59
82
|
return type(self)(self.logger, None)
|
|
60
83
|
|
|
61
84
|
|
|
@@ -86,7 +109,14 @@ class CustomLogFormatter(logging.Formatter):
|
|
|
86
109
|
super().__init__(fmt=fmt)
|
|
87
110
|
|
|
88
111
|
def format(self, record: logging.LogRecord) -> str:
|
|
89
|
-
"""Format the log record with color based on log level.
|
|
112
|
+
"""Format the log record with color based on log level.
|
|
113
|
+
|
|
114
|
+
Args:
|
|
115
|
+
record: The log record to format.
|
|
116
|
+
|
|
117
|
+
Returns:
|
|
118
|
+
str: Formatted and color-coded log message with any extra fields appended.
|
|
119
|
+
"""
|
|
90
120
|
# Build the base message without any color.
|
|
91
121
|
base = super().format(record)
|
|
92
122
|
|
|
@@ -113,31 +143,22 @@ class JsonFormatter(logging.Formatter):
|
|
|
113
143
|
"""Minimal JSON formatter (one JSON object per line)."""
|
|
114
144
|
|
|
115
145
|
# fields to skip copying from record.__dict__
|
|
116
|
-
_skip: ClassVar[set[str]] =
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
# "pathname",
|
|
123
|
-
# "filename",
|
|
124
|
-
# "module",
|
|
125
|
-
# "exc_info",
|
|
126
|
-
# "exc_text",
|
|
127
|
-
# "stack_info",
|
|
128
|
-
# "lineno",
|
|
129
|
-
# "funcName",
|
|
130
|
-
# "created",
|
|
131
|
-
# "msecs",
|
|
132
|
-
# "relativeCreated",
|
|
133
|
-
# "thread",
|
|
134
|
-
# "threadName",
|
|
135
|
-
# "processName",
|
|
136
|
-
# "process",
|
|
137
|
-
}
|
|
146
|
+
_skip: ClassVar[set[str]] = set()
|
|
147
|
+
# Potential fields to skip:
|
|
148
|
+
# "name", "msg", "args", "levelname", "levelno", "pathname",
|
|
149
|
+
# "filename", "module", "exc_info", "exc_text", "stack_info",
|
|
150
|
+
# "lineno", "funcName", "created", "msecs", "relativeCreated",
|
|
151
|
+
# "thread", "threadName", "processName", "process"
|
|
138
152
|
|
|
139
153
|
def format(self, record: logging.LogRecord) -> str:
|
|
140
|
-
"""Format the log record as a JSON string.
|
|
154
|
+
"""Format the log record as a JSON string.
|
|
155
|
+
|
|
156
|
+
Args:
|
|
157
|
+
record: The log record to format.
|
|
158
|
+
|
|
159
|
+
Returns:
|
|
160
|
+
str: JSON-formatted log message as a single line with all fields and extras.
|
|
161
|
+
"""
|
|
141
162
|
payload: dict[str, object] = {
|
|
142
163
|
# "time": self.formatTime(record, datefmt="%Y-%m-%dT%H:%M:%S%z"),
|
|
143
164
|
# "logger": record.name,
|
|
@@ -259,7 +280,7 @@ def log_a_list(values: Iterable[Any] | None, join_word: str) -> str:
|
|
|
259
280
|
"""Format a list of values into a human-readable string with a joining word.
|
|
260
281
|
|
|
261
282
|
Args:
|
|
262
|
-
values: An iterable of values to format, or None
|
|
283
|
+
values: An iterable of values to format, or :obj:`None`.
|
|
263
284
|
join_word: The word to use before the last item (e.g., "and", "or").
|
|
264
285
|
|
|
265
286
|
Returns:
|
|
@@ -277,7 +298,7 @@ def log_a_list(values: Iterable[Any] | None, join_word: str) -> str:
|
|
|
277
298
|
)
|
|
278
299
|
|
|
279
300
|
|
|
280
|
-
def get_arize_project_url(response:
|
|
301
|
+
def get_arize_project_url(response: requests.Response) -> str:
|
|
281
302
|
"""Extract the Arize project URL from an API response.
|
|
282
303
|
|
|
283
304
|
Args:
|