arize 8.0.0a23__py3-none-any.whl → 8.0.0b0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arize/__init__.py +11 -10
- arize/_exporter/client.py +1 -1
- arize/client.py +36 -126
- arize/config.py +59 -100
- arize/datasets/client.py +11 -6
- arize/embeddings/nlp_generators.py +12 -6
- arize/embeddings/tabular_generators.py +14 -11
- arize/experiments/__init__.py +12 -0
- arize/experiments/client.py +11 -6
- arize/{models → ml}/batch_validation/errors.py +2 -2
- arize/{models → ml}/batch_validation/validator.py +5 -3
- arize/{models → ml}/casting.py +42 -78
- arize/{models → ml}/client.py +19 -17
- arize/{models → ml}/proto.py +2 -2
- arize/{models → ml}/stream_validation.py +1 -1
- arize/{models → ml}/surrogate_explainer/mimic.py +6 -2
- arize/{types.py → ml/types.py} +99 -234
- arize/pre_releases.py +2 -1
- arize/projects/client.py +11 -6
- arize/spans/client.py +89 -84
- arize/spans/conversion.py +11 -4
- arize/spans/validation/common/value_validation.py +1 -1
- arize/spans/validation/spans/dataframe_form_validation.py +1 -1
- arize/spans/validation/spans/value_validation.py +2 -1
- arize/utils/dataframe.py +1 -1
- arize/utils/online_tasks/dataframe_preprocessor.py +5 -6
- arize/utils/types.py +105 -0
- arize/version.py +1 -1
- {arize-8.0.0a23.dist-info → arize-8.0.0b0.dist-info}/METADATA +10 -4
- {arize-8.0.0a23.dist-info → arize-8.0.0b0.dist-info}/RECORD +37 -36
- /arize/{models → ml}/__init__.py +0 -0
- /arize/{models → ml}/batch_validation/__init__.py +0 -0
- /arize/{models → ml}/bounded_executor.py +0 -0
- /arize/{models → ml}/surrogate_explainer/__init__.py +0 -0
- {arize-8.0.0a23.dist-info → arize-8.0.0b0.dist-info}/WHEEL +0 -0
- {arize-8.0.0a23.dist-info → arize-8.0.0b0.dist-info}/licenses/LICENSE +0 -0
- {arize-8.0.0a23.dist-info → arize-8.0.0b0.dist-info}/licenses/NOTICE +0 -0
arize/{models → ml}/casting.py
RENAMED
|
@@ -9,7 +9,13 @@ from typing import TYPE_CHECKING
|
|
|
9
9
|
import numpy as np
|
|
10
10
|
|
|
11
11
|
from arize.logging import log_a_list
|
|
12
|
-
from arize.types import
|
|
12
|
+
from arize.ml.types import (
|
|
13
|
+
ArizeTypes,
|
|
14
|
+
Schema,
|
|
15
|
+
TypedColumns,
|
|
16
|
+
TypedValue,
|
|
17
|
+
)
|
|
18
|
+
from arize.utils.types import is_list_of
|
|
13
19
|
|
|
14
20
|
if TYPE_CHECKING:
|
|
15
21
|
import pandas as pd
|
|
@@ -125,29 +131,20 @@ def cast_typed_columns(
|
|
|
125
131
|
This optional feature provides a simple way for users to prevent type drift within
|
|
126
132
|
a column across many SDK uploads.
|
|
127
133
|
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
A deepcopy of the user's dataframe.
|
|
132
|
-
schema: Schema
|
|
133
|
-
The schema, which may include feature and tag column names
|
|
134
|
+
Args:
|
|
135
|
+
dataframe (pd.DataFrame): A deepcopy of the user's dataframe.
|
|
136
|
+
schema (Schema): The schema, which may include feature and tag column names
|
|
134
137
|
in a TypedColumns object or a List[string].
|
|
135
138
|
|
|
136
139
|
Returns:
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
A new Schema object, with feature and tag column names converted to the List[string] format
|
|
142
|
-
expected in downstream validation.
|
|
140
|
+
tuple[pd.DataFrame, Schema]: A tuple containing:
|
|
141
|
+
- dataframe: The dataframe, with columns cast to the specified types.
|
|
142
|
+
- schema: A new Schema object, with feature and tag column names converted
|
|
143
|
+
to the List[string] format expected in downstream validation.
|
|
143
144
|
|
|
144
145
|
Raises:
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
If casting fails.
|
|
148
|
-
InvalidTypedColumnsError
|
|
149
|
-
If the TypedColumns object is invalid.
|
|
150
|
-
|
|
146
|
+
ColumnCastingError: If casting fails.
|
|
147
|
+
InvalidTypedColumnsError: If the TypedColumns object is invalid.
|
|
151
148
|
"""
|
|
152
149
|
typed_column_fields = schema.typed_column_fields()
|
|
153
150
|
feature_field = "feature_column_names"
|
|
@@ -204,21 +201,14 @@ def _cast_value(
|
|
|
204
201
|
) -> str | int | float | list[str] | None:
|
|
205
202
|
"""Casts a TypedValue to its provided type, preserving all null values as None or float('nan').
|
|
206
203
|
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
typed_value: TypedValue
|
|
210
|
-
The TypedValue to cast.
|
|
204
|
+
Args:
|
|
205
|
+
typed_value (TypedValue): The TypedValue to cast.
|
|
211
206
|
|
|
212
207
|
Returns:
|
|
213
|
-
|
|
214
|
-
Union[str, int, float, List[str], None]
|
|
215
|
-
The cast value.
|
|
208
|
+
str | int | float | list[str] | None: The cast value.
|
|
216
209
|
|
|
217
210
|
Raises:
|
|
218
|
-
|
|
219
|
-
CastingError
|
|
220
|
-
If the value cannot be cast to the provided type.
|
|
221
|
-
|
|
211
|
+
CastingError: If the value cannot be cast to the provided type.
|
|
222
212
|
"""
|
|
223
213
|
if typed_value.value is None:
|
|
224
214
|
return None
|
|
@@ -274,18 +264,13 @@ def _validate_typed_columns(
|
|
|
274
264
|
) -> None:
|
|
275
265
|
"""Validate a TypedColumns object.
|
|
276
266
|
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
typed_columns: TypedColumns
|
|
282
|
-
The TypedColumns object to validate.
|
|
267
|
+
Args:
|
|
268
|
+
field_name (str): The name of the Schema field that the TypedColumns object
|
|
269
|
+
is associated with.
|
|
270
|
+
typed_columns (TypedColumns): The TypedColumns object to validate.
|
|
283
271
|
|
|
284
272
|
Raises:
|
|
285
|
-
|
|
286
|
-
InvalidTypedColumnsError
|
|
287
|
-
If the TypedColumns object is invalid.
|
|
288
|
-
|
|
273
|
+
InvalidTypedColumnsError: If the TypedColumns object is invalid.
|
|
289
274
|
"""
|
|
290
275
|
if typed_columns.is_empty():
|
|
291
276
|
raise InvalidTypedColumnsError(field_name=field_name, reason="is empty")
|
|
@@ -304,24 +289,16 @@ def _cast_columns(
|
|
|
304
289
|
|
|
305
290
|
(feature_column_names or tag_column_names)
|
|
306
291
|
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
columns: TypedColumns
|
|
312
|
-
The TypedColumns object, which specifies the columns to cast
|
|
313
|
-
(and/or to not cast) and their target types.
|
|
292
|
+
Args:
|
|
293
|
+
dataframe (pd.DataFrame): A deepcopy of the user's dataframe.
|
|
294
|
+
columns (TypedColumns): The TypedColumns object, which specifies the columns
|
|
295
|
+
to cast (and/or to not cast) and their target types.
|
|
314
296
|
|
|
315
297
|
Returns:
|
|
316
|
-
|
|
317
|
-
dataframe: pd.DataFrame
|
|
318
|
-
The dataframe with columns cast to the specified types.
|
|
298
|
+
pd.DataFrame: The dataframe with columns cast to the specified types.
|
|
319
299
|
|
|
320
300
|
Raises:
|
|
321
|
-
|
|
322
|
-
ColumnCastingError
|
|
323
|
-
If casting fails.
|
|
324
|
-
|
|
301
|
+
ColumnCastingError: If casting fails.
|
|
325
302
|
"""
|
|
326
303
|
if columns.to_str:
|
|
327
304
|
try:
|
|
@@ -372,25 +349,17 @@ def _cast_df(
|
|
|
372
349
|
) -> pd.DataFrame:
|
|
373
350
|
"""Cast columns in a dataframe to the specified type.
|
|
374
351
|
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
cols: List[str]
|
|
380
|
-
The list of column names to cast.
|
|
381
|
-
target_type_str: str
|
|
382
|
-
The target type to cast to.
|
|
352
|
+
Args:
|
|
353
|
+
df (pd.DataFrame): A deepcopy of the user's dataframe.
|
|
354
|
+
cols (list[str]): The list of column names to cast.
|
|
355
|
+
target_type_str (str): The target type to cast to.
|
|
383
356
|
|
|
384
357
|
Returns:
|
|
385
|
-
|
|
386
|
-
df: pd.DataFrame
|
|
387
|
-
The dataframe with columns cast to the specified types.
|
|
358
|
+
pd.DataFrame: The dataframe with columns cast to the specified types.
|
|
388
359
|
|
|
389
360
|
Raises:
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
If casting fails. Common exceptions raised by astype() are TypeError and ValueError.
|
|
393
|
-
|
|
361
|
+
Exception: If casting fails. Common exceptions raised by astype() are
|
|
362
|
+
TypeError and ValueError.
|
|
394
363
|
"""
|
|
395
364
|
nan_mapping = {"nan": np.nan, "NaN": np.nan}
|
|
396
365
|
df = df.replace(nan_mapping)
|
|
@@ -404,18 +373,13 @@ def _convert_schema_field_types(
|
|
|
404
373
|
) -> Schema:
|
|
405
374
|
"""Convert schema field types from TypedColumns to List[string] format.
|
|
406
375
|
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
schema: Schema
|
|
410
|
-
The schema, which may include feature and tag column names
|
|
376
|
+
Args:
|
|
377
|
+
schema (Schema): The schema, which may include feature and tag column names
|
|
411
378
|
in a TypedColumns object or a List[string].
|
|
412
379
|
|
|
413
380
|
Returns:
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
A Schema, with feature and tag column names
|
|
417
|
-
converted to the List[string] format expected in downstream validation.
|
|
418
|
-
|
|
381
|
+
Schema: A Schema, with feature and tag column names converted to the
|
|
382
|
+
List[string] format expected in downstream validation.
|
|
419
383
|
"""
|
|
420
384
|
feature_column_names_list = (
|
|
421
385
|
schema.feature_column_names
|
arize/{models → ml}/client.py
RENAMED
|
@@ -33,13 +33,13 @@ from arize.exceptions.parameters import (
|
|
|
33
33
|
)
|
|
34
34
|
from arize.exceptions.spaces import MissingSpaceIDError
|
|
35
35
|
from arize.logging import get_truncation_warning_message
|
|
36
|
-
from arize.
|
|
37
|
-
from arize.
|
|
38
|
-
from arize.
|
|
36
|
+
from arize.ml.bounded_executor import BoundedExecutor
|
|
37
|
+
from arize.ml.casting import cast_dictionary, cast_typed_columns
|
|
38
|
+
from arize.ml.stream_validation import (
|
|
39
39
|
validate_and_convert_prediction_id,
|
|
40
40
|
validate_label,
|
|
41
41
|
)
|
|
42
|
-
from arize.types import (
|
|
42
|
+
from arize.ml.types import (
|
|
43
43
|
CATEGORICAL_MODEL_TYPES,
|
|
44
44
|
NUMERIC_MODEL_TYPES,
|
|
45
45
|
ActualLabelTypes,
|
|
@@ -57,8 +57,8 @@ from arize.types import (
|
|
|
57
57
|
SimilaritySearchParams,
|
|
58
58
|
TypedValue,
|
|
59
59
|
convert_element,
|
|
60
|
-
is_list_of,
|
|
61
60
|
)
|
|
61
|
+
from arize.utils.types import is_list_of
|
|
62
62
|
|
|
63
63
|
if TYPE_CHECKING:
|
|
64
64
|
import concurrent.futures as cf
|
|
@@ -95,14 +95,18 @@ _MIMIC_EXTRA = "mimic-explainer"
|
|
|
95
95
|
|
|
96
96
|
|
|
97
97
|
class MLModelsClient:
|
|
98
|
-
"""Client for logging ML model predictions and actuals to Arize.
|
|
98
|
+
"""Client for logging ML model predictions and actuals to Arize.
|
|
99
99
|
|
|
100
|
-
|
|
101
|
-
|
|
100
|
+
This class is primarily intended for internal use within the SDK. Users are
|
|
101
|
+
highly encouraged to access resource-specific functionality via
|
|
102
|
+
:class:`arize.ArizeClient`.
|
|
103
|
+
"""
|
|
102
104
|
|
|
103
|
-
|
|
104
|
-
sdk_config: SDK configuration containing API endpoints and credentials.
|
|
105
|
+
def __init__(self, *, sdk_config: SDKConfiguration) -> None:
|
|
105
106
|
"""
|
|
107
|
+
Args:
|
|
108
|
+
sdk_config: Resolved SDK configuration.
|
|
109
|
+
""" # noqa: D205, D212
|
|
106
110
|
self._sdk_config = sdk_config
|
|
107
111
|
|
|
108
112
|
# internal cache for the futures session
|
|
@@ -202,7 +206,7 @@ class MLModelsClient:
|
|
|
202
206
|
"""
|
|
203
207
|
require(_STREAM_EXTRA, _STREAM_DEPS)
|
|
204
208
|
from arize._generated.protocol.rec import public_pb2 as pb2
|
|
205
|
-
from arize.
|
|
209
|
+
from arize.ml.proto import (
|
|
206
210
|
get_pb_dictionary,
|
|
207
211
|
get_pb_label,
|
|
208
212
|
get_pb_timestamp,
|
|
@@ -374,9 +378,7 @@ class MLModelsClient:
|
|
|
374
378
|
if embedding_features or prompt or response:
|
|
375
379
|
# NOTE: Deep copy is necessary to avoid side effects on the original input dictionary
|
|
376
380
|
combined_embedding_features = (
|
|
377
|
-
|
|
378
|
-
if embedding_features
|
|
379
|
-
else {}
|
|
381
|
+
embedding_features.copy() if embedding_features else {}
|
|
380
382
|
)
|
|
381
383
|
# Map prompt as embedding features for generative models
|
|
382
384
|
if prompt is not None:
|
|
@@ -523,7 +525,7 @@ class MLModelsClient:
|
|
|
523
525
|
indexes=None,
|
|
524
526
|
)
|
|
525
527
|
|
|
526
|
-
def
|
|
528
|
+
def log(
|
|
527
529
|
self,
|
|
528
530
|
*,
|
|
529
531
|
space_id: str,
|
|
@@ -597,7 +599,7 @@ class MLModelsClient:
|
|
|
597
599
|
import pandas.api.types as ptypes
|
|
598
600
|
import pyarrow as pa
|
|
599
601
|
|
|
600
|
-
from arize.
|
|
602
|
+
from arize.ml.batch_validation.validator import Validator
|
|
601
603
|
from arize.utils.arrow import post_arrow_table
|
|
602
604
|
from arize.utils.dataframe import remove_extraneous_columns
|
|
603
605
|
|
|
@@ -686,7 +688,7 @@ class MLModelsClient:
|
|
|
686
688
|
|
|
687
689
|
if surrogate_explainability:
|
|
688
690
|
require(_MIMIC_EXTRA, _MIMIC_DEPS)
|
|
689
|
-
from arize.
|
|
691
|
+
from arize.ml.surrogate_explainer.mimic import Mimic
|
|
690
692
|
|
|
691
693
|
logger.debug("Running surrogate_explainability.")
|
|
692
694
|
if schema.shap_values_column_names:
|
arize/{models → ml}/proto.py
RENAMED
|
@@ -8,7 +8,7 @@ from google.protobuf.wrappers_pb2 import DoubleValue, StringValue
|
|
|
8
8
|
|
|
9
9
|
from arize._generated.protocol.rec import public_pb2 as pb2
|
|
10
10
|
from arize.exceptions.parameters import InvalidValueType
|
|
11
|
-
from arize.types import (
|
|
11
|
+
from arize.ml.types import (
|
|
12
12
|
CATEGORICAL_MODEL_TYPES,
|
|
13
13
|
NUMERIC_MODEL_TYPES,
|
|
14
14
|
Embedding,
|
|
@@ -22,8 +22,8 @@ from arize.types import (
|
|
|
22
22
|
RankingPredictionLabel,
|
|
23
23
|
SemanticSegmentationLabel,
|
|
24
24
|
convert_element,
|
|
25
|
-
is_list_of,
|
|
26
25
|
)
|
|
26
|
+
from arize.utils.types import is_list_of
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
def get_pb_dictionary(d: dict[object, object] | None) -> dict[str, object]:
|
|
@@ -6,7 +6,7 @@ from arize.constants.ml import MAX_PREDICTION_ID_LEN, MIN_PREDICTION_ID_LEN
|
|
|
6
6
|
from arize.exceptions.parameters import (
|
|
7
7
|
InvalidValueType,
|
|
8
8
|
)
|
|
9
|
-
from arize.types import (
|
|
9
|
+
from arize.ml.types import (
|
|
10
10
|
CATEGORICAL_MODEL_TYPES,
|
|
11
11
|
NUMERIC_MODEL_TYPES,
|
|
12
12
|
ActualLabelTypes,
|
|
@@ -15,12 +15,16 @@ from interpret_community.mimic.mimic_explainer import (
|
|
|
15
15
|
)
|
|
16
16
|
from sklearn.preprocessing import LabelEncoder
|
|
17
17
|
|
|
18
|
-
from arize.types import
|
|
18
|
+
from arize.ml.types import (
|
|
19
|
+
CATEGORICAL_MODEL_TYPES,
|
|
20
|
+
NUMERIC_MODEL_TYPES,
|
|
21
|
+
ModelTypes,
|
|
22
|
+
)
|
|
19
23
|
|
|
20
24
|
if TYPE_CHECKING:
|
|
21
25
|
from collections.abc import Callable
|
|
22
26
|
|
|
23
|
-
from arize.types import Schema
|
|
27
|
+
from arize.ml.types import Schema
|
|
24
28
|
|
|
25
29
|
|
|
26
30
|
class Mimic:
|