arize 8.0.0a21__py3-none-any.whl → 8.0.0a23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arize/__init__.py +17 -9
- arize/_exporter/client.py +55 -36
- arize/_exporter/parsers/tracing_data_parser.py +41 -30
- arize/_exporter/validation.py +3 -3
- arize/_flight/client.py +208 -77
- arize/_generated/api_client/__init__.py +30 -6
- arize/_generated/api_client/api/__init__.py +1 -0
- arize/_generated/api_client/api/datasets_api.py +864 -190
- arize/_generated/api_client/api/experiments_api.py +167 -131
- arize/_generated/api_client/api/projects_api.py +1197 -0
- arize/_generated/api_client/api_client.py +2 -2
- arize/_generated/api_client/configuration.py +42 -34
- arize/_generated/api_client/exceptions.py +2 -2
- arize/_generated/api_client/models/__init__.py +15 -4
- arize/_generated/api_client/models/dataset.py +10 -10
- arize/_generated/api_client/models/dataset_example.py +111 -0
- arize/_generated/api_client/models/dataset_example_update.py +100 -0
- arize/_generated/api_client/models/dataset_version.py +13 -13
- arize/_generated/api_client/models/datasets_create_request.py +16 -8
- arize/_generated/api_client/models/datasets_examples_insert_request.py +100 -0
- arize/_generated/api_client/models/datasets_examples_list200_response.py +106 -0
- arize/_generated/api_client/models/datasets_examples_update_request.py +102 -0
- arize/_generated/api_client/models/datasets_list200_response.py +10 -4
- arize/_generated/api_client/models/experiment.py +14 -16
- arize/_generated/api_client/models/experiment_run.py +108 -0
- arize/_generated/api_client/models/experiment_run_create.py +102 -0
- arize/_generated/api_client/models/experiments_create_request.py +16 -10
- arize/_generated/api_client/models/experiments_list200_response.py +10 -4
- arize/_generated/api_client/models/experiments_runs_list200_response.py +19 -5
- arize/_generated/api_client/models/{error.py → pagination_metadata.py} +13 -11
- arize/_generated/api_client/models/primitive_value.py +172 -0
- arize/_generated/api_client/models/problem.py +100 -0
- arize/_generated/api_client/models/project.py +99 -0
- arize/_generated/api_client/models/{datasets_list_examples200_response.py → projects_create_request.py} +13 -11
- arize/_generated/api_client/models/projects_list200_response.py +106 -0
- arize/_generated/api_client/rest.py +2 -2
- arize/_generated/api_client/test/test_dataset.py +4 -2
- arize/_generated/api_client/test/test_dataset_example.py +56 -0
- arize/_generated/api_client/test/test_dataset_example_update.py +52 -0
- arize/_generated/api_client/test/test_dataset_version.py +7 -2
- arize/_generated/api_client/test/test_datasets_api.py +27 -13
- arize/_generated/api_client/test/test_datasets_create_request.py +8 -4
- arize/_generated/api_client/test/{test_datasets_list_examples200_response.py → test_datasets_examples_insert_request.py} +19 -15
- arize/_generated/api_client/test/test_datasets_examples_list200_response.py +66 -0
- arize/_generated/api_client/test/test_datasets_examples_update_request.py +61 -0
- arize/_generated/api_client/test/test_datasets_list200_response.py +9 -3
- arize/_generated/api_client/test/test_experiment.py +2 -4
- arize/_generated/api_client/test/test_experiment_run.py +56 -0
- arize/_generated/api_client/test/test_experiment_run_create.py +54 -0
- arize/_generated/api_client/test/test_experiments_api.py +6 -6
- arize/_generated/api_client/test/test_experiments_create_request.py +9 -6
- arize/_generated/api_client/test/test_experiments_list200_response.py +9 -5
- arize/_generated/api_client/test/test_experiments_runs_list200_response.py +15 -5
- arize/_generated/api_client/test/test_pagination_metadata.py +53 -0
- arize/_generated/api_client/test/{test_error.py → test_primitive_value.py} +13 -14
- arize/_generated/api_client/test/test_problem.py +57 -0
- arize/_generated/api_client/test/test_project.py +58 -0
- arize/_generated/api_client/test/test_projects_api.py +59 -0
- arize/_generated/api_client/test/test_projects_create_request.py +54 -0
- arize/_generated/api_client/test/test_projects_list200_response.py +70 -0
- arize/_generated/api_client_README.md +43 -29
- arize/_generated/protocol/flight/flight_pb2.py +400 -0
- arize/_lazy.py +27 -19
- arize/client.py +269 -55
- arize/config.py +365 -116
- arize/constants/__init__.py +1 -0
- arize/constants/config.py +11 -4
- arize/constants/ml.py +6 -4
- arize/constants/openinference.py +2 -0
- arize/constants/pyarrow.py +2 -0
- arize/constants/spans.py +3 -1
- arize/datasets/__init__.py +1 -0
- arize/datasets/client.py +299 -84
- arize/datasets/errors.py +32 -2
- arize/datasets/validation.py +18 -8
- arize/embeddings/__init__.py +2 -0
- arize/embeddings/auto_generator.py +23 -19
- arize/embeddings/base_generators.py +89 -36
- arize/embeddings/constants.py +2 -0
- arize/embeddings/cv_generators.py +26 -4
- arize/embeddings/errors.py +27 -5
- arize/embeddings/nlp_generators.py +31 -12
- arize/embeddings/tabular_generators.py +32 -20
- arize/embeddings/usecases.py +12 -2
- arize/exceptions/__init__.py +1 -0
- arize/exceptions/auth.py +11 -1
- arize/exceptions/base.py +29 -4
- arize/exceptions/models.py +21 -2
- arize/exceptions/parameters.py +31 -0
- arize/exceptions/spaces.py +12 -1
- arize/exceptions/types.py +86 -7
- arize/exceptions/values.py +220 -20
- arize/experiments/__init__.py +1 -0
- arize/experiments/client.py +390 -286
- arize/experiments/evaluators/__init__.py +1 -0
- arize/experiments/evaluators/base.py +74 -41
- arize/experiments/evaluators/exceptions.py +6 -3
- arize/experiments/evaluators/executors.py +121 -73
- arize/experiments/evaluators/rate_limiters.py +106 -57
- arize/experiments/evaluators/types.py +34 -7
- arize/experiments/evaluators/utils.py +65 -27
- arize/experiments/functions.py +103 -101
- arize/experiments/tracing.py +52 -44
- arize/experiments/types.py +56 -31
- arize/logging.py +54 -22
- arize/models/__init__.py +1 -0
- arize/models/batch_validation/__init__.py +1 -0
- arize/models/batch_validation/errors.py +543 -65
- arize/models/batch_validation/validator.py +339 -300
- arize/models/bounded_executor.py +20 -7
- arize/models/casting.py +75 -29
- arize/models/client.py +326 -107
- arize/models/proto.py +95 -40
- arize/models/stream_validation.py +42 -14
- arize/models/surrogate_explainer/__init__.py +1 -0
- arize/models/surrogate_explainer/mimic.py +24 -13
- arize/pre_releases.py +43 -0
- arize/projects/__init__.py +1 -0
- arize/projects/client.py +129 -0
- arize/regions.py +40 -0
- arize/spans/__init__.py +1 -0
- arize/spans/client.py +130 -106
- arize/spans/columns.py +13 -0
- arize/spans/conversion.py +54 -38
- arize/spans/validation/__init__.py +1 -0
- arize/spans/validation/annotations/__init__.py +1 -0
- arize/spans/validation/annotations/annotations_validation.py +6 -4
- arize/spans/validation/annotations/dataframe_form_validation.py +13 -11
- arize/spans/validation/annotations/value_validation.py +35 -11
- arize/spans/validation/common/__init__.py +1 -0
- arize/spans/validation/common/argument_validation.py +33 -8
- arize/spans/validation/common/dataframe_form_validation.py +35 -9
- arize/spans/validation/common/errors.py +211 -11
- arize/spans/validation/common/value_validation.py +80 -13
- arize/spans/validation/evals/__init__.py +1 -0
- arize/spans/validation/evals/dataframe_form_validation.py +28 -8
- arize/spans/validation/evals/evals_validation.py +34 -4
- arize/spans/validation/evals/value_validation.py +26 -3
- arize/spans/validation/metadata/__init__.py +1 -1
- arize/spans/validation/metadata/argument_validation.py +14 -5
- arize/spans/validation/metadata/dataframe_form_validation.py +26 -10
- arize/spans/validation/metadata/value_validation.py +24 -10
- arize/spans/validation/spans/__init__.py +1 -0
- arize/spans/validation/spans/dataframe_form_validation.py +34 -13
- arize/spans/validation/spans/spans_validation.py +35 -4
- arize/spans/validation/spans/value_validation.py +76 -7
- arize/types.py +293 -157
- arize/utils/__init__.py +1 -0
- arize/utils/arrow.py +31 -15
- arize/utils/cache.py +34 -6
- arize/utils/dataframe.py +19 -2
- arize/utils/online_tasks/__init__.py +2 -0
- arize/utils/online_tasks/dataframe_preprocessor.py +53 -41
- arize/utils/openinference_conversion.py +44 -5
- arize/utils/proto.py +10 -0
- arize/utils/size.py +5 -3
- arize/version.py +3 -1
- {arize-8.0.0a21.dist-info → arize-8.0.0a23.dist-info}/METADATA +4 -3
- arize-8.0.0a23.dist-info/RECORD +174 -0
- {arize-8.0.0a21.dist-info → arize-8.0.0a23.dist-info}/WHEEL +1 -1
- arize-8.0.0a23.dist-info/licenses/LICENSE +176 -0
- arize-8.0.0a23.dist-info/licenses/NOTICE +13 -0
- arize/_generated/protocol/flight/export_pb2.py +0 -61
- arize/_generated/protocol/flight/ingest_pb2.py +0 -365
- arize-8.0.0a21.dist-info/RECORD +0 -146
- arize-8.0.0a21.dist-info/licenses/LICENSE.md +0 -12
arize/models/bounded_executor.py
CHANGED
|
@@ -1,23 +1,35 @@
|
|
|
1
|
+
"""Bounded thread pool executor with queue size limits."""
|
|
2
|
+
|
|
3
|
+
from collections.abc import Callable
|
|
1
4
|
from concurrent.futures import ThreadPoolExecutor
|
|
2
5
|
from threading import BoundedSemaphore
|
|
3
6
|
|
|
4
7
|
|
|
5
8
|
class BoundedExecutor:
|
|
6
|
-
"""
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
9
|
+
"""BoundedExecutor behaves as a ThreadPoolExecutor which will block on calls to submit().
|
|
10
|
+
|
|
11
|
+
Blocks once the limit given as "bound" work items are queued for execution.
|
|
12
|
+
|
|
10
13
|
:param bound: Integer - the maximum number of items in the work queue
|
|
11
14
|
:param max_workers: Integer - the size of the thread pool
|
|
12
15
|
"""
|
|
13
16
|
|
|
14
|
-
def __init__(self, bound, max_workers):
|
|
17
|
+
def __init__(self, bound: int, max_workers: int) -> None:
|
|
18
|
+
"""Initialize the bounded executor.
|
|
19
|
+
|
|
20
|
+
Args:
|
|
21
|
+
bound: Maximum number of items in the work queue.
|
|
22
|
+
max_workers: Size of the thread pool.
|
|
23
|
+
"""
|
|
15
24
|
self.executor = ThreadPoolExecutor(max_workers=max_workers)
|
|
16
25
|
self.semaphore = BoundedSemaphore(bound + max_workers)
|
|
17
26
|
|
|
18
27
|
"""See concurrent.futures.Executor#submit"""
|
|
19
28
|
|
|
20
|
-
def submit(
|
|
29
|
+
def submit(
|
|
30
|
+
self, fn: Callable[..., object], *args: object, **kwargs: object
|
|
31
|
+
) -> object:
|
|
32
|
+
"""Submit a callable to be executed with bounded concurrency."""
|
|
21
33
|
self.semaphore.acquire()
|
|
22
34
|
try:
|
|
23
35
|
future = self.executor.submit(fn, *args, **kwargs)
|
|
@@ -30,5 +42,6 @@ class BoundedExecutor:
|
|
|
30
42
|
|
|
31
43
|
"""See concurrent.futures.Executor#shutdown"""
|
|
32
44
|
|
|
33
|
-
def shutdown(self, wait=True):
|
|
45
|
+
def shutdown(self, wait: bool = True) -> None:
|
|
46
|
+
"""Shutdown the executor, optionally waiting for pending tasks to complete."""
|
|
34
47
|
self.executor.shutdown(wait)
|
arize/models/casting.py
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
|
+
"""Type casting utilities for ML model data conversion."""
|
|
2
|
+
|
|
1
3
|
# type: ignore[pb2]
|
|
2
4
|
from __future__ import annotations
|
|
3
5
|
|
|
4
6
|
import math
|
|
5
|
-
from typing import TYPE_CHECKING
|
|
7
|
+
from typing import TYPE_CHECKING
|
|
6
8
|
|
|
7
9
|
import numpy as np
|
|
8
10
|
|
|
@@ -14,14 +16,24 @@ if TYPE_CHECKING:
|
|
|
14
16
|
|
|
15
17
|
|
|
16
18
|
class CastingError(Exception):
|
|
19
|
+
"""Raised when type casting fails for a value."""
|
|
20
|
+
|
|
17
21
|
def __str__(self) -> str:
|
|
22
|
+
"""Return a human-readable error message."""
|
|
18
23
|
return self.error_message()
|
|
19
24
|
|
|
20
25
|
def __init__(self, error_msg: str, typed_value: TypedValue) -> None:
|
|
26
|
+
"""Initialize the exception with type casting context.
|
|
27
|
+
|
|
28
|
+
Args:
|
|
29
|
+
error_msg: Description of the casting failure.
|
|
30
|
+
typed_value: The TypedValue that failed to cast.
|
|
31
|
+
"""
|
|
21
32
|
self.error_msg = error_msg
|
|
22
33
|
self.typed_value = typed_value
|
|
23
34
|
|
|
24
35
|
def error_message(self) -> str:
|
|
36
|
+
"""Return the error message for this exception."""
|
|
25
37
|
return (
|
|
26
38
|
f"Failed to cast value {self.typed_value.value} of type {type(self.typed_value.value)} "
|
|
27
39
|
f"to type {self.typed_value.type}. "
|
|
@@ -30,7 +42,10 @@ class CastingError(Exception):
|
|
|
30
42
|
|
|
31
43
|
|
|
32
44
|
class ColumnCastingError(Exception):
|
|
45
|
+
"""Raised when type casting fails for a column."""
|
|
46
|
+
|
|
33
47
|
def __str__(self) -> str:
|
|
48
|
+
"""Return a human-readable error message."""
|
|
34
49
|
return self.error_message()
|
|
35
50
|
|
|
36
51
|
def __init__(
|
|
@@ -39,11 +54,19 @@ class ColumnCastingError(Exception):
|
|
|
39
54
|
attempted_columns: str,
|
|
40
55
|
attempted_type: TypedColumns,
|
|
41
56
|
) -> None:
|
|
57
|
+
"""Initialize the exception with column casting context.
|
|
58
|
+
|
|
59
|
+
Args:
|
|
60
|
+
error_msg: Description of the casting failure.
|
|
61
|
+
attempted_columns: Columns that failed to cast.
|
|
62
|
+
attempted_type: The TypedColumns type that was attempted.
|
|
63
|
+
"""
|
|
42
64
|
self.error_msg = error_msg
|
|
43
65
|
self.attempted_casting_columns = attempted_columns
|
|
44
66
|
self.attempted_casting_type = attempted_type
|
|
45
67
|
|
|
46
68
|
def error_message(self) -> str:
|
|
69
|
+
"""Return the error message for this exception."""
|
|
47
70
|
return (
|
|
48
71
|
f"Failed to cast to type {self.attempted_casting_type} "
|
|
49
72
|
f"for columns: {log_a_list(self.attempted_casting_columns, 'and')}. "
|
|
@@ -52,36 +75,55 @@ class ColumnCastingError(Exception):
|
|
|
52
75
|
|
|
53
76
|
|
|
54
77
|
class InvalidTypedColumnsError(Exception):
|
|
78
|
+
"""Raised when typed columns are invalid or incorrectly specified."""
|
|
79
|
+
|
|
55
80
|
def __str__(self) -> str:
|
|
81
|
+
"""Return a human-readable error message."""
|
|
56
82
|
return self.error_message()
|
|
57
83
|
|
|
58
84
|
def __init__(self, field_name: str, reason: str) -> None:
|
|
85
|
+
"""Initialize the exception with typed columns validation context.
|
|
86
|
+
|
|
87
|
+
Args:
|
|
88
|
+
field_name: Name of the schema field with invalid typed columns.
|
|
89
|
+
reason: Description of why the typed columns are invalid.
|
|
90
|
+
"""
|
|
59
91
|
self.field_name = field_name
|
|
60
92
|
self.reason = reason
|
|
61
93
|
|
|
62
94
|
def error_message(self) -> str:
|
|
95
|
+
"""Return the error message for this exception."""
|
|
63
96
|
return f"The {self.field_name} TypedColumns object {self.reason}."
|
|
64
97
|
|
|
65
98
|
|
|
66
99
|
class InvalidSchemaFieldTypeError(Exception):
|
|
100
|
+
"""Raised when schema field has invalid or unexpected type."""
|
|
101
|
+
|
|
67
102
|
def __str__(self) -> str:
|
|
103
|
+
"""Return a human-readable error message."""
|
|
68
104
|
return self.error_message()
|
|
69
105
|
|
|
70
106
|
def __init__(self, msg: str) -> None:
|
|
107
|
+
"""Initialize the exception with schema field type error message.
|
|
108
|
+
|
|
109
|
+
Args:
|
|
110
|
+
msg: Error message describing the schema field type issue.
|
|
111
|
+
"""
|
|
71
112
|
self.msg = msg
|
|
72
113
|
|
|
73
114
|
def error_message(self) -> str:
|
|
115
|
+
"""Return the error message for this exception."""
|
|
74
116
|
return self.msg
|
|
75
117
|
|
|
76
118
|
|
|
77
119
|
def cast_typed_columns(
|
|
78
120
|
dataframe: pd.DataFrame,
|
|
79
121
|
schema: Schema,
|
|
80
|
-
) ->
|
|
81
|
-
"""
|
|
82
|
-
|
|
83
|
-
This optional feature provides a simple way for users to prevent
|
|
84
|
-
|
|
122
|
+
) -> tuple[pd.DataFrame, Schema]:
|
|
123
|
+
"""Cast feature and tag columns in the dataframe to the types specified in each TypedColumns config.
|
|
124
|
+
|
|
125
|
+
This optional feature provides a simple way for users to prevent type drift within
|
|
126
|
+
a column across many SDK uploads.
|
|
85
127
|
|
|
86
128
|
Arguments:
|
|
87
129
|
---------
|
|
@@ -120,7 +162,7 @@ def cast_typed_columns(
|
|
|
120
162
|
)
|
|
121
163
|
|
|
122
164
|
# Make sure no other schema fields have this type.
|
|
123
|
-
if any(
|
|
165
|
+
if any(f for f in typed_column_fields if f not in allowed_fields):
|
|
124
166
|
raise InvalidSchemaFieldTypeError(
|
|
125
167
|
"Only the feature_column_names and tag_column_names Schema fields can be of type "
|
|
126
168
|
"TypedColumns. Fields with type TypedColumns:"
|
|
@@ -130,10 +172,7 @@ def cast_typed_columns(
|
|
|
130
172
|
for field_name in typed_column_fields:
|
|
131
173
|
f = getattr(schema, field_name)
|
|
132
174
|
if f:
|
|
133
|
-
|
|
134
|
-
_validate_typed_columns(field_name, f)
|
|
135
|
-
except InvalidTypedColumnsError:
|
|
136
|
-
raise
|
|
175
|
+
_validate_typed_columns(field_name, f)
|
|
137
176
|
dataframe = _cast_columns(dataframe, f)
|
|
138
177
|
|
|
139
178
|
# Now that the dataframe values have been cast to the specified types:
|
|
@@ -144,6 +183,14 @@ def cast_typed_columns(
|
|
|
144
183
|
|
|
145
184
|
|
|
146
185
|
def cast_dictionary(d: dict) -> dict:
|
|
186
|
+
"""Cast TypedValue entries in a dictionary to their appropriate Python types.
|
|
187
|
+
|
|
188
|
+
Args:
|
|
189
|
+
d: Dictionary that may contain TypedValue objects as values.
|
|
190
|
+
|
|
191
|
+
Returns:
|
|
192
|
+
Dictionary with TypedValue objects cast to their native Python types.
|
|
193
|
+
"""
|
|
147
194
|
cast_dict = {}
|
|
148
195
|
for k, v in d.items():
|
|
149
196
|
if isinstance(v, TypedValue):
|
|
@@ -154,9 +201,8 @@ def cast_dictionary(d: dict) -> dict:
|
|
|
154
201
|
|
|
155
202
|
def _cast_value(
|
|
156
203
|
typed_value: TypedValue,
|
|
157
|
-
) ->
|
|
158
|
-
"""
|
|
159
|
-
Casts a TypedValue to its provided type, preserving all null values as None or float('nan').
|
|
204
|
+
) -> str | int | float | list[str] | None:
|
|
205
|
+
"""Casts a TypedValue to its provided type, preserving all null values as None or float('nan').
|
|
160
206
|
|
|
161
207
|
Arguments:
|
|
162
208
|
---------
|
|
@@ -179,22 +225,21 @@ def _cast_value(
|
|
|
179
225
|
|
|
180
226
|
if typed_value.type == ArizeTypes.FLOAT:
|
|
181
227
|
return _cast_to_float(typed_value)
|
|
182
|
-
|
|
228
|
+
if typed_value.type == ArizeTypes.INT:
|
|
183
229
|
return _cast_to_int(typed_value)
|
|
184
|
-
|
|
230
|
+
if typed_value.type == ArizeTypes.STR:
|
|
185
231
|
return _cast_to_str(typed_value)
|
|
186
|
-
|
|
187
|
-
raise CastingError("Unknown casting type", typed_value)
|
|
232
|
+
raise CastingError("Unknown casting type", typed_value)
|
|
188
233
|
|
|
189
234
|
|
|
190
|
-
def _cast_to_float(typed_value: TypedValue) ->
|
|
235
|
+
def _cast_to_float(typed_value: TypedValue) -> float | None:
|
|
191
236
|
try:
|
|
192
237
|
return float(typed_value.value)
|
|
193
238
|
except Exception as e:
|
|
194
239
|
raise CastingError(str(e), typed_value) from e
|
|
195
240
|
|
|
196
241
|
|
|
197
|
-
def _cast_to_int(typed_value: TypedValue) -> Union[int, None]:
|
|
242
|
+
def _cast_to_int(typed_value: TypedValue) -> int | None:
|
|
198
243
|
# a NaN float can't be cast to an int. Proactively return None instead.
|
|
199
244
|
if isinstance(typed_value.value, float) and math.isnan(typed_value.value):
|
|
200
245
|
return None
|
|
@@ -214,7 +259,7 @@ def _cast_to_int(typed_value: TypedValue) -> Union[int, None]:
|
|
|
214
259
|
raise CastingError(str(e), typed_value) from e
|
|
215
260
|
|
|
216
261
|
|
|
217
|
-
def _cast_to_str(typed_value: TypedValue) -> Union[str, None]:
|
|
262
|
+
def _cast_to_str(typed_value: TypedValue) -> str | None:
|
|
218
263
|
# a NaN float can't be cast to a string. Proactively return None instead.
|
|
219
264
|
if isinstance(typed_value.value, float) and math.isnan(typed_value.value):
|
|
220
265
|
return None
|
|
@@ -227,8 +272,7 @@ def _cast_to_str(typed_value: TypedValue) -> Union[str, None]:
|
|
|
227
272
|
def _validate_typed_columns(
|
|
228
273
|
field_name: str, typed_columns: TypedColumns
|
|
229
274
|
) -> None:
|
|
230
|
-
"""
|
|
231
|
-
Validate a TypedColumns object.
|
|
275
|
+
"""Validate a TypedColumns object.
|
|
232
276
|
|
|
233
277
|
Arguments:
|
|
234
278
|
---------
|
|
@@ -256,8 +300,8 @@ def _validate_typed_columns(
|
|
|
256
300
|
def _cast_columns(
|
|
257
301
|
dataframe: pd.DataFrame, columns: TypedColumns
|
|
258
302
|
) -> pd.DataFrame:
|
|
259
|
-
"""
|
|
260
|
-
|
|
303
|
+
"""Cast columns corresponding to a single TypedColumns object and a single Arize Schema field.
|
|
304
|
+
|
|
261
305
|
(feature_column_names or tag_column_names)
|
|
262
306
|
|
|
263
307
|
Arguments:
|
|
@@ -324,9 +368,10 @@ def _cast_columns(
|
|
|
324
368
|
|
|
325
369
|
|
|
326
370
|
def _cast_df(
|
|
327
|
-
df: pd.DataFrame, cols:
|
|
371
|
+
df: pd.DataFrame, cols: list[str], target_type_str: str
|
|
328
372
|
) -> pd.DataFrame:
|
|
329
|
-
"""
|
|
373
|
+
"""Cast columns in a dataframe to the specified type.
|
|
374
|
+
|
|
330
375
|
Arguments:
|
|
331
376
|
---------
|
|
332
377
|
df: pd.DataFrame
|
|
@@ -351,13 +396,14 @@ def _cast_df(
|
|
|
351
396
|
df = df.replace(nan_mapping)
|
|
352
397
|
|
|
353
398
|
# None or NaN-based values (including np.nan) are automatically converted to pandas pd.NA type
|
|
354
|
-
return df.astype(
|
|
399
|
+
return df.astype(dict.fromkeys(cols, target_type_str))
|
|
355
400
|
|
|
356
401
|
|
|
357
402
|
def _convert_schema_field_types(
|
|
358
403
|
schema: Schema,
|
|
359
404
|
) -> Schema:
|
|
360
|
-
"""
|
|
405
|
+
"""Convert schema field types from TypedColumns to List[string] format.
|
|
406
|
+
|
|
361
407
|
Arguments:
|
|
362
408
|
---------
|
|
363
409
|
schema: Schema
|