arize 8.0.0a22__py3-none-any.whl → 8.0.0b0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arize/__init__.py +28 -19
- arize/_exporter/client.py +56 -37
- arize/_exporter/parsers/tracing_data_parser.py +41 -30
- arize/_exporter/validation.py +3 -3
- arize/_flight/client.py +207 -76
- arize/_generated/api_client/__init__.py +30 -6
- arize/_generated/api_client/api/__init__.py +1 -0
- arize/_generated/api_client/api/datasets_api.py +864 -190
- arize/_generated/api_client/api/experiments_api.py +167 -131
- arize/_generated/api_client/api/projects_api.py +1197 -0
- arize/_generated/api_client/api_client.py +2 -2
- arize/_generated/api_client/configuration.py +42 -34
- arize/_generated/api_client/exceptions.py +2 -2
- arize/_generated/api_client/models/__init__.py +15 -4
- arize/_generated/api_client/models/dataset.py +10 -10
- arize/_generated/api_client/models/dataset_example.py +111 -0
- arize/_generated/api_client/models/dataset_example_update.py +100 -0
- arize/_generated/api_client/models/dataset_version.py +13 -13
- arize/_generated/api_client/models/datasets_create_request.py +16 -8
- arize/_generated/api_client/models/datasets_examples_insert_request.py +100 -0
- arize/_generated/api_client/models/datasets_examples_list200_response.py +106 -0
- arize/_generated/api_client/models/datasets_examples_update_request.py +102 -0
- arize/_generated/api_client/models/datasets_list200_response.py +10 -4
- arize/_generated/api_client/models/experiment.py +14 -16
- arize/_generated/api_client/models/experiment_run.py +108 -0
- arize/_generated/api_client/models/experiment_run_create.py +102 -0
- arize/_generated/api_client/models/experiments_create_request.py +16 -10
- arize/_generated/api_client/models/experiments_list200_response.py +10 -4
- arize/_generated/api_client/models/experiments_runs_list200_response.py +19 -5
- arize/_generated/api_client/models/{error.py → pagination_metadata.py} +13 -11
- arize/_generated/api_client/models/primitive_value.py +172 -0
- arize/_generated/api_client/models/problem.py +100 -0
- arize/_generated/api_client/models/project.py +99 -0
- arize/_generated/api_client/models/{datasets_list_examples200_response.py → projects_create_request.py} +13 -11
- arize/_generated/api_client/models/projects_list200_response.py +106 -0
- arize/_generated/api_client/rest.py +2 -2
- arize/_generated/api_client/test/test_dataset.py +4 -2
- arize/_generated/api_client/test/test_dataset_example.py +56 -0
- arize/_generated/api_client/test/test_dataset_example_update.py +52 -0
- arize/_generated/api_client/test/test_dataset_version.py +7 -2
- arize/_generated/api_client/test/test_datasets_api.py +27 -13
- arize/_generated/api_client/test/test_datasets_create_request.py +8 -4
- arize/_generated/api_client/test/{test_datasets_list_examples200_response.py → test_datasets_examples_insert_request.py} +19 -15
- arize/_generated/api_client/test/test_datasets_examples_list200_response.py +66 -0
- arize/_generated/api_client/test/test_datasets_examples_update_request.py +61 -0
- arize/_generated/api_client/test/test_datasets_list200_response.py +9 -3
- arize/_generated/api_client/test/test_experiment.py +2 -4
- arize/_generated/api_client/test/test_experiment_run.py +56 -0
- arize/_generated/api_client/test/test_experiment_run_create.py +54 -0
- arize/_generated/api_client/test/test_experiments_api.py +6 -6
- arize/_generated/api_client/test/test_experiments_create_request.py +9 -6
- arize/_generated/api_client/test/test_experiments_list200_response.py +9 -5
- arize/_generated/api_client/test/test_experiments_runs_list200_response.py +15 -5
- arize/_generated/api_client/test/test_pagination_metadata.py +53 -0
- arize/_generated/api_client/test/{test_error.py → test_primitive_value.py} +13 -14
- arize/_generated/api_client/test/test_problem.py +57 -0
- arize/_generated/api_client/test/test_project.py +58 -0
- arize/_generated/api_client/test/test_projects_api.py +59 -0
- arize/_generated/api_client/test/test_projects_create_request.py +54 -0
- arize/_generated/api_client/test/test_projects_list200_response.py +70 -0
- arize/_generated/api_client_README.md +43 -29
- arize/_generated/protocol/flight/flight_pb2.py +400 -0
- arize/_lazy.py +27 -19
- arize/client.py +181 -58
- arize/config.py +324 -116
- arize/constants/__init__.py +1 -0
- arize/constants/config.py +11 -4
- arize/constants/ml.py +6 -4
- arize/constants/openinference.py +2 -0
- arize/constants/pyarrow.py +2 -0
- arize/constants/spans.py +3 -1
- arize/datasets/__init__.py +1 -0
- arize/datasets/client.py +304 -84
- arize/datasets/errors.py +32 -2
- arize/datasets/validation.py +18 -8
- arize/embeddings/__init__.py +2 -0
- arize/embeddings/auto_generator.py +23 -19
- arize/embeddings/base_generators.py +89 -36
- arize/embeddings/constants.py +2 -0
- arize/embeddings/cv_generators.py +26 -4
- arize/embeddings/errors.py +27 -5
- arize/embeddings/nlp_generators.py +43 -18
- arize/embeddings/tabular_generators.py +46 -31
- arize/embeddings/usecases.py +12 -2
- arize/exceptions/__init__.py +1 -0
- arize/exceptions/auth.py +11 -1
- arize/exceptions/base.py +29 -4
- arize/exceptions/models.py +21 -2
- arize/exceptions/parameters.py +31 -0
- arize/exceptions/spaces.py +12 -1
- arize/exceptions/types.py +86 -7
- arize/exceptions/values.py +220 -20
- arize/experiments/__init__.py +13 -0
- arize/experiments/client.py +394 -285
- arize/experiments/evaluators/__init__.py +1 -0
- arize/experiments/evaluators/base.py +74 -41
- arize/experiments/evaluators/exceptions.py +6 -3
- arize/experiments/evaluators/executors.py +121 -73
- arize/experiments/evaluators/rate_limiters.py +106 -57
- arize/experiments/evaluators/types.py +34 -7
- arize/experiments/evaluators/utils.py +65 -27
- arize/experiments/functions.py +103 -101
- arize/experiments/tracing.py +52 -44
- arize/experiments/types.py +56 -31
- arize/logging.py +54 -22
- arize/ml/__init__.py +1 -0
- arize/ml/batch_validation/__init__.py +1 -0
- arize/{models → ml}/batch_validation/errors.py +545 -67
- arize/{models → ml}/batch_validation/validator.py +344 -303
- arize/ml/bounded_executor.py +47 -0
- arize/{models → ml}/casting.py +118 -108
- arize/{models → ml}/client.py +339 -118
- arize/{models → ml}/proto.py +97 -42
- arize/{models → ml}/stream_validation.py +43 -15
- arize/ml/surrogate_explainer/__init__.py +1 -0
- arize/{models → ml}/surrogate_explainer/mimic.py +25 -10
- arize/{types.py → ml/types.py} +355 -354
- arize/pre_releases.py +44 -0
- arize/projects/__init__.py +1 -0
- arize/projects/client.py +134 -0
- arize/regions.py +40 -0
- arize/spans/__init__.py +1 -0
- arize/spans/client.py +204 -175
- arize/spans/columns.py +13 -0
- arize/spans/conversion.py +60 -37
- arize/spans/validation/__init__.py +1 -0
- arize/spans/validation/annotations/__init__.py +1 -0
- arize/spans/validation/annotations/annotations_validation.py +6 -4
- arize/spans/validation/annotations/dataframe_form_validation.py +13 -11
- arize/spans/validation/annotations/value_validation.py +35 -11
- arize/spans/validation/common/__init__.py +1 -0
- arize/spans/validation/common/argument_validation.py +33 -8
- arize/spans/validation/common/dataframe_form_validation.py +35 -9
- arize/spans/validation/common/errors.py +211 -11
- arize/spans/validation/common/value_validation.py +81 -14
- arize/spans/validation/evals/__init__.py +1 -0
- arize/spans/validation/evals/dataframe_form_validation.py +28 -8
- arize/spans/validation/evals/evals_validation.py +34 -4
- arize/spans/validation/evals/value_validation.py +26 -3
- arize/spans/validation/metadata/__init__.py +1 -1
- arize/spans/validation/metadata/argument_validation.py +14 -5
- arize/spans/validation/metadata/dataframe_form_validation.py +26 -10
- arize/spans/validation/metadata/value_validation.py +24 -10
- arize/spans/validation/spans/__init__.py +1 -0
- arize/spans/validation/spans/dataframe_form_validation.py +35 -14
- arize/spans/validation/spans/spans_validation.py +35 -4
- arize/spans/validation/spans/value_validation.py +78 -8
- arize/utils/__init__.py +1 -0
- arize/utils/arrow.py +31 -15
- arize/utils/cache.py +34 -6
- arize/utils/dataframe.py +20 -3
- arize/utils/online_tasks/__init__.py +2 -0
- arize/utils/online_tasks/dataframe_preprocessor.py +58 -47
- arize/utils/openinference_conversion.py +44 -5
- arize/utils/proto.py +10 -0
- arize/utils/size.py +5 -3
- arize/utils/types.py +105 -0
- arize/version.py +3 -1
- {arize-8.0.0a22.dist-info → arize-8.0.0b0.dist-info}/METADATA +13 -6
- arize-8.0.0b0.dist-info/RECORD +175 -0
- {arize-8.0.0a22.dist-info → arize-8.0.0b0.dist-info}/WHEEL +1 -1
- arize-8.0.0b0.dist-info/licenses/LICENSE +176 -0
- arize-8.0.0b0.dist-info/licenses/NOTICE +13 -0
- arize/_generated/protocol/flight/export_pb2.py +0 -61
- arize/_generated/protocol/flight/ingest_pb2.py +0 -365
- arize/models/__init__.py +0 -0
- arize/models/batch_validation/__init__.py +0 -0
- arize/models/bounded_executor.py +0 -34
- arize/models/surrogate_explainer/__init__.py +0 -0
- arize-8.0.0a22.dist-info/RECORD +0 -146
- arize-8.0.0a22.dist-info/licenses/LICENSE.md +0 -12
arize/__init__.py
CHANGED
|
@@ -1,9 +1,12 @@
|
|
|
1
|
+
"""Arize SDK for model observability and LLM tracing."""
|
|
2
|
+
|
|
1
3
|
import logging
|
|
2
4
|
from collections.abc import Mapping
|
|
3
5
|
|
|
4
6
|
from arize._generated.api_client import models
|
|
5
7
|
from arize.client import ArizeClient
|
|
6
8
|
from arize.config import SDKConfiguration
|
|
9
|
+
from arize.regions import Region
|
|
7
10
|
|
|
8
11
|
# Attach a NullHandler by default in the top-level package
|
|
9
12
|
# so that if no configuration is installed, nothing explodes.
|
|
@@ -14,41 +17,46 @@ try:
|
|
|
14
17
|
from .logging import auto_configure_from_env
|
|
15
18
|
|
|
16
19
|
auto_configure_from_env()
|
|
17
|
-
except Exception:
|
|
18
|
-
#
|
|
20
|
+
except Exception: # noqa: S110
|
|
21
|
+
# Intentionally silent: logging configuration is optional and should never
|
|
22
|
+
# prevent SDK initialization. Users can configure logging explicitly if needed.
|
|
19
23
|
pass
|
|
20
24
|
|
|
21
|
-
__all__ = [
|
|
25
|
+
__all__ = [
|
|
26
|
+
"ArizeClient",
|
|
27
|
+
"Region",
|
|
28
|
+
"SDKConfiguration",
|
|
29
|
+
]
|
|
22
30
|
|
|
23
31
|
|
|
24
|
-
def make_to_df(field_name: str):
|
|
32
|
+
def make_to_df(field_name: str) -> object:
|
|
25
33
|
def to_df(
|
|
26
|
-
self,
|
|
34
|
+
self: object,
|
|
27
35
|
by_alias: bool = False,
|
|
28
36
|
exclude_none: str | bool = False,
|
|
29
37
|
json_normalize: bool = False,
|
|
30
38
|
convert_dtypes: bool = True,
|
|
31
|
-
):
|
|
32
|
-
"""
|
|
33
|
-
Convert a list of objects to a pandas DataFrame.
|
|
39
|
+
) -> object:
|
|
40
|
+
"""Convert a list of objects to a pandas DataFrame.
|
|
34
41
|
|
|
35
42
|
Behavior:
|
|
36
43
|
- If an item is a Pydantic v2 model, use `.model_dump(by_alias=...)`.
|
|
37
44
|
- If an item is a mapping (dict-like), use it as-is.
|
|
38
45
|
- Otherwise, raise a ValueError (unsupported row type).
|
|
39
46
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
47
|
+
Args:
|
|
48
|
+
self (object): The object instance containing the field to convert.
|
|
49
|
+
by_alias (bool): Use field aliases when dumping Pydantic models.
|
|
50
|
+
exclude_none (str | bool): Control None/NaN column dropping.
|
|
51
|
+
- False: keep Nones as-is
|
|
52
|
+
- "all": drop columns where all values are None/NaN
|
|
53
|
+
- "any": drop columns where any value is None/NaN
|
|
54
|
+
- True: alias for "all"
|
|
55
|
+
json_normalize (bool): If True, flatten nested dicts via `pandas.json_normalize`.
|
|
56
|
+
convert_dtypes (bool): If True, call `DataFrame.convert_dtypes()` at the end.
|
|
49
57
|
|
|
50
58
|
Returns:
|
|
51
|
-
|
|
59
|
+
pandas.DataFrame: The converted DataFrame.
|
|
52
60
|
"""
|
|
53
61
|
import pandas as pd
|
|
54
62
|
|
|
@@ -85,6 +93,7 @@ def make_to_df(field_name: str):
|
|
|
85
93
|
|
|
86
94
|
|
|
87
95
|
models.DatasetsList200Response.to_df = make_to_df("datasets") # type: ignore[attr-defined]
|
|
88
|
-
models.
|
|
96
|
+
models.DatasetsExamplesList200Response.to_df = make_to_df("examples") # type: ignore[attr-defined]
|
|
89
97
|
models.ExperimentsList200Response.to_df = make_to_df("experiments") # type: ignore[attr-defined]
|
|
90
98
|
models.ExperimentsRunsList200Response.to_df = make_to_df("experiment_runs") # type: ignore[attr-defined]
|
|
99
|
+
models.ProjectsList200Response.to_df = make_to_df("projects") # type: ignore[attr-defined]
|
arize/_exporter/client.py
CHANGED
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
import logging
|
|
3
3
|
from dataclasses import dataclass
|
|
4
4
|
from datetime import datetime
|
|
5
|
-
from typing import List, Tuple
|
|
6
5
|
|
|
7
6
|
import pandas as pd
|
|
8
7
|
import pyarrow.parquet as pq
|
|
@@ -16,9 +15,9 @@ from arize._exporter.validation import (
|
|
|
16
15
|
validate_input_type,
|
|
17
16
|
validate_start_end_time,
|
|
18
17
|
)
|
|
19
|
-
from arize._generated.protocol.flight import
|
|
18
|
+
from arize._generated.protocol.flight import flight_pb2
|
|
20
19
|
from arize.logging import CtxAdapter
|
|
21
|
-
from arize.types import Environments, SimilaritySearchParams
|
|
20
|
+
from arize.ml.types import Environments, SimilaritySearchParams
|
|
22
21
|
from arize.utils.dataframe import reset_dataframe_index
|
|
23
22
|
|
|
24
23
|
logger = logging.getLogger(__name__)
|
|
@@ -33,19 +32,20 @@ class ArizeExportClient:
|
|
|
33
32
|
space_id: str,
|
|
34
33
|
model_id: str,
|
|
35
34
|
environment: Environments,
|
|
36
|
-
start_time:
|
|
37
|
-
end_time:
|
|
35
|
+
start_time: datetime,
|
|
36
|
+
end_time: datetime,
|
|
38
37
|
where: str = "",
|
|
39
|
-
columns:
|
|
38
|
+
columns: list | None = None,
|
|
40
39
|
similarity_search_params: SimilaritySearchParams | None = None,
|
|
41
40
|
model_version: str = "",
|
|
42
41
|
batch_id: str = "",
|
|
43
42
|
include_actuals: bool = False,
|
|
44
43
|
stream_chunk_size: int | None = None,
|
|
45
|
-
):
|
|
46
|
-
"""
|
|
47
|
-
|
|
48
|
-
time interval and model environment,
|
|
44
|
+
) -> object:
|
|
45
|
+
"""Exports data of a specific model in the Arize platform to a pandas dataframe.
|
|
46
|
+
|
|
47
|
+
The export covers a defined time interval and model environment, and can
|
|
48
|
+
optionally be filtered by model version and/or batch id.
|
|
49
49
|
|
|
50
50
|
Args:
|
|
51
51
|
space_id (str): The id for the space where to export models from, can be retrieved from
|
|
@@ -104,14 +104,14 @@ class ArizeExportClient:
|
|
|
104
104
|
return pd.DataFrame()
|
|
105
105
|
progress_bar = self._get_progress_bar(num_recs)
|
|
106
106
|
list_of_df = []
|
|
107
|
-
|
|
108
|
-
|
|
107
|
+
try:
|
|
108
|
+
while True:
|
|
109
109
|
flight_batch = stream_reader.read_chunk()
|
|
110
110
|
batch_df = flight_batch.data.to_pandas()
|
|
111
111
|
list_of_df.append(batch_df)
|
|
112
112
|
progress_bar.update(batch_df.shape[0])
|
|
113
|
-
|
|
114
|
-
|
|
113
|
+
except StopIteration:
|
|
114
|
+
pass
|
|
115
115
|
progress_bar.close()
|
|
116
116
|
df = pd.concat(list_of_df)
|
|
117
117
|
null_columns = df.columns[df.isnull().all()]
|
|
@@ -139,16 +139,17 @@ class ArizeExportClient:
|
|
|
139
139
|
start_time: datetime,
|
|
140
140
|
end_time: datetime,
|
|
141
141
|
where: str = "",
|
|
142
|
-
columns:
|
|
142
|
+
columns: list | None = None,
|
|
143
143
|
similarity_search_params: SimilaritySearchParams | None = None,
|
|
144
144
|
model_version: str = "",
|
|
145
145
|
batch_id: str = "",
|
|
146
146
|
include_actuals: bool = False,
|
|
147
147
|
stream_chunk_size: int | None = None,
|
|
148
148
|
) -> None:
|
|
149
|
-
"""
|
|
150
|
-
|
|
151
|
-
interval and model environment,
|
|
149
|
+
"""Exports data of a specific model in the Arize platform to a parquet file.
|
|
150
|
+
|
|
151
|
+
The export covers a defined time interval and model environment, and can
|
|
152
|
+
optionally be filtered by model version and/or batch id.
|
|
152
153
|
|
|
153
154
|
Args:
|
|
154
155
|
path (str): path to the file to store exported data. File must be in parquet format and
|
|
@@ -208,17 +209,17 @@ class ArizeExportClient:
|
|
|
208
209
|
stream_chunk_size=stream_chunk_size,
|
|
209
210
|
)
|
|
210
211
|
if stream_reader is None:
|
|
211
|
-
return
|
|
212
|
+
return
|
|
212
213
|
progress_bar = self._get_progress_bar(num_recs)
|
|
213
214
|
with pq.ParquetWriter(path, schema=stream_reader.schema) as writer:
|
|
214
|
-
|
|
215
|
-
|
|
215
|
+
try:
|
|
216
|
+
while True:
|
|
216
217
|
flight_batch = stream_reader.read_chunk()
|
|
217
218
|
record_batch = flight_batch.data
|
|
218
219
|
writer.write_batch(record_batch)
|
|
219
220
|
progress_bar.update(record_batch.num_rows)
|
|
220
|
-
|
|
221
|
-
|
|
221
|
+
except StopIteration:
|
|
222
|
+
pass
|
|
222
223
|
progress_bar.close()
|
|
223
224
|
|
|
224
225
|
def _get_stream_reader(
|
|
@@ -233,9 +234,9 @@ class ArizeExportClient:
|
|
|
233
234
|
batch_id: str = "",
|
|
234
235
|
where: str = "",
|
|
235
236
|
similarity_search_params: SimilaritySearchParams | None = None,
|
|
236
|
-
columns:
|
|
237
|
+
columns: list | None = None,
|
|
237
238
|
stream_chunk_size: int | None = None,
|
|
238
|
-
) ->
|
|
239
|
+
) -> tuple[flight.FlightStreamReader | None, int]:
|
|
239
240
|
# Bind common context for this operation
|
|
240
241
|
log = CtxAdapter(
|
|
241
242
|
logger,
|
|
@@ -273,7 +274,7 @@ class ArizeExportClient:
|
|
|
273
274
|
validate_start_end_time(start_time, end_time)
|
|
274
275
|
|
|
275
276
|
# Create query descriptor
|
|
276
|
-
query_descriptor =
|
|
277
|
+
query_descriptor = flight_pb2.RecordQueryDescriptor(
|
|
277
278
|
space_id=space_id,
|
|
278
279
|
model_id=model_id,
|
|
279
280
|
environment=environment.name,
|
|
@@ -289,9 +290,11 @@ class ArizeExportClient:
|
|
|
289
290
|
else None
|
|
290
291
|
),
|
|
291
292
|
projected_columns=columns if columns else [],
|
|
292
|
-
stream_chunk_size=
|
|
293
|
-
|
|
294
|
-
|
|
293
|
+
stream_chunk_size=(
|
|
294
|
+
Int64Value(value=stream_chunk_size)
|
|
295
|
+
if stream_chunk_size is not None
|
|
296
|
+
else None
|
|
297
|
+
),
|
|
295
298
|
)
|
|
296
299
|
|
|
297
300
|
try:
|
|
@@ -306,17 +309,24 @@ class ArizeExportClient:
|
|
|
306
309
|
logger.warning("Query returns no data")
|
|
307
310
|
return None, 0
|
|
308
311
|
logger.debug("Ticket: %s", flight_info.endpoints[0].ticket)
|
|
309
|
-
|
|
310
|
-
# Retrieve the result set as flight stream reader
|
|
311
|
-
reader = self.flight_client.do_get(flight_info.endpoints[0].ticket)
|
|
312
|
-
return reader, flight_info.total_records
|
|
313
312
|
except Exception as e:
|
|
314
313
|
msg = f"Error getting flight info or do_get: {e}"
|
|
315
|
-
logger.
|
|
314
|
+
logger.exception(msg)
|
|
316
315
|
raise RuntimeError(msg) from e
|
|
316
|
+
# Retrieve the result set as flight stream reader
|
|
317
|
+
reader = self.flight_client.do_get(flight_info.endpoints[0].ticket)
|
|
318
|
+
return reader, flight_info.total_records
|
|
317
319
|
|
|
318
320
|
@staticmethod
|
|
319
|
-
def _get_progress_bar(num_recs):
|
|
321
|
+
def _get_progress_bar(num_recs: int) -> tqdm:
|
|
322
|
+
"""Create a progress bar for export operations.
|
|
323
|
+
|
|
324
|
+
Args:
|
|
325
|
+
num_recs: Total number of records to export.
|
|
326
|
+
|
|
327
|
+
Returns:
|
|
328
|
+
A tqdm progress bar configured for data export display.
|
|
329
|
+
"""
|
|
320
330
|
return tqdm(
|
|
321
331
|
total=num_recs,
|
|
322
332
|
desc=f" exporting {num_recs} rows",
|
|
@@ -329,8 +339,17 @@ class ArizeExportClient:
|
|
|
329
339
|
|
|
330
340
|
def _get_pb_similarity_search_params(
|
|
331
341
|
similarity_params: SimilaritySearchParams,
|
|
332
|
-
) ->
|
|
333
|
-
|
|
342
|
+
) -> flight_pb2.SimilaritySearchParams:
|
|
343
|
+
"""Convert SimilaritySearchParams to protocol buffer format.
|
|
344
|
+
|
|
345
|
+
Args:
|
|
346
|
+
similarity_params: Similarity search parameters containing search column name,
|
|
347
|
+
threshold, and reference examples.
|
|
348
|
+
|
|
349
|
+
Returns:
|
|
350
|
+
A protocol buffer SimilaritySearchParams object for Flight requests.
|
|
351
|
+
"""
|
|
352
|
+
proto_params = flight_pb2.SimilaritySearchParams()
|
|
334
353
|
proto_params.search_column_name = similarity_params.search_column_name
|
|
335
354
|
proto_params.threshold = similarity_params.threshold
|
|
336
355
|
for ref in similarity_params.references:
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import logging
|
|
3
|
-
from typing import List
|
|
4
3
|
|
|
5
4
|
import numpy as np
|
|
6
5
|
import pandas as pd
|
|
@@ -28,12 +27,26 @@ logger = logging.getLogger(__name__)
|
|
|
28
27
|
# but the resulting error messages provide clarity on what the effect
|
|
29
28
|
# of the error is on the data; It should not prevent a user from continuing to use the data
|
|
30
29
|
class OtelTracingDataTransformer:
|
|
30
|
+
def _apply_column_transformation(
|
|
31
|
+
self, df: pd.DataFrame, col_name: str, transform_func: object
|
|
32
|
+
) -> str | None:
|
|
33
|
+
"""Apply a transformation to a column and return error message if it fails."""
|
|
34
|
+
try:
|
|
35
|
+
df[col_name] = df[col_name].apply(transform_func)
|
|
36
|
+
except Exception as e:
|
|
37
|
+
return (
|
|
38
|
+
f"Unable to transform json string data to a Python dict in column '{col_name}'; "
|
|
39
|
+
f"May encounter issues when importing data back into Arize; Error: {e}"
|
|
40
|
+
)
|
|
41
|
+
else:
|
|
42
|
+
return None
|
|
43
|
+
|
|
31
44
|
def transform(self, df: pd.DataFrame) -> pd.DataFrame:
|
|
32
|
-
errors:
|
|
45
|
+
errors: list[str] = []
|
|
33
46
|
|
|
34
47
|
# Convert list of json serializable strings columns to list of dictionaries for more
|
|
35
48
|
# conveinent data processing in Python
|
|
36
|
-
list_of_json_string_column_names:
|
|
49
|
+
list_of_json_string_column_names: list[str] = [
|
|
37
50
|
col.name
|
|
38
51
|
for col in [
|
|
39
52
|
SPAN_ATTRIBUTES_LLM_INPUT_MESSAGES_COL,
|
|
@@ -45,17 +58,13 @@ class OtelTracingDataTransformer:
|
|
|
45
58
|
if col.name in df.columns
|
|
46
59
|
]
|
|
47
60
|
for col_name in list_of_json_string_column_names:
|
|
48
|
-
|
|
49
|
-
df
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
f"May encounter issues when importing data back into Arize; Error: {e}"
|
|
56
|
-
)
|
|
57
|
-
|
|
58
|
-
json_string_column_names: List[str] = [
|
|
61
|
+
error = self._apply_column_transformation(
|
|
62
|
+
df, col_name, self._transform_value_to_list_of_dict
|
|
63
|
+
)
|
|
64
|
+
if error:
|
|
65
|
+
errors.append(error)
|
|
66
|
+
|
|
67
|
+
json_string_column_names: list[str] = [
|
|
59
68
|
col.name
|
|
60
69
|
for col in [
|
|
61
70
|
SPAN_ATTRIBUTES_LLM_PROMPT_TEMPLATE_VARIABLES_COL,
|
|
@@ -64,16 +73,14 @@ class OtelTracingDataTransformer:
|
|
|
64
73
|
if col.name in df.columns
|
|
65
74
|
]
|
|
66
75
|
for col_name in json_string_column_names:
|
|
67
|
-
|
|
68
|
-
df
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
f"May encounter issues when importing data back into Arize; Error: {e}"
|
|
73
|
-
)
|
|
76
|
+
error = self._apply_column_transformation(
|
|
77
|
+
df, col_name, self._transform_json_to_dict
|
|
78
|
+
)
|
|
79
|
+
if error:
|
|
80
|
+
errors.append(error)
|
|
74
81
|
|
|
75
82
|
# Clean json string columns since empty strings are equivalent here to None but are not valid json
|
|
76
|
-
dirty_string_column_names:
|
|
83
|
+
dirty_string_column_names: list[str] = [
|
|
77
84
|
col.name
|
|
78
85
|
for col in [
|
|
79
86
|
SPAN_ATTRIBUTES_LLM_INVOCATION_PARAMETERS_COL,
|
|
@@ -85,7 +92,7 @@ class OtelTracingDataTransformer:
|
|
|
85
92
|
df[col_name] = df[col_name].apply(self._clean_json_string)
|
|
86
93
|
|
|
87
94
|
# Convert timestamp columns to datetime objects
|
|
88
|
-
timestamp_column_names:
|
|
95
|
+
timestamp_column_names: list[str] = [
|
|
89
96
|
col.name
|
|
90
97
|
for col in [
|
|
91
98
|
SPAN_START_TIME_COL,
|
|
@@ -103,7 +110,9 @@ class OtelTracingDataTransformer:
|
|
|
103
110
|
|
|
104
111
|
return df
|
|
105
112
|
|
|
106
|
-
def _transform_value_to_list_of_dict(
|
|
113
|
+
def _transform_value_to_list_of_dict(
|
|
114
|
+
self, value: object
|
|
115
|
+
) -> list[object] | None:
|
|
107
116
|
if value is None:
|
|
108
117
|
return None
|
|
109
118
|
|
|
@@ -113,10 +122,11 @@ class OtelTracingDataTransformer:
|
|
|
113
122
|
for i in value
|
|
114
123
|
if self._is_non_empty_string(i)
|
|
115
124
|
]
|
|
116
|
-
|
|
125
|
+
if self._is_non_empty_string(value):
|
|
117
126
|
return [self._deserialize_json_string_to_dict(value)]
|
|
127
|
+
return None
|
|
118
128
|
|
|
119
|
-
def _transform_json_to_dict(self, value):
|
|
129
|
+
def _transform_json_to_dict(self, value: object) -> object | None:
|
|
120
130
|
if value is None:
|
|
121
131
|
return None
|
|
122
132
|
|
|
@@ -126,20 +136,21 @@ class OtelTracingDataTransformer:
|
|
|
126
136
|
if isinstance(value, str) and value == "":
|
|
127
137
|
# transform empty string to None
|
|
128
138
|
return None
|
|
139
|
+
return None
|
|
129
140
|
|
|
130
|
-
def _is_non_empty_string(self, value):
|
|
141
|
+
def _is_non_empty_string(self, value: object) -> bool:
|
|
131
142
|
return isinstance(value, str) and value != ""
|
|
132
143
|
|
|
133
|
-
def _deserialize_json_string_to_dict(self, value: str):
|
|
144
|
+
def _deserialize_json_string_to_dict(self, value: str) -> object:
|
|
134
145
|
try:
|
|
135
146
|
return json.loads(value)
|
|
136
147
|
except json.JSONDecodeError as e:
|
|
137
148
|
raise ValueError(f"Invalid JSON string: {value}") from e
|
|
138
149
|
|
|
139
|
-
def _clean_json_string(self, value):
|
|
150
|
+
def _clean_json_string(self, value: object) -> object | None:
|
|
140
151
|
return value if self._is_non_empty_string(value) else None
|
|
141
152
|
|
|
142
|
-
def _convert_timestamp_to_datetime(self, value):
|
|
153
|
+
def _convert_timestamp_to_datetime(self, value: object) -> object:
|
|
143
154
|
return (
|
|
144
155
|
pd.Timestamp(value, unit="ns")
|
|
145
156
|
if value and isinstance(value, (int, float, np.int64))
|
arize/_exporter/validation.py
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import TYPE_CHECKING
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
4
|
|
|
5
5
|
if TYPE_CHECKING:
|
|
6
6
|
from datetime import datetime
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
def validate_input_type(
|
|
10
|
-
input:
|
|
10
|
+
input: object,
|
|
11
11
|
input_name: str,
|
|
12
12
|
input_type: type,
|
|
13
13
|
allow_none: bool = False,
|
|
@@ -28,7 +28,7 @@ def validate_input_type(
|
|
|
28
28
|
|
|
29
29
|
|
|
30
30
|
def validate_input_value(
|
|
31
|
-
input:
|
|
31
|
+
input: object,
|
|
32
32
|
input_name: str,
|
|
33
33
|
choices: tuple,
|
|
34
34
|
) -> None:
|