data-prep-toolkit 0.2.2.dev2__py3-none-any.whl → 0.2.3.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {data_prep_toolkit-0.2.2.dev2.dist-info → data_prep_toolkit-0.2.3.dev0.dist-info}/METADATA +2 -1
- {data_prep_toolkit-0.2.2.dev2.dist-info → data_prep_toolkit-0.2.3.dev0.dist-info}/RECORD +5 -5
- {data_prep_toolkit-0.2.2.dev2.dist-info → data_prep_toolkit-0.2.3.dev0.dist-info}/WHEEL +1 -1
- data_processing/utils/transform_utils.py +16 -2
- {data_prep_toolkit-0.2.2.dev2.dist-info → data_prep_toolkit-0.2.3.dev0.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: data_prep_toolkit
|
|
3
|
-
Version: 0.2.2.dev2
|
|
3
|
+
Version: 0.2.3.dev0
|
|
4
4
|
Summary: Data Preparation Toolkit Library for Ray and Python
|
|
5
5
|
Author-email: Maroun Touma <touma@us.ibm.com>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -13,6 +13,7 @@ Requires-Dist: boto3==1.34.69
|
|
|
13
13
|
Requires-Dist: argparse
|
|
14
14
|
Requires-Dist: mmh3
|
|
15
15
|
Requires-Dist: psutil
|
|
16
|
+
Requires-Dist: polars>=1.9.0
|
|
16
17
|
Provides-Extra: dev
|
|
17
18
|
Requires-Dist: twine; extra == "dev"
|
|
18
19
|
Requires-Dist: pytest>=7.3.2; extra == "dev"
|
|
@@ -47,7 +47,7 @@ data_processing/utils/params_utils.py,sha256=oAKY3wC8b17rDUJGqX19-rAQHDc9SQn1ksT
|
|
|
47
47
|
data_processing/utils/pipinstaller.py,sha256=PxFNwEy8v4FqjwYgrPhH0UTrCgsJvM5WAE2fKylsk2Q,2511
|
|
48
48
|
data_processing/utils/transform_configuration.json,sha256=6YBw0Hk2mokY6JBn1kR6L9AkV_yivbFrpSoHecAJp9o,4562
|
|
49
49
|
data_processing/utils/transform_configurator.py,sha256=9OHSCQ8rFSoDdMW6ZCHYdNe6thRwV9zOaRPnLkWNMYE,3601
|
|
50
|
-
data_processing/utils/transform_utils.py,sha256=
|
|
50
|
+
data_processing/utils/transform_utils.py,sha256=1IEowOYQA6HOGEalqujbDVatrBaImnuY5OKmUYGaGwI,9068
|
|
51
51
|
data_processing/utils/unrecoverable.py,sha256=cbF74AGK1IdRor_L1w_hPwglV_b2blP6Ad4ET79xrl0,831
|
|
52
52
|
data_processing_ray/runtime/ray/__init__.py,sha256=vjQOvb_OJNq3c1F_tG3WjO-pciY77Z1lETO2Ha_GVbw,784
|
|
53
53
|
data_processing_ray/runtime/ray/execution_configuration.py,sha256=C9YFixlATr7PPpkVQ0WzjCCPTWFuP80W2rnzY1bbp5I,4628
|
|
@@ -72,7 +72,7 @@ data_processing_spark/runtime/spark/transform_runtime.py,sha256=je27rTRdd-5Wtd8n
|
|
|
72
72
|
data_processing_spark/test_support/transform/__init__.py,sha256=FQJyj7z1hXQynngMVQlCTJxTh2bdc4jN4220CBmLTqE,872
|
|
73
73
|
data_processing_spark/test_support/transform/noop_folder_transform.py,sha256=z0jXCVKJYHPqB9ZTfUxnQkUVDnmfWjvss4_I3QZ8JZ4,2187
|
|
74
74
|
data_processing_spark/test_support/transform/noop_transform.py,sha256=0FR3o-LnXf-UFS5gU0j-i4LVlw1mHDxGaPI40dkkIKY,1694
|
|
75
|
-
data_prep_toolkit-0.2.
|
|
76
|
-
data_prep_toolkit-0.2.
|
|
77
|
-
data_prep_toolkit-0.2.2.dev2.dist-info/top_level.txt,sha256=XGMDmY55_pe5KeRWvO0un9a640e2v99tzbBBtjNybPM,58
|
|
78
|
-
data_prep_toolkit-0.2.2.dev2.dist-info/RECORD,,
|
|
75
|
+
data_prep_toolkit-0.2.3.dev0.dist-info/METADATA,sha256=u2UV51dNeRPKa4R0bEOI6udMGEodA8JzlCffyt-xXt8,2269
|
|
76
|
+
data_prep_toolkit-0.2.3.dev0.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
77
|
+
data_prep_toolkit-0.2.3.dev0.dist-info/top_level.txt,sha256=XGMDmY55_pe5KeRWvO0un9a640e2v99tzbBBtjNybPM,58
|
|
78
|
+
data_prep_toolkit-0.2.3.dev0.dist-info/RECORD,,
|
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
################################################################################
|
|
12
12
|
|
|
13
13
|
import hashlib
|
|
14
|
+
import io
|
|
14
15
|
import os
|
|
15
16
|
import string
|
|
16
17
|
import sys
|
|
@@ -144,8 +145,21 @@ class TransformUtils:
|
|
|
144
145
|
table = pq.read_table(reader, schema=schema)
|
|
145
146
|
return table
|
|
146
147
|
except Exception as e:
|
|
147
|
-
logger.
|
|
148
|
-
|
|
148
|
+
logger.warning(f"Could not convert bytes to pyarrow: {e}")
|
|
149
|
+
|
|
150
|
+
# We have seen this exception before when using pyarrow, but polars does not throw it.
|
|
151
|
+
# "Nested data conversions not implemented for chunked array outputs"
|
|
152
|
+
# See issue 816 https://github.com/IBM/data-prep-kit/issues/816.
|
|
153
|
+
logger.info(f"Attempting read of pyarrow Table using polars")
|
|
154
|
+
try:
|
|
155
|
+
import polars
|
|
156
|
+
|
|
157
|
+
df = polars.read_parquet(io.BytesIO(data))
|
|
158
|
+
table = df.to_arrow()
|
|
159
|
+
except Exception as e:
|
|
160
|
+
logger.error(f"Could not convert bytes to pyarrow using polars: {e}. Skipping.")
|
|
161
|
+
table = None
|
|
162
|
+
return table
|
|
149
163
|
|
|
150
164
|
@staticmethod
|
|
151
165
|
def convert_arrow_to_binary(table: pa.Table) -> bytes:
|
{data_prep_toolkit-0.2.2.dev2.dist-info → data_prep_toolkit-0.2.3.dev0.dist-info}/top_level.txt
RENAMED
|
File without changes
|