data-prep-toolkit 1.1.7.dev5__py3-none-any.whl → 1.1.8.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {data_prep_toolkit-1.1.7.dev5.dist-info → data_prep_toolkit-1.1.8.dev0.dist-info}/METADATA +6 -4
- {data_prep_toolkit-1.1.7.dev5.dist-info → data_prep_toolkit-1.1.8.dev0.dist-info}/RECORD +6 -6
- {data_prep_toolkit-1.1.7.dev5.dist-info → data_prep_toolkit-1.1.8.dev0.dist-info}/WHEEL +1 -1
- data_processing/utils/model_loader_registry.py +36 -0
- data_processing/utils/pipinstaller.py +22 -9
- {data_prep_toolkit-1.1.7.dev5.dist-info → data_prep_toolkit-1.1.8.dev0.dist-info}/top_level.txt +0 -0
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: data_prep_toolkit
|
|
3
|
-
Version: 1.1.7.dev5
|
|
3
|
+
Version: 1.1.8.dev0
|
|
4
4
|
Summary: Data Preparation Toolkit Library for Ray and Python
|
|
5
5
|
Author-email: Maroun Touma <touma@us.ibm.com>
|
|
6
6
|
License: Apache-2.0
|
|
7
7
|
Keywords: data,data preprocessing,data preparation,llm,generative,ai,fine-tuning,llmapps
|
|
8
|
-
Requires-Python: <3.
|
|
8
|
+
Requires-Python: <3.15,>=3.10
|
|
9
9
|
Description-Content-Type: text/markdown
|
|
10
10
|
Requires-Dist: numpy<2.0.0
|
|
11
|
-
Requires-Dist: pyarrow<=17.0.0
|
|
11
|
+
Requires-Dist: pyarrow<=17.0.0; python_version <= "3.12"
|
|
12
|
+
Requires-Dist: pyarrow; python_version > "3.12"
|
|
12
13
|
Requires-Dist: boto3>=1.41
|
|
13
14
|
Requires-Dist: mmh3
|
|
14
15
|
Requires-Dist: psutil
|
|
@@ -29,7 +30,8 @@ Requires-Dist: fasttext-wheel; extra == "dev"
|
|
|
29
30
|
Requires-Dist: huggingface-hub<1.0.0,>=0.21.4; extra == "dev"
|
|
30
31
|
Requires-Dist: transformers; extra == "dev"
|
|
31
32
|
Provides-Extra: ray
|
|
32
|
-
Requires-Dist: ray[default]==2.36.1; extra == "ray"
|
|
33
|
+
Requires-Dist: ray[default]==2.36.1; python_version < "3.13" and extra == "ray"
|
|
34
|
+
Requires-Dist: ray[default]==2.45.0; python_version >= "3.13" and extra == "ray"
|
|
33
35
|
Requires-Dist: fastapi>=0.110.2; extra == "ray"
|
|
34
36
|
Requires-Dist: pillow>=10.3.0; extra == "ray"
|
|
35
37
|
Provides-Extra: spark
|
|
@@ -50,10 +50,10 @@ data_processing/utils/cli_utils.py,sha256=A3LPVbr7-8JJwMycYYI_x27pW-Hqs7OAw1hZpt
|
|
|
50
50
|
data_processing/utils/config.py,sha256=tKVqP4R-ge7dg7DDLDUywknJyv8RDDrFynZLQV-VQp8,1900
|
|
51
51
|
data_processing/utils/log.py,sha256=zF5pYcBPndwxSZpBZCAdngc3hA8UB2rJd3LHl-OgYMA,7054
|
|
52
52
|
data_processing/utils/model_loader.py,sha256=zVTspZ8PsgUjpM38_IilbSbh2gVjgPFVak4Nn6VInhA,3448
|
|
53
|
-
data_processing/utils/model_loader_registry.py,sha256=
|
|
53
|
+
data_processing/utils/model_loader_registry.py,sha256=BTQu8x686bWHy_fC5mE9n-JdgJyEQcIMBzdjteo2C5Y,3696
|
|
54
54
|
data_processing/utils/multilock.py,sha256=59BlwY2JUcthkIChmyoy5BveC8wseL31cngMs-A008o,5881
|
|
55
55
|
data_processing/utils/params_utils.py,sha256=T6Dd6I1HpAtRiMmRnSjqVzfRncXg2QK1nkEbZ7khuhw,6706
|
|
56
|
-
data_processing/utils/pipinstaller.py,sha256=
|
|
56
|
+
data_processing/utils/pipinstaller.py,sha256=M4ry1Y5bqa43u5aUO8qxxsnY7jne_AQqtKhI4p-8RDI,3113
|
|
57
57
|
data_processing/utils/transform_configuration.json,sha256=6YBw0Hk2mokY6JBn1kR6L9AkV_yivbFrpSoHecAJp9o,4562
|
|
58
58
|
data_processing/utils/transform_configurator.py,sha256=7jlirBcL_c3FwvOSeMvTEcYGBEUOmA8EqmGAUZwp8VU,3639
|
|
59
59
|
data_processing/utils/transform_utils.py,sha256=0DlW3T--m1QJnifRLgujaAJhOvjONmNpTwq0av1sJng,11486
|
|
@@ -82,7 +82,7 @@ data_processing_spark/runtime/spark/transform_runtime.py,sha256=feIB6Hr-Omy1C9LJ
|
|
|
82
82
|
data_processing_spark/test_support/transform/__init__.py,sha256=0b40r5bqo4gLPEL_nAdF36zOfowu7hf-EVbeHtdtL-4,910
|
|
83
83
|
data_processing_spark/test_support/transform/noop_folder_transform.py,sha256=EJCW5XPrAjVIb2lOfq44wV8Hetwo7Pj7ZRLvZBLtTPA,2225
|
|
84
84
|
data_processing_spark/test_support/transform/noop_transform.py,sha256=wBg2MVx4W6ARiuADt_4LeNoj5puniXLAcMAqOzVhueE,1732
|
|
85
|
-
data_prep_toolkit-1.1.
|
|
86
|
-
data_prep_toolkit-1.1.
|
|
87
|
-
data_prep_toolkit-1.1.
|
|
88
|
-
data_prep_toolkit-1.1.
|
|
85
|
+
data_prep_toolkit-1.1.8.dev0.dist-info/METADATA,sha256=upRDhZXr2bd9GNeokYmQWGsPGr5YA3CmkiIYuN0oXVw,2638
|
|
86
|
+
data_prep_toolkit-1.1.8.dev0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
87
|
+
data_prep_toolkit-1.1.8.dev0.dist-info/top_level.txt,sha256=XGMDmY55_pe5KeRWvO0un9a640e2v99tzbBBtjNybPM,58
|
|
88
|
+
data_prep_toolkit-1.1.8.dev0.dist-info/RECORD,,
|
|
@@ -74,3 +74,39 @@ def load_fasttext_model(model_path: str, token: str = None, **kwargs):
|
|
|
74
74
|
|
|
75
75
|
model = fasttext.load_model(model_path)
|
|
76
76
|
return model
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _find_pt_file(directory: str) -> str:
    """Return the path of the first ``.pt`` file found under *directory*.

    The tree is walked top-down with directory and file names sorted, so the
    shallowest match wins and the result does not depend on the order in
    which the file system happens to list entries.

    Raises:
        FileNotFoundError: if no ``.pt`` file exists anywhere under *directory*.
    """
    for root, dirs, files in os.walk(directory):
        # Sort in place: os.walk consults this same list to decide where to
        # descend next, so this fixes the traversal order.
        dirs.sort()
        for name in sorted(files):
            if name.endswith(".pt"):
                # Return immediately. A bare ``break`` would only leave this
                # inner loop and let a later directory overwrite the match.
                return os.path.join(root, name)
    raise FileNotFoundError(f"No .pt file found in : {directory}")


@register_model_loader("yolo")
def load_yolo_model(model_path: str, token: str = None, **kwargs):
    """Load an Ultralytics YOLO model from a file, a directory or a hub repo.

    ``model_path`` is resolved as follows:

    * an existing file is used as is;
    * an existing directory is searched recursively for a ``.pt`` file and
      the first one found is used;
    * anything else is treated as a Hugging Face repository id and the
      weights file is downloaded with ``hf_hub_download``.

    Args:
        model_path: local file, local directory, or Hugging Face repo id.
        token: access token forwarded to ``hf_hub_download``; only used when
            the weights are downloaded.
        **kwargs: optional download settings, only read in the download case:
            ``model_filename`` (defaults to ``"model.pt"``), ``subfolder``
            and ``revision`` (both default to ``None``).

    Returns:
        The ``YOLO`` object built from the resolved weights path.

    Raises:
        FileNotFoundError: if ``model_path`` is a directory that contains no
            ``.pt`` file.
    """
    # Imported lazily so the module can be imported without ultralytics.
    from ultralytics import YOLO

    if os.path.isdir(model_path):
        model_path = _find_pt_file(model_path)
    elif not os.path.isfile(model_path):
        # Not a local path: assume a Hugging Face repo and download the .pt.
        # huggingface_hub is imported here so that loading local weights does
        # not require it to be installed.
        from huggingface_hub import hf_hub_download

        model_path = hf_hub_download(
            repo_id=model_path,
            subfolder=kwargs.get("subfolder", None),
            revision=kwargs.get("revision", None),
            filename=kwargs.get("model_filename", "model.pt"),
            token=token,
        )

    return YOLO(model_path)
|
|
@@ -18,7 +18,7 @@ warnings.filterwarnings("ignore")
|
|
|
18
18
|
|
|
19
19
|
import subprocess
|
|
20
20
|
import sys
|
|
21
|
-
|
|
21
|
+
import shutil
|
|
22
22
|
|
|
23
23
|
class PipInstaller:
|
|
24
24
|
"""
|
|
@@ -36,14 +36,27 @@ class PipInstaller:
|
|
|
36
36
|
"""
|
|
37
37
|
# build pip package string
|
|
38
38
|
package = f"git+{project}#subdirectory={subdirectory}&egg={name}"
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
39
|
+
|
|
40
|
+
uv_path = shutil.which("uv")
|
|
41
|
+
if uv_path:
|
|
42
|
+
try:
|
|
43
|
+
# Use 'uv pip install'.
|
|
44
|
+
# Note: uv requires --system to install into the system Python
|
|
45
|
+
# instead of a virtual environment.
|
|
46
|
+
subprocess.check_call([uv_path, "pip", "install", "--system", package])
|
|
47
|
+
return True
|
|
48
|
+
except Exception as e:
|
|
49
|
+
print(f"uv installation failed for {package}: {e}")
|
|
50
|
+
return False
|
|
51
|
+
else:
|
|
52
|
+
try:
|
|
53
|
+
# Do uv pip install (in sub process)
|
|
54
|
+
subprocess.check_call([sys.executable, "-m", "pip", "install", package])
|
|
55
|
+
return True
|
|
56
|
+
except Exception as e:
|
|
57
|
+
# process exception
|
|
58
|
+
print(f"Exception installing package {name}: {e}")
|
|
59
|
+
return False
|
|
47
60
|
|
|
48
61
|
@staticmethod
|
|
49
62
|
def validate(name: str) -> bool:
|
{data_prep_toolkit-1.1.7.dev5.dist-info → data_prep_toolkit-1.1.8.dev0.dist-info}/top_level.txt
RENAMED
|
File without changes
|