junshan-kit 2.2.2__py2.py3-none-any.whl → 2.2.3__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
junshan_kit/DataProcessor.py
CHANGED
@@ -9,7 +9,7 @@ import pandas as pd
|
|
9
9
|
import os
|
10
10
|
from sklearn.preprocessing import StandardScaler
|
11
11
|
import junshan_kit.datahub
|
12
|
-
import
|
12
|
+
import zipfile
|
13
13
|
|
14
14
|
class CSVToPandasMeta:
|
15
15
|
def __init__(self):
|
@@ -114,6 +114,32 @@ class CSV_TO_Pandas:
|
|
114
114
|
def __init__(self):
|
115
115
|
pass
|
116
116
|
|
117
|
+
def unzip_file(self, zip_path: str, unzip_folder: str = None):
    """Extract every member of a ZIP archive into a folder.

    Args:
        zip_path (str): Path to the ZIP file to extract.
        unzip_folder (str, optional): Folder to extract files into.
            If None, a folder with the same name as the ZIP file
            (without extension) is created relative to the current
            working directory.

    Raises:
        FileNotFoundError: If ``zip_path`` does not exist.
        zipfile.BadZipFile: If ``zip_path`` is not a valid ZIP archive.

    Examples:
        >>> zip_path = "./downloads/data.zip"
        >>> unzip_folder = "./exp_data/data"
        >>> CSV_TO_Pandas().unzip_file(zip_path, unzip_folder)
    """
    if unzip_folder is None:
        # basename() drops the directory part, so the derived folder is
        # resolved against the current working directory.
        unzip_folder = os.path.splitext(os.path.basename(zip_path))[0]

    # Open the archive before touching the filesystem so that a missing or
    # corrupt ZIP does not leave an empty output folder behind.
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        # Still create the folder explicitly: extractall() only creates
        # directories for members, so an empty archive would create nothing.
        os.makedirs(unzip_folder, exist_ok=True)
        zip_ref.extractall(unzip_folder)

    print(f"✅ Extracted '{zip_path}' to '{os.path.abspath(unzip_folder)}'")
|
140
|
+
|
141
|
+
# -----------------------------------------------------
|
142
|
+
|
117
143
|
def clean_data(self, csv_path, drop_cols: list, label_col: str, label_map: dict, print_info = False):
|
118
144
|
# Step 0: Load the dataset
|
119
145
|
df = pd.read_csv(csv_path)
|
@@ -1,9 +1,9 @@
|
|
1
|
-
junshan_kit/DataProcessor.py,sha256=
|
1
|
+
junshan_kit/DataProcessor.py,sha256=rDL3NLD-WlT3x6x74XkB_542_sk3BrnIk5p4rYlVn5o,7212
|
2
2
|
junshan_kit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
3
3
|
junshan_kit/datahub.py,sha256=_Q_3AlZ8vk1Ma6h9I44SxWBA8w9m1CQNvYztMcsxzUo,5377
|
4
4
|
junshan_kit/kit.py,sha256=h4Q_87hEJbXH4A9ryaGMu_nle5RlM8OR_PaW_hWCVBY,1040
|
5
5
|
junshan_kit/meta.py,sha256=SiY9P93aABrksNE6G3ft5gzcuP2cUgc4Vx6LH7ZFmzg,10113
|
6
6
|
junshan_kit/test.py,sha256=FgzG4oG7kkq6rWasxdBSY1qx_B0navRI5Ei-wJ1Dvo0,180
|
7
|
-
junshan_kit-2.2.
|
8
|
-
junshan_kit-2.2.
|
9
|
-
junshan_kit-2.2.
|
7
|
+
junshan_kit-2.2.3.dist-info/METADATA,sha256=h4_Z0LMIigJgrkt2hD5TcYJwOCkArMRySh-OopgZ9Xo,329
|
8
|
+
junshan_kit-2.2.3.dist-info/WHEEL,sha256=tkmg4JIqwd9H8mL30xA7crRmoStyCtGp0VWshokd1Jc,105
|
9
|
+
junshan_kit-2.2.3.dist-info/RECORD,,
|
File without changes
|