junshan-kit 2.3.0__py2.py3-none-any.whl → 2.3.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
junshan_kit/DataProcessor.py
CHANGED
@@ -18,6 +18,7 @@ class CSV_TO_Pandas:
|
|
18
18
|
drop_cols: list,
|
19
19
|
label_col: str,
|
20
20
|
label_map: dict,
|
21
|
+
data_name: str,
|
21
22
|
print_info=False,
|
22
23
|
):
|
23
24
|
"""
|
@@ -97,7 +98,7 @@ class CSV_TO_Pandas:
|
|
97
98
|
|
98
99
|
# Step 6: Print dataset information
|
99
100
|
print("\n" + "=" * 80)
|
100
|
-
print(f"{'
|
101
|
+
print(f"{'{data_name} - Info':^70}")
|
101
102
|
print("=" * 80)
|
102
103
|
print(f"{'Original size:':<40} {m_original} rows x {n_original} cols")
|
103
104
|
print(
|
junshan_kit/DataSets.py
CHANGED
@@ -45,7 +45,7 @@ def _run(csv_path, data_name, drop_cols, label_col, label_map, print_info):
|
|
45
45
|
junshan_kit.kit.unzip_file(f'./exp_data/{data_name}/{data_name}.zip', f'./exp_data/{data_name}')
|
46
46
|
|
47
47
|
cleaner = junshan_kit.DataProcessor.CSV_TO_Pandas()
|
48
|
-
df = cleaner.preprocess_dataset(csv_path, drop_cols, label_col, label_map, print_info=print_info)
|
48
|
+
df = cleaner.preprocess_dataset(csv_path, drop_cols, label_col, label_map, data_name, print_info=print_info)
|
49
49
|
|
50
50
|
return df
|
51
51
|
|
@@ -67,7 +67,7 @@ def credit_card_fraud_detection(data_name = "Credit Card Fraud Detection", print
|
|
67
67
|
return df
|
68
68
|
|
69
69
|
|
70
|
-
def diabetes_health_indicators_dataset(data_name = "Diabetes Health Indicators Dataset", print_info = False):
|
70
|
+
def diabetes_health_indicators_dataset(data_name = "Diabetes Health Indicators", print_info = False):
|
71
71
|
csv_path = f'./exp_data/{data_name}/diabetes_dataset.csv'
|
72
72
|
drop_cols = []
|
73
73
|
label_col = 'diagnosed_diabetes'
|
@@ -78,7 +78,7 @@ def diabetes_health_indicators_dataset(data_name = "Diabetes Health Indicators D
|
|
78
78
|
return df
|
79
79
|
|
80
80
|
|
81
|
-
def electric_vehicle_population_data(data_name = "Electric Vehicle Population Data", print_info = False):
|
81
|
+
def electric_vehicle_population_data(data_name = "Electric Vehicle Population", print_info = False):
|
82
82
|
csv_path = f'./exp_data/{data_name}/Electric_Vehicle_Population_Data.csv'
|
83
83
|
drop_cols = ['VIN (1-10)', 'DOL Vehicle ID', 'Vehicle Location']
|
84
84
|
label_col = 'Electric Vehicle Type'
|
@@ -91,7 +91,7 @@ def electric_vehicle_population_data(data_name = "Electric Vehicle Population Da
|
|
91
91
|
|
92
92
|
return df
|
93
93
|
|
94
|
-
def global_house_purchase_dataset(data_name = "Global House Purchase Dataset", print_info = False):
|
94
|
+
def global_house_purchase_dataset(data_name = "Global House Purchase", print_info = False):
|
95
95
|
csv_path = f'./exp_data/{data_name}/global_house_purchase_dataset.csv'
|
96
96
|
drop_cols = ['property_id']
|
97
97
|
label_col = 'Electric Vehicle Type'
|
@@ -102,7 +102,7 @@ def global_house_purchase_dataset(data_name = "Global House Purchase Dataset", p
|
|
102
102
|
return df
|
103
103
|
|
104
104
|
|
105
|
-
def health_lifestyle_dataset(data_name = "Health Lifestyle Dataset", print_info = False):
|
105
|
+
def health_lifestyle_dataset(data_name = "Health Lifestyle", print_info = False):
|
106
106
|
csv_path = f'./exp_data/{data_name}/health_lifestyle_dataset.csv'
|
107
107
|
drop_cols = ['id']
|
108
108
|
label_col = 'decision'
|
@@ -0,0 +1,7 @@
|
|
1
|
+
junshan_kit/DataProcessor.py,sha256=QnYsqt2j4amZ4U04Urcu91RJs0du-tkl0N2lwxnTy1U,4472
|
2
|
+
junshan_kit/DataSets.py,sha256=2P2AMQjQDKx8FITa5cNBaiFuUhPuWXKkgdlBM02LfPQ,3954
|
3
|
+
junshan_kit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
|
+
junshan_kit/kit.py,sha256=tB1TpW9hW1EweK1RQwHOdUo7uG1QU4vSeyR0fdaSydo,9569
|
5
|
+
junshan_kit-2.3.1.dist-info/METADATA,sha256=cCVc5fHfyWVno2RFPc8xskJ7XO8e7sWHYmV-8udM5s4,329
|
6
|
+
junshan_kit-2.3.1.dist-info/WHEEL,sha256=tkmg4JIqwd9H8mL30xA7crRmoStyCtGp0VWshokd1Jc,105
|
7
|
+
junshan_kit-2.3.1.dist-info/RECORD,,
|
@@ -1,7 +0,0 @@
|
|
1
|
-
junshan_kit/DataProcessor.py,sha256=eryVmS5BFZj8wjDN2QWVHqkbFgFuWU0HXV9s6TGf9QM,4442
|
2
|
-
junshan_kit/DataSets.py,sha256=rf5AVlA9DxP7wBpXjSO1_xznCMuxEoK50TqExafwHhc,3972
|
3
|
-
junshan_kit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
|
-
junshan_kit/kit.py,sha256=tB1TpW9hW1EweK1RQwHOdUo7uG1QU4vSeyR0fdaSydo,9569
|
5
|
-
junshan_kit-2.3.0.dist-info/METADATA,sha256=9NlU4YOD0zx5F5tLIMREKKxn-LwYD8-7IFVtp7DvMNM,329
|
6
|
-
junshan_kit-2.3.0.dist-info/WHEEL,sha256=tkmg4JIqwd9H8mL30xA7crRmoStyCtGp0VWshokd1Jc,105
|
7
|
-
junshan_kit-2.3.0.dist-info/RECORD,,
|
File without changes
|