junshan-kit 2.3.0__py2.py3-none-any.whl → 2.3.1__py2.py3-none-any.whl

This diff shows the changes between the contents of two publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
@@ -18,6 +18,7 @@ class CSV_TO_Pandas:
18
18
  drop_cols: list,
19
19
  label_col: str,
20
20
  label_map: dict,
21
+ data_name: str,
21
22
  print_info=False,
22
23
  ):
23
24
  """
@@ -97,7 +98,7 @@ class CSV_TO_Pandas:
97
98
 
98
99
  # Step 6: Print dataset information
99
100
  print("\n" + "=" * 80)
100
- print(f"{'Dataset Info':^70}")
101
+ print(f"{'{data_name} - Info':^70}")
101
102
  print("=" * 80)
102
103
  print(f"{'Original size:':<40} {m_original} rows x {n_original} cols")
103
104
  print(
junshan_kit/DataSets.py CHANGED
@@ -45,7 +45,7 @@ def _run(csv_path, data_name, drop_cols, label_col, label_map, print_info):
45
45
  junshan_kit.kit.unzip_file(f'./exp_data/{data_name}/{data_name}.zip', f'./exp_data/{data_name}')
46
46
 
47
47
  cleaner = junshan_kit.DataProcessor.CSV_TO_Pandas()
48
- df = cleaner.preprocess_dataset(csv_path, drop_cols, label_col, label_map, print_info=print_info)
48
+ df = cleaner.preprocess_dataset(csv_path, drop_cols, label_col, label_map, data_name, print_info=print_info)
49
49
 
50
50
  return df
51
51
 
@@ -67,7 +67,7 @@ def credit_card_fraud_detection(data_name = "Credit Card Fraud Detection", print
67
67
  return df
68
68
 
69
69
 
70
- def diabetes_health_indicators_dataset(data_name = "Diabetes Health Indicators Dataset", print_info = False):
70
+ def diabetes_health_indicators_dataset(data_name = "Diabetes Health Indicators", print_info = False):
71
71
  csv_path = f'./exp_data/{data_name}/diabetes_dataset.csv'
72
72
  drop_cols = []
73
73
  label_col = 'diagnosed_diabetes'
@@ -78,7 +78,7 @@ def diabetes_health_indicators_dataset(data_name = "Diabetes Health Indicators D
78
78
  return df
79
79
 
80
80
 
81
- def electric_vehicle_population_data(data_name = "Electric Vehicle Population Data", print_info = False):
81
+ def electric_vehicle_population_data(data_name = "Electric Vehicle Population", print_info = False):
82
82
  csv_path = f'./exp_data/{data_name}/Electric_Vehicle_Population_Data.csv'
83
83
  drop_cols = ['VIN (1-10)', 'DOL Vehicle ID', 'Vehicle Location']
84
84
  label_col = 'Electric Vehicle Type'
@@ -91,7 +91,7 @@ def electric_vehicle_population_data(data_name = "Electric Vehicle Population Da
91
91
 
92
92
  return df
93
93
 
94
- def global_house_purchase_dataset(data_name = "Global House Purchase Dataset", print_info = False):
94
+ def global_house_purchase_dataset(data_name = "Global House Purchase", print_info = False):
95
95
  csv_path = f'./exp_data/{data_name}/global_house_purchase_dataset.csv'
96
96
  drop_cols = ['property_id']
97
97
  label_col = 'Electric Vehicle Type'
@@ -102,7 +102,7 @@ def global_house_purchase_dataset(data_name = "Global House Purchase Dataset", p
102
102
  return df
103
103
 
104
104
 
105
- def health_lifestyle_dataset(data_name = "Health_lifestyle_dataset", print_info = False):
105
+ def health_lifestyle_dataset(data_name = "Health Lifestyle", print_info = False):
106
106
  csv_path = f'./exp_data/{data_name}/health_lifestyle_dataset.csv'
107
107
  drop_cols = ['id']
108
108
  label_col = 'decision'
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: junshan_kit
3
- Version: 2.3.0
3
+ Version: 2.3.1
4
4
  Summary: This is an optimization tool.
5
5
  Author-email: Junshan Yin <junshanyin@163.com>
6
6
  Requires-Dist: kaggle==1.7.4.5
@@ -0,0 +1,7 @@
1
+ junshan_kit/DataProcessor.py,sha256=QnYsqt2j4amZ4U04Urcu91RJs0du-tkl0N2lwxnTy1U,4472
2
+ junshan_kit/DataSets.py,sha256=2P2AMQjQDKx8FITa5cNBaiFuUhPuWXKkgdlBM02LfPQ,3954
3
+ junshan_kit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ junshan_kit/kit.py,sha256=tB1TpW9hW1EweK1RQwHOdUo7uG1QU4vSeyR0fdaSydo,9569
5
+ junshan_kit-2.3.1.dist-info/METADATA,sha256=cCVc5fHfyWVno2RFPc8xskJ7XO8e7sWHYmV-8udM5s4,329
6
+ junshan_kit-2.3.1.dist-info/WHEEL,sha256=tkmg4JIqwd9H8mL30xA7crRmoStyCtGp0VWshokd1Jc,105
7
+ junshan_kit-2.3.1.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- junshan_kit/DataProcessor.py,sha256=eryVmS5BFZj8wjDN2QWVHqkbFgFuWU0HXV9s6TGf9QM,4442
2
- junshan_kit/DataSets.py,sha256=rf5AVlA9DxP7wBpXjSO1_xznCMuxEoK50TqExafwHhc,3972
3
- junshan_kit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- junshan_kit/kit.py,sha256=tB1TpW9hW1EweK1RQwHOdUo7uG1QU4vSeyR0fdaSydo,9569
5
- junshan_kit-2.3.0.dist-info/METADATA,sha256=9NlU4YOD0zx5F5tLIMREKKxn-LwYD8-7IFVtp7DvMNM,329
6
- junshan_kit-2.3.0.dist-info/WHEEL,sha256=tkmg4JIqwd9H8mL30xA7crRmoStyCtGp0VWshokd1Jc,105
7
- junshan_kit-2.3.0.dist-info/RECORD,,