junshan-kit 2.2.9__py2.py3-none-any.whl → 2.3.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
junshan_kit/DataProcessor.py CHANGED
@@ -18,6 +18,7 @@ class CSV_TO_Pandas:
18
18
  drop_cols: list,
19
19
  label_col: str,
20
20
  label_map: dict,
21
+ data_name: str,
21
22
  print_info=False,
22
23
  ):
23
24
  """
@@ -97,7 +98,7 @@ class CSV_TO_Pandas:
97
98
 
98
99
  # Step 6: Print dataset information
99
100
  print("\n" + "=" * 80)
100
- print(f"{'Dataset Info':^70}")
101
+ print(f"{'{data_name} - Info':^70}")
101
102
  print("=" * 80)
102
103
  print(f"{'Original size:':<40} {m_original} rows x {n_original} cols")
103
104
  print(
junshan_kit/DataSets.py CHANGED
@@ -12,7 +12,7 @@ import junshan_kit.kit
12
12
  from sklearn.preprocessing import StandardScaler
13
13
 
14
14
  #----------------------------------------------------------
15
- def download_data(data_name):
15
+ def _download_data(data_name):
16
16
  from junshan_kit.kit import JianguoyunDownloaderFirefox, JianguoyunDownloaderChrome
17
17
 
18
18
  # User selects download method
@@ -36,16 +36,16 @@ def download_data(data_name):
36
36
  else:
37
37
  print("❌ Invalid choice. Please enter 1 or 2.\n")
38
38
 
39
- def run(csv_path, data_name, drop_cols, label_col, label_map, print_info):
39
+ def _run(csv_path, data_name, drop_cols, label_col, label_map, print_info):
40
40
  if not os.path.exists(csv_path):
41
41
  print('\n' + '*'*60)
42
42
  print(f"Please download the data.")
43
43
  print(csv_path)
44
- download_data(data_name)
44
+ _download_data(data_name)
45
45
  junshan_kit.kit.unzip_file(f'./exp_data/{data_name}/{data_name}.zip', f'./exp_data/{data_name}')
46
46
 
47
47
  cleaner = junshan_kit.DataProcessor.CSV_TO_Pandas()
48
- df = cleaner.preprocess_dataset(csv_path, drop_cols, label_col, label_map, print_info=print_info)
48
+ df = cleaner.preprocess_dataset(csv_path, drop_cols, label_col, label_map, data_name, print_info=print_info)
49
49
 
50
50
  return df
51
51
 
@@ -59,26 +59,26 @@ def credit_card_fraud_detection(data_name = "Credit Card Fraud Detection", print
59
59
 
60
60
  csv_path = f'./exp_data/{data_name}/creditcard.csv'
61
61
  drop_cols = []
62
- label_col = 'diagnosed_diabetes'
62
+ label_col = 'Class'
63
63
  label_map = {0: -1, 1: 1}
64
64
 
65
- df = run(csv_path, data_name, drop_cols, label_col, label_map, print_info)
65
+ df = _run(csv_path, data_name, drop_cols, label_col, label_map, print_info)
66
66
 
67
67
  return df
68
68
 
69
69
 
70
- def diabetes_health_indicators_dataset(data_name = "Diabetes Health Indicators Dataset", print_info = False):
70
+ def diabetes_health_indicators_dataset(data_name = "Diabetes Health Indicators", print_info = False):
71
71
  csv_path = f'./exp_data/{data_name}/diabetes_dataset.csv'
72
72
  drop_cols = []
73
- label_col = 'Class'
73
+ label_col = 'diagnosed_diabetes'
74
74
  label_map = {0: -1, 1: 1}
75
75
 
76
- df = run(csv_path, data_name, drop_cols, label_col, label_map, print_info)
76
+ df = _run(csv_path, data_name, drop_cols, label_col, label_map, print_info)
77
77
 
78
78
  return df
79
79
 
80
80
 
81
- def electric_vehicle_population_data(data_name = "Electric Vehicle Population Data", print_info = False):
81
+ def electric_vehicle_population_data(data_name = "Electric Vehicle Population", print_info = False):
82
82
  csv_path = f'./exp_data/{data_name}/Electric_Vehicle_Population_Data.csv'
83
83
  drop_cols = ['VIN (1-10)', 'DOL Vehicle ID', 'Vehicle Location']
84
84
  label_col = 'Electric Vehicle Type'
@@ -87,28 +87,28 @@ def electric_vehicle_population_data(data_name = "Electric Vehicle Population Da
87
87
  'Plug-in Hybrid Electric Vehicle (PHEV)': -1
88
88
  }
89
89
 
90
- df = run(csv_path, data_name, drop_cols, label_col, label_map, print_info)
90
+ df = _run(csv_path, data_name, drop_cols, label_col, label_map, print_info)
91
91
 
92
92
  return df
93
93
 
94
- def global_house_purchase_dataset(data_name = "Global House Purchase Dataset", print_info = False):
94
+ def global_house_purchase_dataset(data_name = "Global House Purchase", print_info = False):
95
95
  csv_path = f'./exp_data/{data_name}/global_house_purchase_dataset.csv'
96
96
  drop_cols = ['property_id']
97
97
  label_col = 'Electric Vehicle Type'
98
98
  label_map = {0: -1, 1: 1}
99
99
 
100
- df = run(csv_path, data_name, drop_cols, label_col, label_map, print_info)
100
+ df = _run(csv_path, data_name, drop_cols, label_col, label_map, print_info)
101
101
 
102
102
  return df
103
103
 
104
104
 
105
- def health_lifestyle_dataset(data_name = "Health_lifestyle_dataset", print_info = False):
105
+ def health_lifestyle_dataset(data_name = "Health Lifestyle", print_info = False):
106
106
  csv_path = f'./exp_data/{data_name}/health_lifestyle_dataset.csv'
107
107
  drop_cols = ['id']
108
108
  label_col = 'decision'
109
109
  label_map = {0: -1, 1: 1}
110
110
 
111
- df = run(csv_path, data_name, drop_cols, label_col, label_map, print_info)
111
+ df = _run(csv_path, data_name, drop_cols, label_col, label_map, print_info)
112
112
 
113
113
  return df
114
114
 
{junshan_kit-2.2.9.dist-info → junshan_kit-2.3.1.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: junshan_kit
3
- Version: 2.2.9
3
+ Version: 2.3.1
4
4
  Summary: This is an optimization tool.
5
5
  Author-email: Junshan Yin <junshanyin@163.com>
6
6
  Requires-Dist: kaggle==1.7.4.5
junshan_kit-2.3.1.dist-info/RECORD ADDED
@@ -0,0 +1,7 @@
1
+ junshan_kit/DataProcessor.py,sha256=QnYsqt2j4amZ4U04Urcu91RJs0du-tkl0N2lwxnTy1U,4472
2
+ junshan_kit/DataSets.py,sha256=2P2AMQjQDKx8FITa5cNBaiFuUhPuWXKkgdlBM02LfPQ,3954
3
+ junshan_kit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ junshan_kit/kit.py,sha256=tB1TpW9hW1EweK1RQwHOdUo7uG1QU4vSeyR0fdaSydo,9569
5
+ junshan_kit-2.3.1.dist-info/METADATA,sha256=cCVc5fHfyWVno2RFPc8xskJ7XO8e7sWHYmV-8udM5s4,329
6
+ junshan_kit-2.3.1.dist-info/WHEEL,sha256=tkmg4JIqwd9H8mL30xA7crRmoStyCtGp0VWshokd1Jc,105
7
+ junshan_kit-2.3.1.dist-info/RECORD,,
junshan_kit-2.2.9.dist-info/RECORD REMOVED
@@ -1,7 +0,0 @@
1
- junshan_kit/DataProcessor.py,sha256=eryVmS5BFZj8wjDN2QWVHqkbFgFuWU0HXV9s6TGf9QM,4442
2
- junshan_kit/DataSets.py,sha256=BNlXUbsautITelxp35uHSYSq2bTbIbqLyQnmqapaeDc,3963
3
- junshan_kit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- junshan_kit/kit.py,sha256=tB1TpW9hW1EweK1RQwHOdUo7uG1QU4vSeyR0fdaSydo,9569
5
- junshan_kit-2.2.9.dist-info/METADATA,sha256=oIytayt1Z9OpFOJl8_7cYMl3li3KsphQfZDRqsqat8g,329
6
- junshan_kit-2.2.9.dist-info/WHEEL,sha256=tkmg4JIqwd9H8mL30xA7crRmoStyCtGp0VWshokd1Jc,105
7
- junshan_kit-2.2.9.dist-info/RECORD,,