junshan-kit 2.4.5__py2.py3-none-any.whl → 2.4.6__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of junshan-kit might be problematic. Click here for more details.

junshan_kit/DataProcessor.py CHANGED
@@ -217,15 +217,14 @@ class CSV_TO_Pandas:
217
217
  print(f"{'time column:':<40} {time_info}")
218
218
  if time_info is not None:
219
219
  print(f"{'trans_type : int, optional, default=1'}")
220
- print(f"{- 0 : Extract ['year', 'month', 'day', 'hour']:<50}")
221
- print(f"{ - 1 : Extract ['hour', 'dayofweek', 'is_weekend']:<50}")
220
+ print("- 0 : Extract ['year', 'month', 'day', 'hour']")
221
+ print("- 1 : Extract ['hour', 'dayofweek', 'is_weekend']")
222
222
  print(
223
223
  f"{'text fetaure columns:':<40} {', '.join(list(text_feature_cols)) if list(text_feature_cols) else 'None'}"
224
224
  )
225
225
  print("-" * 80)
226
- print(
227
- f"{'all columns:':<40} {', '.join(columns)}"
228
- )
226
+ print("all columns:")
227
+ print(list(columns))
229
228
  print("=" * 80 + "\n")
230
229
 
231
230
  return df
junshan_kit/DataSets.py CHANGED
@@ -49,7 +49,7 @@ def _export_csv(df, data_name, data_type):
49
49
  print(path + f'{data_name}.csv')
50
50
 
51
51
 
52
- def _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, user_one_hot_cols = [], export_csv = False, time_info = None):
52
+ def _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, user_one_hot_cols = [], export_csv = False, time_info = None, Standard = False):
53
53
  if not os.path.exists(csv_path):
54
54
  print('\n' + '*'*60)
55
55
  print(f"Please download the data.")
@@ -73,7 +73,7 @@ def _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_
73
73
  ----------------------------------------------------------------------
74
74
  """
75
75
 
76
- def credit_card_fraud_detection(data_name = "Credit Card Fraud Detection", print_info = False, export_csv=False, drop_cols = []):
76
+ def credit_card_fraud_detection(data_name = "Credit Card Fraud Detection", print_info = False, export_csv=False, drop_cols = [], Standard = False):
77
77
 
78
78
  data_type = "binary"
79
79
  csv_path = f'./exp_data/{data_type}/{data_name}/creditcard.csv'
@@ -81,24 +81,24 @@ def credit_card_fraud_detection(data_name = "Credit Card Fraud Detection", print
81
81
  label_map = {0: -1, 1: 1}
82
82
 
83
83
 
84
- df = _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, export_csv=export_csv)
84
+ df = _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, export_csv=export_csv, Standard = Standard)
85
85
 
86
86
 
87
87
  return df
88
88
 
89
89
 
90
- def diabetes_health_indicators(data_name = "Diabetes Health Indicators", print_info = False, export_csv = False, drop_cols = []):
90
+ def diabetes_health_indicators(data_name = "Diabetes Health Indicators", print_info = False, export_csv = False, drop_cols = [], Standard = False):
91
91
  data_type = "binary"
92
92
  csv_path = f'./exp_data/{data_type}/{data_name}/diabetes_dataset.csv'
93
93
  label_col = 'diagnosed_diabetes'
94
94
  label_map = {0: -1, 1: 1}
95
95
 
96
- df = _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, export_csv=export_csv)
96
+ df = _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, export_csv=export_csv, Standard = Standard)
97
97
 
98
98
  return df
99
99
 
100
100
 
101
- def electric_vehicle_population(data_name = "Electric Vehicle Population", print_info = False, export_csv = False, drop_cols = ['VIN (1-10)', 'DOL Vehicle ID', 'Vehicle Location']):
101
+ def electric_vehicle_population(data_name = "Electric Vehicle Population", print_info = False, export_csv = False, drop_cols = ['VIN (1-10)', 'DOL Vehicle ID', 'Vehicle Location'], Standard = False):
102
102
 
103
103
  data_type = "binary"
104
104
  csv_path = f'./exp_data/{data_type}/{data_name}/Electric_Vehicle_Population_Data.csv'
@@ -110,11 +110,11 @@ def electric_vehicle_population(data_name = "Electric Vehicle Population", print
110
110
  }
111
111
 
112
112
 
113
- df = _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, export_csv=export_csv)
113
+ df = _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, export_csv=export_csv, Standard = Standard)
114
114
 
115
115
  return df
116
116
 
117
- def global_house_purchase(data_name = "Global House Purchase", print_info = False, export_csv = False, drop_cols = ['property_id']):
117
+ def global_house_purchase(data_name = "Global House Purchase", print_info = False, export_csv = False, drop_cols = ['property_id'], Standard =False):
118
118
 
119
119
  data_type = "binary"
120
120
  csv_path = f'./exp_data/{data_type}/{data_name}/global_house_purchase_dataset.csv'
@@ -122,12 +122,12 @@ def global_house_purchase(data_name = "Global House Purchase", print_info = Fals
122
122
  label_map = {0: -1, 1: 1}
123
123
 
124
124
 
125
- df = _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, export_csv=export_csv)
125
+ df = _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, export_csv=export_csv, Standard = Standard)
126
126
 
127
127
  return df
128
128
 
129
129
 
130
- def health_lifestyle(data_name = "Health Lifestyle", print_info = False, export_csv = False, drop_cols = ['id']):
130
+ def health_lifestyle(data_name = "Health Lifestyle", print_info = False, export_csv = False, drop_cols = ['id'], Standard =False):
131
131
 
132
132
  data_type = "binary"
133
133
  csv_path = f'./exp_data/{data_type}/{data_name}/health_lifestyle_dataset.csv'
@@ -136,12 +136,12 @@ def health_lifestyle(data_name = "Health Lifestyle", print_info = False, export_
136
136
  label_map = {0: -1, 1: 1}
137
137
 
138
138
 
139
- df = _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, export_csv=export_csv)
139
+ df = _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, export_csv=export_csv, Standard = Standard)
140
140
 
141
141
  return df
142
142
 
143
143
 
144
- def medical_insurance_cost_prediction(data_name = "Medical Insurance Cost Prediction", print_info = False, export_csv = False, drop_cols = ['alcohol_freq']):
144
+ def medical_insurance_cost_prediction(data_name = "Medical Insurance Cost Prediction", print_info = False, export_csv = False, drop_cols = ['alcohol_freq'], Standard = False):
145
145
  """
146
146
  1. The missing values in this dataset are handled by directly removing the corresponding column. Since the `alcohol_freq` column contains a large number of missing values, deleting the rows would result in significant data loss, so the entire column is dropped instead.
147
147
 
@@ -155,12 +155,12 @@ def medical_insurance_cost_prediction(data_name = "Medical Insurance Cost Predic
155
155
  label_map = {0: -1, 1: 1}
156
156
 
157
157
 
158
- df = _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, export_csv=export_csv)
158
+ df = _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, export_csv=export_csv, Standard = Standard)
159
159
 
160
160
  return df
161
161
 
162
162
 
163
- def particle_physics_event_classification(data_name = "Particle Physics Event Classification", print_info = False, export_csv = False, drop_cols = []):
163
+ def particle_physics_event_classification(data_name = "Particle Physics Event Classification", print_info = False, export_csv = False, drop_cols = [], Standard =False):
164
164
 
165
165
  data_type = "binary"
166
166
  csv_path = f'./exp_data/{data_type}/{data_name}/Particle Physics Event Classification.csv'
@@ -169,13 +169,13 @@ def particle_physics_event_classification(data_name = "Particle Physics Event Cl
169
169
  label_map = {'s': -1, 'b': 1}
170
170
 
171
171
 
172
- df = _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, export_csv=export_csv)
172
+ df = _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, export_csv=export_csv, Standard = Standard)
173
173
 
174
174
  return df
175
175
 
176
176
 
177
177
 
178
- def adult_income_prediction(data_name = "Adult Income Prediction", print_info = False, export_csv=False, drop_cols = []):
178
+ def adult_income_prediction(data_name = "Adult Income Prediction", print_info = False, export_csv=False, drop_cols = [], Standard = False):
179
179
 
180
180
  data_type = "binary"
181
181
  csv_path = f'./exp_data/{data_type}/{data_name}/adult.csv'
@@ -184,12 +184,12 @@ def adult_income_prediction(data_name = "Adult Income Prediction", print_info =
184
184
  label_map = {'<=50K': -1, '>50K': 1}
185
185
 
186
186
 
187
- df = _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, export_csv=export_csv)
187
+ df = _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, export_csv=export_csv, Standard = Standard)
188
188
 
189
189
  return df
190
190
 
191
191
 
192
- def TamilNadu_weather_2020_2025(data_name = "TN Weather 2020-2025", print_info = False, export_csv = False, drop_cols = ['Unnamed: 0']):
192
+ def TamilNadu_weather_2020_2025(data_name = "TN Weather 2020-2025", print_info = False, export_csv = False, drop_cols = ['Unnamed: 0'], Standard = False):
193
193
 
194
194
  data_type = "binary"
195
195
  csv_path = f'./exp_data/{data_type}/{data_name}/TNweather_1.8M.csv'
@@ -197,20 +197,17 @@ def TamilNadu_weather_2020_2025(data_name = "TN Weather 2020-2025", print_info =
197
197
  label_col = 'rain_tomorrow'
198
198
  label_map = {0: -1, 1: 1}
199
199
 
200
- # Extraction mode.
201
- # - 0 : Extract ['year', 'month', 'day', 'hour']
202
- # - 1 : Extract ['hour', 'dayofweek', 'is_weekend']
203
200
  time_info = {
204
201
  'time_col_name': 'time',
205
202
  'trans_type': 0
206
203
  }
207
204
 
208
- df = _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, export_csv=export_csv, time_info=time_info)
205
+ df = _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, export_csv=export_csv, time_info=time_info, Standard = Standard)
209
206
 
210
207
 
211
208
  return df
212
209
 
213
- def YouTube_Recommendation(data_name = "YouTube Recommendation", print_info = False, export_csv = False, drop_cols = ['user_id']):
210
+ def YouTube_Recommendation(data_name = "YouTube Recommendation", print_info = False, export_csv = False, drop_cols = ['user_id'], Standard = False):
214
211
 
215
212
  data_type = "binary"
216
213
  csv_path = f'./exp_data/{data_type}/{data_name}/youtube recommendation dataset.csv'
@@ -226,6 +223,6 @@ def YouTube_Recommendation(data_name = "YouTube Recommendation", print_info = Fa
226
223
  'trans_type': 1
227
224
  }
228
225
 
229
- df = _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, export_csv=export_csv, time_info=time_info)
226
+ df = _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, export_csv=export_csv, time_info=time_info, Standard = Standard)
230
227
 
231
228
  return df
{junshan_kit-2.4.5.dist-info → junshan_kit-2.4.6.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: junshan_kit
3
- Version: 2.4.5
3
+ Version: 2.4.6
4
4
  Summary: This is an optimization tool.
5
5
  Author-email: Junshan Yin <junshanyin@163.com>
6
6
  Requires-Dist: kaggle==1.7.4.5
{junshan_kit-2.4.5.dist-info → junshan_kit-2.4.6.dist-info}/RECORD CHANGED
@@ -1,6 +1,6 @@
1
1
  junshan_kit/ComOptimizers.py,sha256=MUgFnm1DbbvNKv5-7nHJCLOfq4VjoNk1KLRR-ji5rOA,4637
2
- junshan_kit/DataProcessor.py,sha256=oAbf9QsCaLjnN2yrcn8qGof56dSkEv9jHkWiB1CxfTw,9106
3
- junshan_kit/DataSets.py,sha256=PspH23YbB9cSuh5KQp7Dam3fWsfyH0pwL12nt7KN_tQ,8470
2
+ junshan_kit/DataProcessor.py,sha256=hX9W7wmte941jQkt834BcOUKnkkODX7oPW9HrlJCfkc,9064
3
+ junshan_kit/DataSets.py,sha256=iQrCTubKCSB7xvKS9LvSdZzNTuOEj78vH3KYC1cQNDw,8743
4
4
  junshan_kit/ExperimentHub.py,sha256=MKduxa7U16zMoavgS-lVOCL2ypcMLpAaD8k7JitNqRU,11493
5
5
  junshan_kit/Models.py,sha256=GRTunJON1vLQz2IxgsoOKvjP-3zSJJLuB3CkJTAiImo,6884
6
6
  junshan_kit/Print_Info.py,sha256=vogYcXvoGcRGZV-7svi_mtiCZH6c8d-RhbZLFrLbKr8,3012
@@ -11,6 +11,6 @@ junshan_kit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
11
  junshan_kit/check_args.py,sha256=7m4xSPAwqqQ0SPeKc-MCewDIDB7kFgsNYS2AuTTzGtk,3599
12
12
  junshan_kit/datahub.py,sha256=4c3P2TORMZ4va6NrSiojDCpnY_CGDlJV-5PG3u1_Isk,9081
13
13
  junshan_kit/kit.py,sha256=hpA4Zpn1VAuhdJSBBXswVum0CSk6QnB05GGLYoaRatQ,9792
14
- junshan_kit-2.4.5.dist-info/METADATA,sha256=w5OjSbU0MXzViQmIv8J2YR1Jx87gMWVyKeqEIuR3AUU,266
15
- junshan_kit-2.4.5.dist-info/WHEEL,sha256=tkmg4JIqwd9H8mL30xA7crRmoStyCtGp0VWshokd1Jc,105
16
- junshan_kit-2.4.5.dist-info/RECORD,,
14
+ junshan_kit-2.4.6.dist-info/METADATA,sha256=jAn8rIxCSoRZe1ZcjTBsqfD-5cAlhydSORSMb2sr_4I,266
15
+ junshan_kit-2.4.6.dist-info/WHEEL,sha256=tkmg4JIqwd9H8mL30xA7crRmoStyCtGp0VWshokd1Jc,105
16
+ junshan_kit-2.4.6.dist-info/RECORD,,