junshan-kit 2.4.5__py2.py3-none-any.whl → 2.4.6__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
junshan_kit/DataProcessor.py
CHANGED

@@ -217,15 +217,14 @@ class CSV_TO_Pandas:
         print(f"{'time column:':<40} {time_info}")
         if time_info is not None:
             print(f"{'trans_type : int, optional, default=1'}")
-            print(
-            print(
+            print("- 0 : Extract ['year', 'month', 'day', 'hour']")
+            print("- 1 : Extract ['hour', 'dayofweek', 'is_weekend']")
         print(
             f"{'text fetaure columns:':<40} {', '.join(list(text_feature_cols)) if list(text_feature_cols) else 'None'}"
         )
         print("-" * 80)
-        print(
-
-        )
+        print("all columns:")
+        print(list(columns))
         print("=" * 80 + "\n")
 
         return df
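Note on the new prints above: the two `trans_type` modes that CSV_TO_Pandas now documents correspond to two different time-feature extraction schemes. The extraction code itself is not part of this hunk, so the following is only a minimal pandas sketch of what each mode plausibly produces; the helper name `extract_time_features` is hypothetical, not junshan_kit API.

import pandas as pd

def extract_time_features(df, time_col, trans_type=1):
    # Hypothetical sketch only; not the package's actual implementation.
    ts = pd.to_datetime(df[time_col], errors="coerce")
    if trans_type == 0:
        # Mode 0: plain calendar components.
        df["year"], df["month"] = ts.dt.year, ts.dt.month
        df["day"], df["hour"] = ts.dt.day, ts.dt.hour
    else:
        # Mode 1: daily/weekly pattern features.
        df["hour"] = ts.dt.hour
        df["dayofweek"] = ts.dt.dayofweek
        df["is_weekend"] = (ts.dt.dayofweek >= 5).astype(int)
    return df.drop(columns=[time_col])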
junshan_kit/DataSets.py
CHANGED

@@ -49,7 +49,7 @@ def _export_csv(df, data_name, data_type):
     print(path + f'{data_name}.csv')
 
 
-def _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, user_one_hot_cols = [], export_csv = False, time_info = None):
+def _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, user_one_hot_cols = [], export_csv = False, time_info = None, Standard = False):
     if not os.path.exists(csv_path):
         print('\n' + '*'*60)
         print(f"Please download the data.")
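The only signature-level change in this release is the new `Standard` keyword, threaded from every dataset loader into `_run`. The body of `_run` is outside the diffed hunks, so how the flag is actually applied is not visible here; a plausible reading is z-score standardization of the numeric feature columns. A minimal sketch under that assumption (the helper below is illustrative, not the package's code):

from sklearn.preprocessing import StandardScaler

def _standardize_numeric(df, label_col, Standard=False):
    # Assumption: Standard=True z-scores the numeric feature columns, leaving the label untouched.
    if not Standard:
        return df
    num_cols = df.select_dtypes(include="number").columns.drop(label_col, errors="ignore")
    df[num_cols] = StandardScaler().fit_transform(df[num_cols])
    return df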
@@ -73,7 +73,7 @@ def _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_
     ----------------------------------------------------------------------
     """
 
-def credit_card_fraud_detection(data_name = "Credit Card Fraud Detection", print_info = False, export_csv=False, drop_cols = []):
+def credit_card_fraud_detection(data_name = "Credit Card Fraud Detection", print_info = False, export_csv=False, drop_cols = [], Standard = False):
 
     data_type = "binary"
     csv_path = f'./exp_data/{data_type}/{data_name}/creditcard.csv'

@@ -81,24 +81,24 @@ def credit_card_fraud_detection(data_name = "Credit Card Fraud Detection", print
     label_map = {0: -1, 1: 1}
 
 
-    df = _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, export_csv=export_csv)
+    df = _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, export_csv=export_csv, Standard = Standard)
 
 
     return df
 
 
-def diabetes_health_indicators(data_name = "Diabetes Health Indicators", print_info = False, export_csv = False, drop_cols = []):
+def diabetes_health_indicators(data_name = "Diabetes Health Indicators", print_info = False, export_csv = False, drop_cols = [], Standard = False):
     data_type = "binary"
     csv_path = f'./exp_data/{data_type}/{data_name}/diabetes_dataset.csv'
     label_col = 'diagnosed_diabetes'
     label_map = {0: -1, 1: 1}
 
-    df = _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, export_csv=export_csv)
+    df = _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, export_csv=export_csv, Standard = Standard)
 
     return df
 
 
-def electric_vehicle_population(data_name = "Electric Vehicle Population", print_info = False, export_csv = False, drop_cols = ['VIN (1-10)', 'DOL Vehicle ID', 'Vehicle Location']):
+def electric_vehicle_population(data_name = "Electric Vehicle Population", print_info = False, export_csv = False, drop_cols = ['VIN (1-10)', 'DOL Vehicle ID', 'Vehicle Location'], Standard = False):
 
     data_type = "binary"
     csv_path = f'./exp_data/{data_type}/{data_name}/Electric_Vehicle_Population_Data.csv'

@@ -110,11 +110,11 @@ def electric_vehicle_population(data_name = "Electric Vehicle Population", print
     }
 
 
-    df = _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, export_csv=export_csv)
+    df = _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, export_csv=export_csv, Standard = Standard)
 
     return df
 
-def global_house_purchase(data_name = "Global House Purchase", print_info = False, export_csv = False, drop_cols = ['property_id']):
+def global_house_purchase(data_name = "Global House Purchase", print_info = False, export_csv = False, drop_cols = ['property_id'], Standard =False):
 
     data_type = "binary"
     csv_path = f'./exp_data/{data_type}/{data_name}/global_house_purchase_dataset.csv'

@@ -122,12 +122,12 @@ def global_house_purchase(data_name = "Global House Purchase", print_info = Fals
     label_map = {0: -1, 1: 1}
 
 
-    df = _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, export_csv=export_csv)
+    df = _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, export_csv=export_csv, Standard = Standard)
 
     return df
 
 
-def health_lifestyle(data_name = "Health Lifestyle", print_info = False, export_csv = False, drop_cols = ['id']):
+def health_lifestyle(data_name = "Health Lifestyle", print_info = False, export_csv = False, drop_cols = ['id'], Standard =False):
 
     data_type = "binary"
     csv_path = f'./exp_data/{data_type}/{data_name}/health_lifestyle_dataset.csv'

@@ -136,12 +136,12 @@ def health_lifestyle(data_name = "Health Lifestyle", print_info = False, export_
     label_map = {0: -1, 1: 1}
 
 
-    df = _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, export_csv=export_csv)
+    df = _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, export_csv=export_csv, Standard = Standard)
 
     return df
 
 
-def medical_insurance_cost_prediction(data_name = "Medical Insurance Cost Prediction", print_info = False, export_csv = False, drop_cols = ['alcohol_freq']):
+def medical_insurance_cost_prediction(data_name = "Medical Insurance Cost Prediction", print_info = False, export_csv = False, drop_cols = ['alcohol_freq'], Standard = False):
     """
     1. The missing values in this dataset are handled by directly removing the corresponding column. Since the `alcohol_freq` column contains a large number of missing values, deleting the rows would result in significant data loss, so the entire column is dropped instead.
 

@@ -155,12 +155,12 @@ def medical_insurance_cost_prediction(data_name = "Medical Insurance Cost Predic
     label_map = {0: -1, 1: 1}
 
 
-    df = _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, export_csv=export_csv)
+    df = _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, export_csv=export_csv, Standard = Standard)
 
     return df
 
 
-def particle_physics_event_classification(data_name = "Particle Physics Event Classification", print_info = False, export_csv = False, drop_cols = []):
+def particle_physics_event_classification(data_name = "Particle Physics Event Classification", print_info = False, export_csv = False, drop_cols = [], Standard =False):
 
     data_type = "binary"
     csv_path = f'./exp_data/{data_type}/{data_name}/Particle Physics Event Classification.csv'

@@ -169,13 +169,13 @@ def particle_physics_event_classification(data_name = "Particle Physics Event Cl
     label_map = {'s': -1, 'b': 1}
 
 
-    df = _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, export_csv=export_csv)
+    df = _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, export_csv=export_csv, Standard = Standard)
 
     return df
 
 
 
-def adult_income_prediction(data_name = "Adult Income Prediction", print_info = False, export_csv=False, drop_cols = []):
+def adult_income_prediction(data_name = "Adult Income Prediction", print_info = False, export_csv=False, drop_cols = [], Standard = False):
 
     data_type = "binary"
     csv_path = f'./exp_data/{data_type}/{data_name}/adult.csv'

@@ -184,12 +184,12 @@ def adult_income_prediction(data_name = "Adult Income Prediction", print_info =
     label_map = {'<=50K': -1, '>50K': 1}
 
 
-    df = _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, export_csv=export_csv)
+    df = _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, export_csv=export_csv, Standard = Standard)
 
     return df
 
 
-def TamilNadu_weather_2020_2025(data_name = "TN Weather 2020-2025", print_info = False, export_csv = False, drop_cols = ['Unnamed: 0']):
+def TamilNadu_weather_2020_2025(data_name = "TN Weather 2020-2025", print_info = False, export_csv = False, drop_cols = ['Unnamed: 0'], Standard = False):
 
     data_type = "binary"
     csv_path = f'./exp_data/{data_type}/{data_name}/TNweather_1.8M.csv'

@@ -197,20 +197,17 @@ def TamilNadu_weather_2020_2025(data_name = "TN Weather 2020-2025", print_info =
     label_col = 'rain_tomorrow'
     label_map = {0: -1, 1: 1}
 
-    # Extraction mode.
-    # - 0 : Extract ['year', 'month', 'day', 'hour']
-    # - 1 : Extract ['hour', 'dayofweek', 'is_weekend']
     time_info = {
         'time_col_name': 'time',
         'trans_type': 0
     }
 
-    df = _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, export_csv=export_csv, time_info=time_info)
+    df = _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, export_csv=export_csv, time_info=time_info, Standard = Standard)
 
 
     return df
 
-def YouTube_Recommendation(data_name = "YouTube Recommendation", print_info = False, export_csv = False, drop_cols = ['user_id']):
+def YouTube_Recommendation(data_name = "YouTube Recommendation", print_info = False, export_csv = False, drop_cols = ['user_id'], Standard = False):
 
     data_type = "binary"
     csv_path = f'./exp_data/{data_type}/{data_name}/youtube recommendation dataset.csv'

@@ -226,6 +223,6 @@ def YouTube_Recommendation(data_name = "YouTube Recommendation", print_info = Fa
         'trans_type': 1
     }
 
-    df = _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, export_csv=export_csv, time_info=time_info)
+    df = _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, export_csv=export_csv, time_info=time_info, Standard = Standard)
 
     return df
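From the caller's perspective the change is backward compatible: `Standard` defaults to `False` in every loader, so existing calls behave as before and opting in is one extra keyword argument. A usage sketch, assuming the expected CSV already sits under ./exp_data:

from junshan_kit import DataSets

# Same behaviour as 2.4.5: no standardization requested.
df_raw = DataSets.credit_card_fraud_detection(print_info=True)

# New in 2.4.6: the flag is passed through to _run.
df_std = DataSets.credit_card_fraud_detection(print_info=True, Standard=True)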
junshan_kit-2.4.6.dist-info/RECORD
CHANGED

@@ -1,6 +1,6 @@
 junshan_kit/ComOptimizers.py,sha256=MUgFnm1DbbvNKv5-7nHJCLOfq4VjoNk1KLRR-ji5rOA,4637
-junshan_kit/DataProcessor.py,sha256=
-junshan_kit/DataSets.py,sha256=
+junshan_kit/DataProcessor.py,sha256=hX9W7wmte941jQkt834BcOUKnkkODX7oPW9HrlJCfkc,9064
+junshan_kit/DataSets.py,sha256=iQrCTubKCSB7xvKS9LvSdZzNTuOEj78vH3KYC1cQNDw,8743
 junshan_kit/ExperimentHub.py,sha256=MKduxa7U16zMoavgS-lVOCL2ypcMLpAaD8k7JitNqRU,11493
 junshan_kit/Models.py,sha256=GRTunJON1vLQz2IxgsoOKvjP-3zSJJLuB3CkJTAiImo,6884
 junshan_kit/Print_Info.py,sha256=vogYcXvoGcRGZV-7svi_mtiCZH6c8d-RhbZLFrLbKr8,3012

@@ -11,6 +11,6 @@ junshan_kit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 junshan_kit/check_args.py,sha256=7m4xSPAwqqQ0SPeKc-MCewDIDB7kFgsNYS2AuTTzGtk,3599
 junshan_kit/datahub.py,sha256=4c3P2TORMZ4va6NrSiojDCpnY_CGDlJV-5PG3u1_Isk,9081
 junshan_kit/kit.py,sha256=hpA4Zpn1VAuhdJSBBXswVum0CSk6QnB05GGLYoaRatQ,9792
-junshan_kit-2.4.
-junshan_kit-2.4.
-junshan_kit-2.4.
+junshan_kit-2.4.6.dist-info/METADATA,sha256=jAn8rIxCSoRZe1ZcjTBsqfD-5cAlhydSORSMb2sr_4I,266
+junshan_kit-2.4.6.dist-info/WHEEL,sha256=tkmg4JIqwd9H8mL30xA7crRmoStyCtGp0VWshokd1Jc,105
+junshan_kit-2.4.6.dist-info/RECORD,,