junshan_kit-2.3.5-py2.py3-none-any.whl → junshan_kit-2.3.7-py2.py3-none-any.whl
This diff shows the content of publicly available package versions as released to a supported public registry. It is provided for informational purposes only and reflects the changes between the two published versions.
junshan_kit/DataProcessor.py
CHANGED
@@ -8,9 +8,87 @@
 import pandas as pd
 from sklearn.preprocessing import StandardScaler

+
 class CSV_TO_Pandas:
     def __init__(self):
         pass
+
+
+    def _trans_time_fea(self, df, time_info: dict):
+        """
+        Transform and extract time-based features from a specified datetime column.
+
+        This function converts a given column to pandas datetime format and
+        extracts different time-related features based on the specified mode.
+        It supports two extraction modes:
+        - type = 0: Extracts basic components (year, month, day, hour)
+        - type = 1: Extracts hour, day of week, and weekend indicator
+
+        Parameters
+        ----------
+        df : pandas.DataFrame
+            Input DataFrame containing the datetime column.
+        time_info:
+            - time_col_name : str
+                Name of the column containing time or datetime values.
+            - trans_type : int, optional, default=1
+                Extraction mode.
+                - 0 : Extract ['year', 'month', 'day', 'hour']
+                - 1 : Extract ['hour', 'dayofweek', 'is_weekend']
+
+        Returns
+        -------
+        pandas.DataFrame
+            The DataFrame with newly added time-based feature columns.
+
+        Notes
+        -----
+        - Rows that cannot be parsed as valid datetime will be dropped automatically.
+        - 'dayofweek' ranges from 0 (Monday) to 6 (Sunday).
+        - 'is_weekend' equals 1 if the day is Saturday or Sunday, otherwise 0.
+
+        Examples
+        --------
+        >>> import pandas as pd
+        >>> data = pd.DataFrame({
+        ...     'timestamp': ['2023-08-01 12:30:00', '2023-08-05 08:15:00', 'invalid_time']
+        ... })
+        >>> df = handler._trans_time_fea(data, {"time_col_name": "timestamp", "trans_type": 1})
+        >>> print(df)
+                    timestamp  hour  dayofweek  is_weekend
+        0 2023-08-01 12:30:00    12          1           0
+        1 2023-08-05 08:15:00     8          5           1
+        """
+
+        time_col_name, trans_type = time_info['time_col_name'], time_info['trans_type']
+
+        df[time_col_name] = pd.to_datetime(df[time_col_name], errors="coerce")
+
+        # Drop rows where the datetime conversion failed, and make an explicit copy
+        df = df.dropna(subset=[time_col_name]).copy()
+
+        if trans_type == 0:
+            df.loc[:, "year"] = df[time_col_name].dt.year
+            df.loc[:, "month"] = df[time_col_name].dt.month
+            df.loc[:, "day"] = df[time_col_name].dt.day
+            df.loc[:, "hour"] = df[time_col_name].dt.hour
+
+            user_text_fea = ['year','month','day', 'hour']
+            df = pd.get_dummies(df, columns=user_text_fea, dtype=int)
+
+        elif trans_type == 1:
+            df.loc[:, "hour"] = df[time_col_name].dt.hour
+            df.loc[:, "dayofweek"] = df[time_col_name].dt.dayofweek
+            df.loc[:, "is_weekend"] = df["dayofweek"].isin([5, 6]).astype(int)
+
+            user_text_fea = ['hour','dayofweek','is_weekend']
+            df = pd.get_dummies(df, columns=user_text_fea, dtype=int)
+        else:
+            print("error!")
+
+        df = df.drop(columns=[time_col_name])
+
+        return df

     def preprocess_dataset(
         self,
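The new helper can be exercised directly. A minimal sketch (the `handler` object and sample frame are hypothetical; note that, as implemented, the method also one-hot encodes the extracted fields and drops the source column, so the real output is wider than the docstring example suggests):

    import pandas as pd
    import junshan_kit.DataProcessor

    handler = junshan_kit.DataProcessor.CSV_TO_Pandas()
    data = pd.DataFrame({
        'timestamp': ['2023-08-01 12:30:00', '2023-08-05 08:15:00', 'invalid_time'],
        'x': [1.0, 2.0, 3.0],
    })

    # trans_type=1 extracts hour/dayofweek/is_weekend, one-hot encodes them,
    # coerces 'invalid_time' to NaT (row dropped), and removes 'timestamp'.
    out = handler._trans_time_fea(data, {'time_col_name': 'timestamp', 'trans_type': 1})
    print(out.columns.tolist())
    # e.g. ['x', 'hour_8', 'hour_12', 'dayofweek_1', 'dayofweek_5', 'is_weekend_0', 'is_weekend_1']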
@@ -19,9 +97,10 @@ class CSV_TO_Pandas:
         label_col: str,
         label_map: dict,
         title_name: str,
-        user_one_hot_cols
+        user_one_hot_cols=[],
         print_info=False,
-        Standard
+        Standard=False,
+        time_info: dict | None = None
     ):
         """
         Preprocess a CSV dataset by performing data cleaning, label mapping, and feature encoding.
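With the widened signature, a time-aware call looks roughly like this (a sketch: the leading csv_path/drop_cols positionals are inferred from the `_run` call in DataSets.py below, and the file and column names are hypothetical):

    import junshan_kit.DataProcessor

    cleaner = junshan_kit.DataProcessor.CSV_TO_Pandas()
    df = cleaner.preprocess_dataset(
        'data.csv',          # csv_path (hypothetical file)
        ['id'],              # drop_cols
        'target',            # label_col
        {0: -1, 1: 1},       # label_map
        'Demo',              # title_name
        print_info=True,
        time_info={'time_col_name': 'created_at', 'trans_type': 1},
    )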
@@ -72,6 +151,9 @@
         # Save original size
         m_original, n_original = df.shape

+        if time_info is not None:
+            df = self._trans_time_fea(df, time_info)
+
         # Step 1: Drop non-informative columns
         df = df.drop(columns=drop_cols)

@@ -90,12 +172,19 @@
             col for col in text_feature_cols if col != label_col
         ] # ✅ exclude label

-        df = pd.get_dummies(
+        df = pd.get_dummies(
+            df, columns=text_feature_cols + user_one_hot_cols, dtype=int
+        )
         m_cleaned, n_cleaned = df.shape

         if Standard:
-
-            num_cols = [
+            # Identify numerical columns Standardize numerical columns
+            num_cols = [
+                col
+                for col in df.columns
+                if col
+                not in list(text_feature_cols) + [label_col] + [user_one_hot_cols]
+            ]
             scaler = StandardScaler()
             df[num_cols] = scaler.fit_transform(df[num_cols])

@@ -119,8 +208,9 @@
             )
             print("-" * 80)
            print(f"Note:")
-            print(f"{'Label column:':<40} {label_col}")
+            print(f"{'Label column:':<40} {label_col}")
             print(f"{'label_map:':<40} {label_map}")
+            print(f"{'time column:':<40} {time_info}")
             print(
                 f"{'Dropped non-feature columns:':<40} {', '.join(drop_cols) if drop_cols else 'None'}"
             )
@@ -130,7 +220,7 @@
             print("=" * 80 + "\n")

         return df
-
+
 
 class StepByStep:
     def __init__(self):
@@ -139,6 +229,8 @@ class StepByStep:
     def print_text_fea(self, df, text_feature_cols):
         for col in text_feature_cols:
             print(f"\n{'-'*80}")
-            print(f
+            print(f'Feature: "{col}"')
             print(f"{'-'*80}")
-            print(
+            print(
+                f"Unique values ({len(df[col].unique())}): {df[col].unique().tolist()}"
+            )
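For reference, the repaired helper now prints each categorical feature with its unique values; a quick sketch (sample frame hypothetical):

    import pandas as pd
    import junshan_kit.DataProcessor

    sbs = junshan_kit.DataProcessor.StepByStep()
    df = pd.DataFrame({'color': ['red', 'blue', 'red'], 'n': [1, 2, 3]})
    sbs.print_text_fea(df, ['color'])
    # Feature: "color"
    # Unique values (2): ['red', 'blue']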
junshan_kit/DataSets.py
CHANGED
@@ -1,7 +1,7 @@
 """
 ----------------------------------------------------------------------
 >>> Author : Junshan Yin
->>> Last Updated : 2025-
+>>> Last Updated : 2025-10-16
 ----------------------------------------------------------------------
 """

@@ -46,7 +46,7 @@ def _export_csv(df, data_name):
     print(path + f'{data_name}.csv')


-def _run(csv_path, data_name, drop_cols, label_col, label_map, print_info, user_one_hot_cols = [], export_csv = False, ):
+def _run(csv_path, data_name, drop_cols, label_col, label_map, print_info, user_one_hot_cols = [], export_csv = False, time_info = None):
    if not os.path.exists(csv_path):
        print('\n' + '*'*60)
        print(f"Please download the data.")
@@ -55,7 +55,7 @@ def _run(csv_path, data_name, drop_cols, label_col, label_map, print_info, user_
        # junshan_kit.kit.unzip_file(f'./exp_data/{data_name}/{data_name}.zip', f'./exp_data/{data_name}')

    cleaner = junshan_kit.DataProcessor.CSV_TO_Pandas()
-   df = cleaner.preprocess_dataset(csv_path, drop_cols, label_col, label_map, data_name, user_one_hot_cols, print_info=print_info)
+   df = cleaner.preprocess_dataset(csv_path, drop_cols, label_col, label_map, data_name, user_one_hot_cols, print_info=print_info, time_info = time_info)

    if export_csv:
        _export_csv(df, data_name)
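Because time_info defaults to None both here and in preprocess_dataset, which applies _trans_time_fea only when time_info is not None, existing loaders that never pass it keep their 2.3.5 behavior. A sketch of the two call shapes (dataset name and paths hypothetical):

    # legacy call: no time features, unchanged behavior
    df = _run('./exp_data/Demo/demo.csv', 'Demo', [], 'label', {0: -1, 1: 1}, False)

    # time-aware call: the 'time' column is expanded before the usual cleaning steps
    df = _run('./exp_data/Demo/demo.csv', 'Demo', [], 'label', {0: -1, 1: 1}, False,
              time_info={'time_col_name': 'time', 'trans_type': 0})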
@@ -166,91 +166,220 @@ def adult_income_prediction(data_name = "Adult Income Prediction", print_info =
     return df


-
-
 def TamilNadu_weather_2020_2025(data_name = "TN Weather 2020-2025", print_info = False, export_csv = False):
     csv_path = f'./exp_data/{data_name}/TNweather_1.8M.csv'
+    drop_cols = []
     label_col = 'rain_tomorrow'
     label_map = {0: -1, 1: 1}

-
-
-
-
-
-
-
-    # Step 0: Load the dataset
-    df = pd.read_csv(csv_path)
-
-    df['time'] = pd.to_datetime(df['time'])
-    df['year'] = df['time'].dt.year
-    df['month'] = df['time'].dt.month
-    df['day'] = df['time'].dt.day
-    df['hour'] = df['time'].dt.hour
-
-    user_one_hot_cols = ['year','month','day', 'hour']
-    drop_cols = ['Unnamed: 0', 'time']
+    # Extraction mode.
+    # - 0 : Extract ['year', 'month', 'day', 'hour']
+    # - 1 : Extract ['hour', 'dayofweek', 'is_weekend']
+    time_info = {
+        'time_col_name': 'time',
+        'trans_type': 0
+    }

-
-    m_original, n_original = df.shape
+    df = _run(csv_path, data_name, drop_cols, label_col, label_map, print_info, time_info = time_info)

-
-    df = df.drop(columns=drop_cols)
+    return df

-    # Step 2: Remove rows with missing values
-    df = df.dropna(axis=0, how="any")
-    m_encoded, n_encoded = df.shape

-
-
+# def TamilNadu_weather_2020_2025(data_name = "TN Weather 2020-2025", print_info = False, export_csv = False):
+#     csv_path = f'./exp_data/{data_name}/TNweather_1.8M.csv'
+#     label_col = 'rain_tomorrow'
+#     label_map = {0: -1, 1: 1}

-
-
-
-
-
-
-
+#     if not os.path.exists(csv_path):
+#         print('\n' + '*'*60)
+#         print(f"Please download the data.")
+#         print(csv_path)
+#         _download_data(data_name)
+#         # junshan_kit.kit.unzip_file(f'./exp_data/{data_name}/{data_name}.zip', f'./exp_data/{data_name}')
+
+#     # Step 0: Load the dataset
+#     df = pd.read_csv(csv_path)

-
-
+#     df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')
+#     df = df.dropna(subset=['timestamp'])

-
-
-
+#     df['time'] = pd.to_datetime(df['time'])
+#     df['year'] = df['time'].dt.year
+#     df['month'] = df['time'].dt.month
+#     df['day'] = df['time'].dt.day
+#     df['hour'] = df['time'].dt.hour
+
+#     user_one_hot_cols = ['year','month','day', 'hour']
+#     drop_cols = ['Unnamed: 0', 'time']
+
+#     # Save original size
+#     m_original, n_original = df.shape
+
+#     # Step 1: Drop non-informative columns
+#     df = df.drop(columns=drop_cols)
+
+#     # Step 2: Remove rows with missing values
+#     df = df.dropna(axis=0, how="any")
+#     m_encoded, n_encoded = df.shape
+
+#     # Step 3: Map target label (to -1 and +1)
+#     df[label_col] = df[label_col].map(label_map)
+
+#     # Step 4: Encode categorical features (exclude label column)
+#     text_feature_cols = df.select_dtypes(
+#         include=["object", "string", "category"]
+#     ).columns
+#     text_feature_cols = [
+#         col for col in text_feature_cols if col != label_col
+#     ] # ✅ exclude label
+
+#     df = pd.get_dummies(df, columns=text_feature_cols + user_one_hot_cols, dtype=int)
+#     m_cleaned, n_cleaned = df.shape
+
+#     num_cols = [col for col in df.columns if col not in list(text_feature_cols) + [label_col] + user_one_hot_cols]
+#     scaler = StandardScaler()
+#     df[num_cols] = scaler.fit_transform(df[num_cols])
+
+#     if export_csv:
+#         _export_csv(df, data_name)
+
+#     # print info
+#     if print_info:
+#         pos_count = (df[label_col] == 1).sum()
+#         neg_count = (df[label_col] == -1).sum()
+
+#         # Step 6: Print dataset information
+#         print("\n" + "=" * 80)
+#         print(f"{f'{data_name} - Info':^70}")
+#         print("=" * 80)
+#         print(f"{'Original size:':<40} {m_original} rows x {n_original} cols")
+#         print(
+#             f"{'Size after dropping NaN & non-feature cols:':<40} {m_encoded} rows x {n_encoded} cols"
+#         )
+#         print(f"{'Positive samples (+1):':<40} {pos_count}")
+#         print(f"{'Negative samples (-1):':<40} {neg_count}")
+#         print(
+#             f"{'Size after one-hot encoding:':<40} {m_cleaned} rows x {n_cleaned} cols"
+#         )
+#         print("-" * 80)
+#         print(f"Note:")
+#         print(f"{'Label column:':<40} {label_col}")
+#         print(f"{'label_map:':<40} {label_map}")
+#         print(
+#             f"{'Dropped non-feature columns:':<40} {', '.join(drop_cols) if drop_cols else 'None'}"
+#         )
+#         print(
+#             f"{'text fetaure columns:':<40} {', '.join(list(text_feature_cols)) if list(text_feature_cols) else 'None'}"
+#         )
+#         print("=" * 80 + "\n")
+
+#     return df
+
+
+# def YouTube_Recommendation(data_name = "YouTube Recommendation", print_info = False, export_csv = False):
+#     csv_path = f'./exp_data/{data_name}/youtube recommendation dataset.csv'
+#     drop_cols = ['user_id']
+#     label_col = 'subscribed_after'
+#     label_map = {0: -1, 1: 1}
+
+#     if not os.path.exists(csv_path):
+#         print('\n' + '*'*60)
+#         print(f"Please download the data.")
+#         print(csv_path)
+#         _download_data(data_name)
+#         # junshan_kit.kit.unzip_file(f'./exp_data/{data_name}/{data_name}.zip', f'./exp_data/{data_name}')
+
+#     # Step 0: Load the dataset
+#     df = pd.read_csv(csv_path)

-
-
+#     df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')
+#     df = df.dropna(subset=['timestamp'])

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+#     df["hour"] = df['timestamp'].dt.hour
+#     df["dayofweek"] = df['timestamp'].dt.dayofweek
+#     df["is_weekend"] = df["dayofweek"].isin([5, 6]).astype(int)
+
+#     user_one_hot_cols = ['dayofweek','is_weekend','hour']
+#     drop_cols = ['user_id', 'timestamp']
+
+#     # Save original size
+#     m_original, n_original = df.shape
+
+#     # Step 1: Drop non-informative columns
+#     df = df.drop(columns=drop_cols)
+
+#     # Step 2: Remove rows with missing values
+#     df = df.dropna(axis=0, how="any")
+#     m_encoded, n_encoded = df.shape
+
+#     # Step 3: Map target label (to -1 and +1)
+#     df[label_col] = df[label_col].map(label_map)
+
+#     # Step 4: Encode categorical features (exclude label column)
+#     text_feature_cols = df.select_dtypes(
+#         include=["object", "string", "category"]
+#     ).columns
+#     text_feature_cols = [
+#         col for col in text_feature_cols if col != label_col
+#     ] # ✅ exclude label
+
+#     df = pd.get_dummies(df, columns=text_feature_cols + user_one_hot_cols, dtype=int)
+#     m_cleaned, n_cleaned = df.shape
+
+#     num_cols = [col for col in df.columns if col not in list(text_feature_cols) + [label_col] + user_one_hot_cols]
+#     scaler = StandardScaler()
+#     df[num_cols] = scaler.fit_transform(df[num_cols])
+
+#     if export_csv:
+#         _export_csv(df, data_name)
+
+#     # print info
+#     if print_info:
+#         pos_count = (df[label_col] == 1).sum()
+#         neg_count = (df[label_col] == -1).sum()
+
+#         # Step 6: Print dataset information
+#         print("\n" + "=" * 80)
+#         print(f"{f'{data_name} - Info':^70}")
+#         print("=" * 80)
+#         print(f"{'Original size:':<40} {m_original} rows x {n_original} cols")
+#         print(
+#             f"{'Size after dropping NaN & non-feature cols:':<40} {m_encoded} rows x {n_encoded} cols"
+#         )
+#         print(f"{'Positive samples (+1):':<40} {pos_count}")
+#         print(f"{'Negative samples (-1):':<40} {neg_count}")
+#         print(
+#             f"{'Size after one-hot encoding:':<40} {m_cleaned} rows x {n_cleaned} cols"
+#         )
+#         print("-" * 80)
+#         print(f"Note:")
+#         print(f"{'Label column:':<40} {label_col}")
+#         print(f"{'label_map:':<40} {label_map}")
+#         print(
+#             f"{'Dropped non-feature columns:':<40} {', '.join(drop_cols) if drop_cols else 'None'}"
+#         )
+#         print(
+#             f"{'text fetaure columns:':<40} {', '.join(list(text_feature_cols)) if list(text_feature_cols) else 'None'}"
+#         )
+#         print("=" * 80 + "\n")
+
+#     return df
+
+
+def YouTube_Recommendation(data_name = "YouTube Recommendation", print_info = False, export_csv = False):
+    csv_path = f'./exp_data/{data_name}/youtube recommendation dataset.csv'
+    drop_cols = ['user_id']
+    label_col = 'subscribed_after'
+    label_map = {0: -1, 1: 1}

-
+    # Extraction mode.
+    # - 0 : Extract ['year', 'month', 'day', 'hour']
+    # - 1 : Extract ['hour', 'dayofweek', 'is_weekend']
+    time_info = {
+        'time_col_name': 'timestamp',
+        'trans_type': 1
+    }
+
+    df = _run(csv_path, data_name, drop_cols, label_col, label_map, print_info, time_info = time_info)
+
+    return df
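Both refactored loaders now delegate the whole pipeline to _run, so typical usage reduces to the following sketch (it assumes the CSVs are already under ./exp_data/ or will be fetched by the package's download helper):

    from junshan_kit import DataSets

    # 'time' column one-hot expanded as year/month/day/hour (trans_type=0)
    df_weather = DataSets.TamilNadu_weather_2020_2025(print_info=True)

    # 'timestamp' one-hot expanded as hour/dayofweek/is_weekend (trans_type=1)
    df_youtube = DataSets.YouTube_Recommendation(print_info=True)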
junshan_kit-2.3.5.dist-info/METADATA → junshan_kit-2.3.7.dist-info/METADATA
CHANGED
@@ -1,11 +1,11 @@
 Metadata-Version: 2.4
 Name: junshan_kit
-Version: 2.3.5
+Version: 2.3.7
 Summary: This is an optimization tool.
 Author-email: Junshan Yin <junshanyin@163.com>
 Requires-Dist: kaggle==1.7.4.5
 Requires-Dist: kagglehub==0.3.13
-Requires-Dist: numpy==2.2.
+Requires-Dist: numpy==2.2.7
 Requires-Dist: pandas==2.3.3
 Requires-Dist: scikit-learn==1.7.1
 Requires-Dist: selenium==4.36.0
junshan_kit-2.3.7.dist-info/RECORD
ADDED
@@ -0,0 +1,7 @@
+junshan_kit/DataProcessor.py,sha256=MOKMkq4OE32VyLkgUD-D2J5dORmUDLfylAir0UiI04E,8665
+junshan_kit/DataSets.py,sha256=EgDPN7Sm6MLSwxBpJE_A5TN-6eVsjGLjFoZdgg-BnZ8,13819
+junshan_kit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+junshan_kit/kit.py,sha256=tB1TpW9hW1EweK1RQwHOdUo7uG1QU4vSeyR0fdaSydo,9569
+junshan_kit-2.3.7.dist-info/METADATA,sha256=kYYOgCdx-lIUDOnK2nfxReqCommjtEW-25MxUDOpS6w,329
+junshan_kit-2.3.7.dist-info/WHEEL,sha256=tkmg4JIqwd9H8mL30xA7crRmoStyCtGp0VWshokd1Jc,105
+junshan_kit-2.3.7.dist-info/RECORD,,
junshan_kit-2.3.5.dist-info/RECORD
REMOVED
@@ -1,7 +0,0 @@
-junshan_kit/DataProcessor.py,sha256=niI7kun5lcBpTJaHzATE5vqnD_9GTyTID9fcKeYHxZ0,5316
-junshan_kit/DataSets.py,sha256=L3D0eBCKHWqpy3qXZvWQP_yKaNzWyj5W1_OLS736xjg,8972
-junshan_kit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-junshan_kit/kit.py,sha256=tB1TpW9hW1EweK1RQwHOdUo7uG1QU4vSeyR0fdaSydo,9569
-junshan_kit-2.3.5.dist-info/METADATA,sha256=mZnRM7gqHpgFRZPQ1caQHNeUm7bpTW-XsM0vf733xDE,329
-junshan_kit-2.3.5.dist-info/WHEEL,sha256=tkmg4JIqwd9H8mL30xA7crRmoStyCtGp0VWshokd1Jc,105
-junshan_kit-2.3.5.dist-info/RECORD,,
File without changes